[
  {
    "path": ".clang-format",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n---\nLanguage:        Cpp\n# BasedOnStyle:  LLVM\nAccessModifierOffset: -4\nAlignAfterOpenBracket: Align\nAlignConsecutiveAssignments: false\nAlignConsecutiveDeclarations: false\nAlignEscapedNewlinesLeft: false\nAlignOperands:   true\nAlignTrailingComments: true\nAllowAllParametersOfDeclarationOnNextLine: true\nAllowShortBlocksOnASingleLine: false\nAllowShortCaseLabelsOnASingleLine: false\nAllowShortFunctionsOnASingleLine: All\nAllowShortIfStatementsOnASingleLine: false\nAllowShortLoopsOnASingleLine: false\nAlwaysBreakAfterDefinitionReturnType: None\nAlwaysBreakAfterReturnType: None\nAlwaysBreakBeforeMultilineStrings: false\nAlwaysBreakTemplateDeclarations: true\nBinPackArguments: false\nBinPackParameters: false\nBraceWrapping:   \n  AfterClass:      true\n  
AfterControlStatement: false\n  AfterEnum:       true\n  AfterFunction:   true\n  AfterNamespace:  false\n  AfterObjCDeclaration: false\n  AfterStruct:     true\n  AfterUnion:      true\n  BeforeCatch:     false\n  BeforeElse:      false\n  IndentBraces:    false\nBreakBeforeBinaryOperators: None\nBreakBeforeBraces: Custom\nBreakBeforeTernaryOperators: true\nBreakConstructorInitializersBeforeComma: false\nBreakAfterJavaFieldAnnotations: false\nBreakStringLiterals: true\nColumnLimit:     100\nCommentPragmas:  '^ IWYU pragma:'\nConstructorInitializerAllOnOneLineOrOnePerLine: true\nConstructorInitializerIndentWidth: 4\nContinuationIndentWidth: 4\nCpp11BracedListStyle: true\nDerivePointerAlignment: false\nDisableFormat:   false\nExperimentalAutoDetectBinPacking: true\nForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]\nIncludeCategories: \n  - Regex:           '^\"(llvm|llvm-c|clang|clang-c)/'\n    Priority:        2\n  - Regex:           '^(<|\"(gtest|isl|json)/)'\n    Priority:        3\n  - Regex:           '.*'\n    Priority:        1\nIncludeIsMainRegex: '$'\nIndentCaseLabels: false\nIndentWidth:     4\nIndentWrappedFunctionNames: false\nJavaScriptQuotes: Leave\nJavaScriptWrapImports: true\nKeepEmptyLinesAtTheStartOfBlocks: true\nMacroBlockBegin: ''\nMacroBlockEnd:   ''\nMaxEmptyLinesToKeep: 1\nNamespaceIndentation: None\nObjCBlockIndentWidth: 2\nObjCSpaceAfterProperty: false\nObjCSpaceBeforeProtocolList: true\nPenaltyBreakBeforeFirstCallParameter: 19\nPenaltyBreakComment: 300\nPenaltyBreakFirstLessLess: 120\nPenaltyBreakString: 1000\nPenaltyExcessCharacter: 1000000\nPenaltyReturnTypeOnItsOwnLine: 60\nPointerAlignment: Right\nReflowComments:  true\nSortIncludes:    false\nSpaceAfterCStyleCast: false\nSpaceBeforeAssignmentOperators: true\nSpaceBeforeParens: ControlStatements\nSpaceInEmptyParentheses: false\nSpacesBeforeTrailingComments: 1\nSpacesInAngles:  false\nSpacesInContainerLiterals: true\nSpacesInCStyleCastParentheses: false\nSpacesInParentheses: 
false\nSpacesInSquareBrackets: false\nStandard:        Cpp11\nTabWidth:        4\nUseTab:          Never\n...\n\n"
  },
  {
    "path": ".github/workflows/check_license_header.yaml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n---\nname: License Check\non:\n  pull_request:\n  push:\n    branches:\n      - master\n      - 'v[0-9]+.*' # release branch\n      - ci-test # testing branch for github action\n      - '*dev' # developing branch\njobs:\n  license-check:\n    name: Check License Header\n    runs-on: ubuntu-latest\n    steps:\n      - name: \"Checkout\"\n        uses: actions/checkout@v2\n      - name: Check License Header\n        uses: apache/skywalking-eyes@v0.3.0\n        with:\n          # Optional: set the log level. The default value is `info`.\n          # log: debug\n          # Optional: set the config file. The default value is `.licenserc.yaml`.\n          config: .licenserc.yaml\n"
  },
  {
    "path": ".github/workflows/compilation.yaml",
    "content": "##############################################################################\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n##############################################################################\n\nname: rdsn compilation\n\non:\n  push:\n    # when new tag created\n    tags:\n      - v*\n    # when this file is updated\n    paths:\n      - '.github/workflows/compilation.yml'\n\n  # for manually triggering workflow\n  workflow_dispatch:\n\n  # run for every day 2am UTC+8(Beijing)\n  schedule:\n    - cron:  '0 18 */1 * *'\n\njobs:\n  compilation:\n    runs-on: ubuntu-latest\n    strategy:\n      fail-fast: false\n      matrix:\n        os:\n          - ubuntu1604\n          - ubuntu1804\n          - ubuntu2004\n          #- centos6 is EOL\n          - centos7\n        compiler-family:\n          - gcc\n        include:\n          - compiler-family: clang\n            compiler: \"clang-10,clang++-10\"\n            os: ubuntu2004\n          - compiler-family: clang\n            compiler: \"clang-9,clang++-9\"\n            os: ubuntu1804\n    container:\n      image: apache/pegasus:thirdparties-bin-${{ matrix.os }}\n    steps:\n      - uses: actions/checkout@v2\n      - name: Unpack prebuilt 
third-parties\n        run: unzip /root/thirdparties-bin.zip -d ./thirdparty\n      - name: Compilation on GCC\n        if: ${{ matrix.compiler-family == 'gcc' }}\n        run: ./run.sh build -c --skip_thirdparty\n      - name: Compilation on Clang\n        if: ${{ matrix.compiler-family == 'clang' }}\n        env:\n          COMPILER: ${{ matrix.compiler }}\n        run: ./run.sh build --compiler $COMPILER --skip_thirdparty\n"
  },
  {
    "path": ".github/workflows/cpp_actions.yaml",
    "content": "# Developer Notes:\n#\n# This config is for github actions. Before merging your changes of this file,\n# it's recommended to create a PR against the ci-test branch to test if it works\n# as expected.\n\nname: Code Lint and Unit Test\n\non:\n  # run on each pull request\n  pull_request:\n    types: [ synchronize, opened, reopened ]\n    paths:\n      - .github/workflows/cpp_actions.yaml\n      - CMakeLists.txt\n      - bin/**\n      - compile_thrift.py\n      - include/**\n      - run.sh\n      - scripts/**\n      - src/**\n      - thirdparty/**\n    branches:\n      - master\n      - 'v[0-9]+.*' # release branch\n      - ci-test # testing branch for github action\n      - '*dev' # developing branch\n\n  # for manually triggering workflow\n  workflow_dispatch:\n\ndefaults:\n  run:\n    shell: bash\n\njobs:\n  lint:\n    name: Lint\n    runs-on: ubuntu-latest\n    container:\n      image: apache/pegasus:clang-format-3.9\n    steps:\n      - uses: actions/checkout@v2\n        with:\n          fetch-depth: 1\n      - name: clang-format\n        run: ./scripts/linux/run-clang-format.py --clang-format-executable=clang-format-3.9 -r src include\n\n  test:\n    name: Test\n    needs: lint\n    runs-on: self-hosted\n    container:\n      image: apache/pegasus:thirdparties-bin-ubuntu1804\n      env:\n        CCACHE_DIR: /tmp/ccache/pegasus\n        CCACHE_MAXSIZE: 10G\n      volumes:\n        # Place ccache compilation intermediate results in host memory, that's shared among containers.\n        - /tmp/ccache/pegasus:/tmp/ccache/pegasus\n      # Read docs at https://docs.docker.com/storage/tmpfs/ for more details of using tmpfs in docker.\n      options: --mount type=tmpfs,destination=/tmp/pegasus --cap-add=SYS_PTRACE\n    steps:\n      - uses: actions/checkout@v2\n      - uses: dorny/paths-filter@v2\n        id: changes\n        with:\n          filters: |\n            thirdparty:\n              - 'thirdparty/**'\n      - name: Unpack prebuilt 
third-parties\n        if: steps.changes.outputs.thirdparty == 'false'\n        run: unzip /root/thirdparties-bin.zip -d ./thirdparty\n      - name: Rebuild third-parties\n        if: steps.changes.outputs.thirdparty == 'true'\n        working-directory: thirdparty\n        run: |\n          mkdir build\n          cmake -DCMAKE_BUILD_TYPE=Release -B build/\n          cmake --build build/ -j $(($(nproc)/2+1))\n      - name: Compilation\n        run: ./run.sh build -c --skip_thirdparty\n      - name: Unit Testing\n        run: |\n          export LD_LIBRARY_PATH=`pwd`/thirdparty/output/lib:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server          \n          ulimit -s unlimited\n          ./run.sh test --skip_thirdparty\n\n  test-with-sanitizer-address:\n    name: Test With Sanitizer Address\n    needs: lint\n    runs-on: self-hosted\n    container:\n      image: apache/pegasus:thirdparties-bin-ubuntu1804\n      env:\n        CCACHE_DIR: /tmp/ccache/pegasus\n        CCACHE_MAXSIZE: 10G\n      volumes:\n        # Place ccache compilation intermediate results in host memory, that's shared among containers.\n        - /tmp/ccache/pegasus:/tmp/ccache/pegasus\n      # Read docs at https://docs.docker.com/storage/tmpfs/ for more details of using tmpfs in docker.\n      options: --mount type=tmpfs,destination=/tmp/pegasus --cap-add=SYS_PTRACE\n    steps:\n      - uses: actions/checkout@v2\n      - uses: dorny/paths-filter@v2\n        id: changes\n        with:\n          filters: |\n            thirdparty:\n              - 'thirdparty/**'\n      - name: Unpack prebuilt third-parties\n        if: steps.changes.outputs.thirdparty == 'false'\n        run: unzip /root/thirdparties-bin.zip -d ./thirdparty\n      - name: Rebuild third-parties\n        if: steps.changes.outputs.thirdparty == 'true'\n        working-directory: thirdparty\n        run: |\n          mkdir build\n          cmake -DCMAKE_BUILD_TYPE=Release -B build/\n          cmake --build build/ -j 
$(($(nproc)/2+1))\n      - name: Compilation\n        run: ./run.sh build -c --sanitizer address --skip_thirdparty --disable_gperf\n      - name: Unit Testing\n        run: |\n          export LD_LIBRARY_PATH=`pwd`/thirdparty/output/lib:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server          \n          ulimit -s unlimited\n          ./run.sh test --sanitizer address --skip_thirdparty --disable_gperf\n\n  test-with-sanitizer-leak:\n    name: Test With Sanitizer Leak\n    needs: lint\n    runs-on: self-hosted\n    container:\n      image: apache/pegasus:thirdparties-bin-ubuntu1804\n      env:\n        CCACHE_DIR: /tmp/ccache/pegasus\n        CCACHE_MAXSIZE: 10G\n      volumes:\n        # Place ccache compilation intermediate results in host memory, that's shared among containers.\n        - /tmp/ccache/pegasus:/tmp/ccache/pegasus\n      # Read docs at https://docs.docker.com/storage/tmpfs/ for more details of using tmpfs in docker.\n      options: --mount type=tmpfs,destination=/tmp/pegasus --cap-add=SYS_PTRACE\n    steps:\n      - uses: actions/checkout@v2\n      - uses: dorny/paths-filter@v2\n        id: changes\n        with:\n          filters: |\n            thirdparty:\n              - 'thirdparty/**'\n      - name: Unpack prebuilt third-parties\n        if: steps.changes.outputs.thirdparty == 'false'\n        run: unzip /root/thirdparties-bin.zip -d ./thirdparty\n      - name: Rebuild third-parties\n        if: steps.changes.outputs.thirdparty == 'true'\n        working-directory: thirdparty\n        run: |\n          mkdir build\n          cmake -DCMAKE_BUILD_TYPE=Release -B build/\n          cmake --build build/ -j $(($(nproc)/2+1))\n      - name: Compilation\n        run: ./run.sh build -c --sanitizer leak --skip_thirdparty --disable_gperf\n      - name: Unit Testing\n        run: |\n          export LD_LIBRARY_PATH=`pwd`/thirdparty/output/lib:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server          \n          ulimit -s unlimited\n          
./run.sh test --sanitizer leak --skip_thirdparty --disable_gperf\n\n  test-with-sanitizer-undefined:\n    name: Test With Sanitizer Undefined\n    needs: lint\n    runs-on: self-hosted\n    container:\n      image: apache/pegasus:thirdparties-bin-ubuntu1804\n      env:\n        CCACHE_DIR: /tmp/ccache/pegasus\n        CCACHE_MAXSIZE: 10G\n      volumes:\n        # Place ccache compilation intermediate results in host memory, that's shared among containers.\n        - /tmp/ccache/pegasus:/tmp/ccache/pegasus\n      # Read docs at https://docs.docker.com/storage/tmpfs/ for more details of using tmpfs in docker.\n      options: --mount type=tmpfs,destination=/tmp/pegasus --cap-add=SYS_PTRACE\n    steps:\n      - uses: actions/checkout@v2\n      - uses: dorny/paths-filter@v2\n        id: changes\n        with:\n          filters: |\n            thirdparty:\n              - 'thirdparty/**'\n      - name: Unpack prebuilt third-parties\n        if: steps.changes.outputs.thirdparty == 'false'\n        run: unzip /root/thirdparties-bin.zip -d ./thirdparty\n      - name: Rebuild third-parties\n        if: steps.changes.outputs.thirdparty == 'true'\n        working-directory: thirdparty\n        run: |\n          mkdir build\n          cmake -DCMAKE_BUILD_TYPE=Release -B build/\n          cmake --build build/ -j $(($(nproc)/2+1))\n      - name: Compilation\n        run: ./run.sh build -c --sanitizer undefined --skip_thirdparty --disable_gperf\n      - name: Unit Testing\n        run: |\n          export LD_LIBRARY_PATH=`pwd`/thirdparty/output/lib:/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server          \n          ulimit -s unlimited\n          ./run.sh test --sanitizer undefined --skip_thirdparty --disable_gperf\n"
  },
  {
    "path": ".github/workflows/cpp_always_pass_actions.yaml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n---\n# There are 'paths' filters for .github/workflows/cpp_actions.yaml, \n# it can be skipped, but lead CI pending, we add this\n# 'always pass' action to resolve it, see more details:\n# https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/defining-the-mergeability-of-pull-requests/troubleshooting-required-status-checks#handling-skipped-but-required-checks\n\nname: Code Lint and Unit Test\n\non:\n  # run on each pull request\n  pull_request:\n    types: [ synchronize, opened, reopened ]\n    paths-ignore:\n      - .github/workflows/cpp_actions.yaml\n      - CMakeLists.txt\n      - bin/**\n      - compile_thrift.py\n      - include/**\n      - run.sh\n      - scripts/**\n      - src/**\n      - thirdparty/**\n    branches:\n      - master\n      - 'v[0-9]+.*' # release branch\n      - ci-test # testing branch for github action\n      - '*dev' # developing branch\n\n  # for manually triggering workflow\n  workflow_dispatch:\n\njobs:\n  lint:\n    name: Lint\n    runs-on: ubuntu-latest\n    steps:\n      - run: 'echo \"No build required\" '\n\n  test:\n    name: Test\n    runs-on: ubuntu-latest\n    steps:\n      - run: 
'echo \"No build required\" '\n\n  test-with-sanitizer-address:\n    name: Test With Sanitizer Address\n    runs-on: ubuntu-latest\n    steps:\n      - run: 'echo \"No build required\" '\n\n  test-with-sanitizer-leak:\n    name: Test With Sanitizer Leak\n    runs-on: ubuntu-latest\n    steps:\n      - run: 'echo \"No build required\" '\n\n  test-with-sanitizer-undefined:\n    name: Test With Sanitizer Undefined\n    runs-on: ubuntu-latest\n    steps:\n      - run: 'echo \"No build required\" '\n"
  },
  {
    "path": ".github/workflows/issue_ref.yaml",
    "content": "#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n#\n\nname: Issue Check\n\non:\n  pull_request:\n    types: ['opened', 'edited', 'reopened', 'synchronize']\n\njobs:\n  title:\n    name: Issue check\n    runs-on: ubuntu-latest\n\n    steps:\n      - name: Check for ticket\n        uses: neofinancial/ticket-check-action@v1.3.0\n        with:\n          token: ${{ secrets.GITHUB_TOKEN }}\n          ticketPrefix: '#'\n          bodyURLRegex: 'http(s?):\\/\\/(github.com)(\\/apache)(\\/incubator-pegasus)(\\/issues)\\/\\d+'\n"
  },
  {
    "path": ".github/workflows/non_cpp_actions.yaml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n---\nname: Non-cpp Code Lint\n\non:\n  # run on each pull request\n  pull_request:\n    types: [ synchronize, opened, reopened ]\n    branches:\n      - master\n      - 'v[0-9]+.*' # release branch\n      - ci-test # testing branch for github action\n      - '*dev' # developing branch\n\n  # for manually triggering workflow\n  workflow_dispatch:\n\ndefaults:\n  run:\n    shell: bash\n\njobs:\n  semantic_title:\n    name: Lint PR title\n    runs-on: ubuntu-latest\n    steps:\n      - uses: amannn/action-semantic-pull-request@v4.3.0\n        env:\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n\n  markdown-link-check:\n    name: Check Markdown links\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v2\n      - uses: gaurav-nelson/github-action-markdown-link-check@1.0.13\n"
  },
  {
    "path": ".gitignore",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n.matchfile\n.kdev4/\n.zk_install/\n.idea\n.vscode/\n\ngcov_report/\nbin/Linux/thrift\nbuilder/\ninstall/\ntest_reports/\nDockerfile\ninclude/thrift/\nDSN_ROOT\n\nrdsn.github.config\nrdsn.github.creator\nrdsn.github.creator.user*\nrdsn.github.files\nrdsn.github.includes\nrDSN.github.*\nupdate_qt_config.sh\nzookeeper-3.4.6.tar.gz\nzookeeper-3.4.6/\n\nthirdparty/build\nthirdparty/src\nthirdparty/output\n\npackages/\ncmake-build-debug/\n"
  },
  {
    "path": ".licenserc.yaml",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\nheader:\n  license:\n    spdx-id: MIT\n    copyright-owner: Microsoft Corporation\n    content: |\n      The MIT License (MIT)\n     \n      Copyright (c) 2015 Microsoft Corporation\n     \n      -=- Robust Distributed System Nucleus (rDSN) -=-\n     \n      Permission is hereby granted, free of charge, to any person obtaining a copy\n      of this software and associated documentation files (the \"Software\"), to deal\n      in the Software without restriction, including without limitation the rights\n      to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n      copies of the Software, and to permit persons to whom the Software is\n      furnished to do so, subject to the following conditions:\n     \n      The above copyright notice and this permission notice shall be included in\n      all copies or substantial portions of the Software.\n     \n      THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n      IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n      FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n      AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n      LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n      OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n      THE SOFTWARE.\n\n  paths-ignore:\n    # All the type of licenses of this project should be added to LICENSE.\n    - 'LICENSE'\n    # Apache license 2.0\n    - 'src/common/backup.thrift'\n    - 'src/common/bulk_load.thrift'\n    - 'src/common/duplication.thrift'\n    - 'src/common/partition_split.thrift'\n    - 'src/runtime/rpc/request_meta.thrift'\n    - 'src/runtime/security/security.thrift'\n    - '.licenserc.yaml'\n    - 'bin/FindJemalloc.cmake'\n    - 'bin/compiler_info.cmake'\n    - 'bin/thrift_utils.cmake'\n    - 'CONTRIBUTING.md'\n    - '.github/workflows/check_license_header.yaml'\n    - '.github/workflows/compilation.yaml'\n    - '.github/workflows/cpp_actions.yaml'\n    - '.github/workflows/cpp_always_pass_actions.yaml'\n    - '.github/workflows/issue_ref.yaml'\n    - '.github/workflows/non_cpp_actions.yaml'\n    - 'include/dsn/dist/fmt_logging.h'\n    - 'include/dsn/dist/block_service.h'\n    - 'include/dsn/dist/common.h'\n    - 'include/dsn/dist/remote_command.h'\n    - 'include/dsn/dist/replication/mutation_duplicator.h'\n    - 'include/dsn/dist/replication/duplication_common.h'\n    - 'include/dsn/dist/replication/replication_types.h'\n    - 'include/dsn/utility/clock.h'\n    - 'include/dsn/utility/preloadable.h'\n    - 'include/dsn/utility/sys_exit_hook.h'\n    - 'include/dsn/utils/token_buckets.h'\n    - 'include/dsn/utils/latency_tracer.h'\n    - 'include/dsn/utils/token_bucket_throttling_controller.h'\n    - 'include/dsn/http/http_server.h'\n    - 'include/dsn/cpp/message_utils.h'\n    - 'include/dsn/cpp/rpc_holder.h'\n    - 'include/dsn/cpp/pipeline.h'\n    - 'include/dsn/perf_counter/perf_counter_utils.h'\n    - 'include/dsn/utility/endians.h'\n    - 
'include/dsn/utility/fail_point.h'\n    - 'include/dsn/utility/singleton.h'\n    - 'include/dsn/utility/process_utils.h'\n    - 'include/dsn/utility/flags.h'\n    - 'include/dsn/utility/string_conv.h'\n    - 'include/dsn/utility/timer.h'\n    - 'include/dsn/utility/output_utils.h'\n    - 'include/dsn/utility/string_splitter.h'\n    - 'include/dsn/utility/rand.h'\n    - 'include/dsn/utility/math.h'\n    - 'include/dsn/utility/defer.h'\n    - 'include/dsn/utility/alloc.h'\n    - 'include/dsn/utility/casts.h'\n    - 'include/dsn/utility/long_adder.h'\n    - 'include/dsn/utility/metrics.h'\n    - 'include/dsn/utility/nth_element.h'\n    - 'src/aio/aio_task.cpp'\n    - 'src/aio/test/main.cpp'\n    - 'src/meta/test/meta_http_service_test.cpp'\n    - 'src/meta/test/meta_service_test.cpp'\n    - 'src/perf_counter/perf_counter_atomic.h'\n    - 'src/perf_counter/test/main.cpp'\n    - 'src/remote_cmd/remote_command.cpp'\n    - 'src/runtime/sim_clock.h'\n    - 'src/runtime/test/replica_access_controller_test.cpp'\n    - 'src/runtime/test/server_negotiation_test.cpp'\n    - 'src/runtime/test/negotiation_manager_test.cpp'\n    - 'src/runtime/test/task_test.cpp'\n    - 'src/runtime/test/client_negotiation_test.cpp'\n    - 'src/runtime/test/message_reader_test.cpp'\n    - 'src/runtime/test/thrift_message_parser_test.cpp'\n    - 'src/runtime/test/meta_access_controller_test.cpp'\n    - 'src/runtime/security/sasl_wrapper.h'\n    - 'src/runtime/security/sasl_server_wrapper.cpp'\n    - 'src/runtime/security/server_negotiation.cpp'\n    - 'src/runtime/security/server_negotiation.h'\n    - 'src/runtime/security/sasl_server_wrapper.h'\n    - 'src/runtime/security/access_controller.h'\n    - 'src/runtime/security/access_controller.cpp'\n    - 'src/runtime/security/sasl_init.h'\n    - 'src/runtime/security/negotiation_manager.h'\n    - 'src/runtime/security/sasl_client_wrapper.cpp'\n    - 'src/runtime/security/kinit_context.cpp'\n    - 'src/runtime/security/client_negotiation.cpp'\n    - 
'src/runtime/security/sasl_init.cpp'\n    - 'src/runtime/security/kinit_context.h'\n    - 'src/runtime/security/negotiation.cpp'\n    - 'src/runtime/security/negotiation_manager.cpp'\n    - 'src/runtime/security/client_negotiation.h'\n    - 'src/runtime/security/negotiation.h'\n    - 'src/runtime/security/security.thrift'\n    - 'src/runtime/security/sasl_wrapper.cpp'\n    - 'src/runtime/security/replica_access_controller.cpp'\n    - 'src/runtime/security/meta_access_controller.h'\n    - 'src/runtime/security/init.cpp'\n    - 'src/runtime/security/init.h'\n    - 'src/runtime/security/replica_access_controller.h'\n    - 'src/runtime/security/sasl_client_wrapper.h'\n    - 'src/runtime/security/negotiation_utils.h'\n    - 'src/runtime/security/meta_access_controller.cpp'\n    - 'src/runtime/rpc/request_meta.thrift'\n    - 'src/meta/meta_state_service_utils_impl.h'\n    - 'src/meta/meta_bulk_load_ingestion_context.cpp'\n    - 'src/meta/partition_guardian.h'\n    - 'src/meta/app_balance_policy.cpp'\n    - 'src/meta/test/meta_test_base.h'\n    - 'src/meta/test/meta_app_operation_test.cpp'\n    - 'src/meta/test/meta_mauanl_compaction_test.cpp'\n    - 'src/meta/test/meta_bulk_load_service_test.cpp'\n    - 'src/meta/test/ford_fulkerson_test.cpp'\n    - 'src/meta/test/cluster_balance_policy_test.cpp'\n    - 'src/meta/test/meta_test_base.cpp'\n    - 'src/meta/test/dummy_balancer.h'\n    - 'src/meta/test/meta_bulk_load_ingestion_test.cpp'\n    - 'src/meta/test/meta_backup_test.cpp'\n    - 'src/meta/test/backup_test.cpp'\n    - 'src/meta/test/main.cpp'\n    - 'src/meta/test/server_state_restore_test.cpp'\n    - 'src/meta/test/copy_replica_operation_test.cpp'\n    - 'src/meta/app_env_validator.cpp'\n    - 'src/meta/meta_rpc_types.h'\n    - 'src/meta/load_balance_policy.cpp'\n    - 'src/meta/meta_bulk_load_service.cpp'\n    - 'src/meta/cluster_balance_policy.cpp'\n    - 'src/meta/meta_split_service.cpp'\n    - 'src/meta/cluster_balance_policy.h'\n    - 
'src/meta/app_env_validator.h'\n    - 'src/meta/meta_bulk_load_service.h'\n    - 'src/meta/backup_engine.cpp'\n    - 'src/meta/backup_engine.h'\n    - 'src/meta/meta_backup_service.cpp'\n    - 'src/meta/meta_backup_service.h'\n    - 'src/meta/meta_http_service.h'\n    - 'src/meta/meta_split_service.h'\n    - 'src/meta/meta_state_service_utils.h'\n    - 'src/meta/meta_state_service_utils.cpp'\n    - 'src/meta/meta_http_service.cpp'\n    - 'src/meta/app_balance_policy.h'\n    - 'src/meta/partition_guardian.cpp'\n    - 'src/meta/meta_bulk_load_ingestion_context.h'\n    - 'src/meta/load_balance_policy.h'\n    - 'src/meta/duplication/duplication_info.h'\n    - 'src/meta/duplication/meta_duplication_service.h'\n    - 'src/meta/duplication/duplication_info.cpp'\n    - 'src/meta/duplication/meta_duplication_service.cpp'\n    - 'src/meta/server_state_restore.cpp'\n    - 'src/utils/clock.cpp'\n    - 'src/utils/flags.cpp'\n    - 'src/utils/throttling_controller.h'\n    - 'src/utils/time_utils.cpp'\n    - 'src/utils/test/rand_test.cpp'\n    - 'src/utils/test/latency_tracer_test.cpp'\n    - 'src/utils/test/binary_reader_test.cpp'\n    - 'src/utils/test/fail_point_test.cpp'\n    - 'src/utils/test/hostname_test.cpp'\n    - 'src/utils/test/token_bucket_throttling_controller_test.cpp'\n    - 'src/utils/test/file_system_test.cpp'\n    - 'src/utils/test/endian_test.cpp'\n    - 'src/utils/test/token_buckets_test.cpp'\n    - 'src/utils/test/flag_test.cpp'\n    - 'src/utils/test/main.cpp'\n    - 'src/utils/fail_point.cpp'\n    - 'src/utils/math.cpp'\n    - 'src/utils/latency_tracer.cpp'\n    - 'src/utils/fail_point_impl.h'\n    - 'src/utils/process_utils.cpp'\n    - 'src/utils/token_bucket_throttling_controller.cpp'\n    - 'src/utils/token_buckets.cpp'\n    - 'src/utils/rand.cpp'\n    - 'src/utils/throttling_controller.cpp'\n    - 'src/utils/output_utils.cpp'\n    - 'src/utils/alloc.cpp'\n    - 'src/utils/long_adder.cpp'\n    - 'src/utils/long_adder_bench/long_adder_bench.cpp'\n    - 
'src/utils/metrics.cpp'\n    - 'src/utils/shared_io_service.cpp'\n    - 'src/utils/test/long_adder_test.cpp'\n    - 'src/utils/test/metrics_test.cpp'\n    - 'src/utils/test/nth_element_bench/nth_element_bench.cpp'\n    - 'src/utils/test/nth_element_test.cpp'\n    - 'src/utils/test/nth_element_utils.h'\n    - 'src/utils/test/percentile_utils.h'\n    - 'src/common/partition_split.thrift'\n    - 'src/common/common.cpp'\n    - 'src/common/consensus.thrift'\n    - 'src/common/backup_common.h'\n    - 'src/common/bulk_load_common.cpp'\n    - 'src/common/test/fs_manager_test.cpp'\n    - 'src/common/test/common_test.cpp'\n    - 'src/common/test/replication_common_test.cpp'\n    - 'src/common/fs_manager.h'\n    - 'src/common/backup_common.cpp'\n    - 'src/common/bulk_load_common.h'\n    - 'src/common/duplication.thrift'\n    - 'src/common/manual_compact.h'\n    - 'src/common/duplication_common.cpp'\n    - 'src/common/partition_split_common.h'\n    - 'src/common/metadata.thrift'\n    - 'src/common/meta_admin.thrift'\n    - 'src/common/bulk_load.thrift'\n    - 'src/common/backup.thrift'\n    - 'src/common/replica_admin.thrift'\n    - 'src/http/CMakeLists.txt'\n    - 'src/http/builtin_http_calls.h'\n    - 'src/http/test/http_server_test.cpp'\n    - 'src/http/test/perf_counter_http_service_test.cpp'\n    - 'src/http/test/uri_decoder_test.cpp'\n    - 'src/http/http_server_impl.h'\n    - 'src/http/uri_decoder.cpp'\n    - 'src/http/uri_decoder.h'\n    - 'src/http/http_server.cpp'\n    - 'src/http/perf_counter_http_service.cpp'\n    - 'src/http/pprof_http_service.cpp'\n    - 'src/http/builtin_http_calls.cpp'\n    - 'src/http/service_version.cpp'\n    - 'src/http/pprof_http_service.h'\n    - 'src/http/service_version.h'\n    - 'src/http/config_http_service.cpp'\n    - 'src/http/http_call_registry.h'\n    - 'src/block_service/CMakeLists.txt'\n    - 'src/block_service/test/fds_service_test.cpp'\n    - 'src/block_service/test/CMakeLists.txt'\n    - 
'src/block_service/test/hdfs_service_test.cpp'\n    - 'src/block_service/test/local_service_test.cpp'\n    - 'src/block_service/test/run.sh'\n    - 'src/block_service/test/block_service_mock.h'\n    - 'src/block_service/test/clear.sh'\n    - 'src/block_service/test/main.cpp'\n    - 'src/block_service/test/block_service_manager_test.cpp'\n    - 'src/block_service/directio_writable_file.h'\n    - 'src/block_service/fds/fds_service.cpp'\n    - 'src/block_service/fds/fds_service.h'\n    - 'src/block_service/block_service_manager.cpp'\n    - 'src/block_service/block_service_manager.h'\n    - 'src/block_service/local/local_service.cpp'\n    - 'src/block_service/local/local_service.h'\n    - 'src/block_service/hdfs/hdfs_service.cpp'\n    - 'src/block_service/hdfs/hdfs_service.h'\n    - 'src/block_service/directio_writable_file.cpp'\n    - 'src/replica/replica_disk_migrator.cpp'\n    - 'src/replica/split/replica_split_manager.cpp'\n    - 'src/replica/split/test/replica_split_test.cpp'\n    - 'src/replica/split/test/main.cpp'\n    - 'src/replica/split/replica_split_manager.h'\n    - 'src/replica/replica_http_service.cpp'\n    - 'src/replica/log_block.cpp'\n    - 'src/replica/test/replica_learn_test.cpp'\n    - 'src/replica/test/log_file_test.cpp'\n    - 'src/replica/test/throttling_controller_test.cpp'\n    - 'src/replica/test/replica_disk_test_base.h'\n    - 'src/replica/test/cold_backup_context_test.cpp'\n    - 'src/replica/test/replica_disk_test.cpp'\n    - 'src/replica/test/log_block_test.cpp'\n    - 'src/replica/test/clear.sh'\n    - 'src/replica/test/replica_test.cpp'\n    - 'src/replica/test/replica_disk_migrate_test.cpp'\n    - 'src/replica/test/main.cpp'\n    - 'src/replica/test/backup_block_service_mock.h'\n    - 'src/replica/test/open_replica_test.cpp'\n    - 'src/replica/replica_backup.cpp'\n    - 'src/replica/disk_cleaner.h'\n    - 'src/replica/mutation_log_replay.cpp'\n    - 'src/replica/bulk_load/test/replica_bulk_loader_test.cpp'\n    - 
'src/replica/bulk_load/test/main.cpp'\n    - 'src/replica/bulk_load/replica_bulk_loader.cpp'\n    - 'src/replica/bulk_load/replica_bulk_loader.h'\n    - 'src/replica/disk_cleaner.cpp'\n    - 'src/replica/replica_disk_migrator.h'\n    - 'src/replica/log_block.h'\n    - 'src/replica/replica_throttle.cpp'\n    - 'src/replica/backup/cold_backup_context.h'\n    - 'src/replica/backup/cold_backup_context.cpp'\n    - 'src/replica/backup/test/replica_backup_manager_test.cpp'\n    - 'src/replica/backup/test/main.cpp'\n    - 'src/replica/backup/replica_backup_manager.h'\n    - 'src/replica/backup/replica_backup_server.cpp'\n    - 'src/replica/backup/replica_backup_manager.cpp'\n    - 'src/replica/backup/replica_backup_server.h'\n    - 'src/replica/replica_http_service.h'\n    - 'src/replica/replica_restore.cpp'\n    - 'src/replica/duplication/mutation_batch.h'\n    - 'src/replica/duplication/replica_follower.cpp'\n    - 'src/replica/duplication/replica_duplicator_manager.cpp'\n    - 'src/replica/duplication/duplication_pipeline.cpp'\n    - 'src/replica/duplication/replica_follower.h'\n    - 'src/replica/duplication/replica_duplicator_manager.h'\n    - 'src/replica/duplication/test/replica_http_service_test.cpp'\n    - 'src/replica/duplication/test/replica_duplicator_manager_test.cpp'\n    - 'src/replica/duplication/test/duplication_sync_timer_test.cpp'\n    - 'src/replica/duplication/test/load_from_private_log_test.cpp'\n    - 'src/replica/duplication/test/duplication_test_base.h'\n    - 'src/replica/duplication/test/replica_follower_test.cpp'\n    - 'src/replica/duplication/test/ship_mutation_test.cpp'\n    - 'src/replica/duplication/test/mutation_batch_test.cpp'\n    - 'src/replica/duplication/test/replica_duplicator_test.cpp'\n    - 'src/replica/duplication/test/main.cpp'\n    - 'src/replica/duplication/replica_duplicator.h'\n    - 'src/replica/duplication/duplication_pipeline.h'\n    - 'src/replica/duplication/load_from_private_log.h'\n    - 
'src/replica/duplication/mutation_batch.cpp'\n    - 'src/replica/duplication/duplication_sync_timer.h'\n    - 'src/replica/duplication/duplication_sync_timer.cpp'\n    - 'src/replica/duplication/load_from_private_log.cpp'\n    - 'src/replica/duplication/replica_duplicator.cpp'\n    - 'src/perf_counter/builtin_counters.cpp'\n    - 'src/perf_counter/builtin_counters.h'\n    # Copyright (c) Facebook, Inc\n    - 'include/dsn/utility/TokenBucket.h'\n    - 'src/utils/test/TokenBucketTest.cpp'\n    - 'src/utils/test/TokenBucketTest.h'\n    # https://github.com/preshing/cpp11-on-multicore/blob/master/LICENSE\n    - 'include/dsn/utility/hpc_locks/autoreseteventcondvar.h'\n    - 'include/dsn/utility/hpc_locks/rwlock.h'\n    - 'include/dsn/utility/hpc_locks/autoresetevent.h'\n    - 'include/dsn/utility/hpc_locks/sema.h'\n    - 'include/dsn/utility/hpc_locks/bitfield.h'\n    - 'include/dsn/utility/hpc_locks/benaphore.h'\n    # Copyright (c) xxxx The Chromium Authors\n    - 'src/runtime/build_config.h'\n    - 'src/utils/test/autoref_ptr_test.cpp'\n    - 'include/dsn/utility/safe_strerror_posix.h'\n    - 'src/runtime/build_config.h'\n    - 'src/utils/test/autoref_ptr_test.cpp'\n    - 'src/utils/safe_strerror_posix.cpp'\n    # Copyright 2017 The Abseil Authors\n    - 'include/dsn/utility/absl/base/internal/invoke.h'\n    - 'include/dsn/utility/absl/utility/utility.h'\n    - 'include/dsn/utility/smart_pointers.h'\n    - 'include/dsn/utility/string_view.h'\n    - 'src/utils/test/memutil_test.cpp'\n    - 'src/utils/test/string_view_test.cpp'\n    - 'src/utils/test/smart_pointers_test.cpp'\n    - 'src/utils/memutil.h'\n    - 'src/utils/string_view.cpp'\n    # Copyright (c) 2010-2011, Rob Jansen\n    - 'bin/FindRT.cmake'\n    - 'bin/FindDL.cmake'\n    # Copyright (c) 2017 Guillaume Papin\n    - 'scripts/linux/run-clang-format.py'\n    # need manual fix\n    - 'src/failure_detector/test/gtest.filter'\n    - 'src/meta/test/meta_state/gtest.filter'\n    - 'src/meta/test/suite1'\n    - 
'src/meta/test/suite2'\n    - 'src/nfs/test/nfs_test_file1'\n    - 'src/nfs/test/nfs_test_file2'\n    - 'src/runtime/test/gtest.filter'\n    - 'thirdparty/fix_fds_for_macos.patch'\n    - 'thirdparty/fix_s2_for_aarch64.patch'\n    - 'thirdparty/fix_thrift_for_cpp11.patch'\n    # should be empty, or ignore all comment lines\n    - 'src/utils/test/config-empty.ini'\n\n  comment: on-failure\n"
  },
  {
    "path": "CMakeLists.txt",
    "content": "cmake_minimum_required(VERSION 3.11.0)\n\nif (\"${CMAKE_CXX_COMPILER_ID}\" STREQUAL \"GNU\")\n    # require at least gcc 5.4.0\n    if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.4.0)\n        message(FATAL_ERROR \"GCC version must be at least 5.4.0!\")\n    endif ()\nendif ()\n\nproject(dsn C CXX)\n\ninclude(bin/dsn.cmake)\n\nset(DSN_BUILD_RUNTIME TRUE)\nadd_definitions(-DGTEST_HAS_TR1_TUPLE=0 -DGTEST_USE_OWN_TR1_TUPLE=0)\ndsn_common_setup()\n\n# add custom target for gcov\nif(ENABLE_GCOV)\n    find_program(GCOV_PATH gcov)\n    find_program(GCOVR_PATH gcovr)\n\n    if(NOT GCOV_PATH)\n        message(FATAL_ERROR \"gcov not found! Aborting...\")\n    endif() # NOT GCOV_PATH\n\n    if(NOT GCOVR_PATH)\n        message(FATAL_ERROR \"gcovr not found! Aborting...\")\n    endif() # NOT GCOVR_PATH\n\n    if(NOT CMAKE_BUILD_TYPE STREQUAL \"Debug\")\n        message(WARNING \"Code coverage results with an optimised (non-Debug) build may be misleading\")\n    endif() # NOT CMAKE_BUILD_TYPE STREQUAL \"Debug\"\n\n    set(COVERAGE_COMPILER_FLAGS \"-g -O0 --coverage -fprofile-arcs -ftest-coverage\" CACHE INTERNAL \"\")\n    set(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} ${COVERAGE_COMPILER_FLAGS}\")\n    set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} ${COVERAGE_COMPILER_FLAGS}\")\n    message(STATUS \"Appending code coverage compiler flags: ${COVERAGE_COMPILER_FLAGS}\")\n\n    set(CMAKE_EXE_LINKER_FLAGS \"${CMAKE_EXE_LINKER_FLAGS} -lgcov\")\n    add_definitions(-DENABLE_GCOV=1)\nendif()\n\nif(SANITIZER)\n    add_definitions(-DSANITIZER=1)\nendif()\n# Users don't have to configure CMAKE_INSTALL_PREFIX unless they want to customize\n# the destination.\nset(CMAKE_INSTALL_PREFIX ${DSN_ROOT} CACHE STRING \"\" FORCE)\nmessage (STATUS \"dsn Installation directory: CMAKE_INSTALL_PREFIX = \" ${CMAKE_INSTALL_PREFIX})\n\n# install rdsn/include to ${CMAKE_INCLUDE_PREFIX}/include\ninstall(DIRECTORY include/ DESTINATION include)\n\n# NOTE: dsn_types.h is not actually a thrift-generated file. 
As pegasus may reference it,\n# we should also copy this file to the same path as the other thrift-generated files.\ninstall(FILES include/dsn/cpp/serialization_helper/dsn_types.h DESTINATION include)\n# TODO(wutao1): remove this line after dsn.layer2_types.h is removed from repo.\ninstall(FILES include/dsn/cpp/serialization_helper/dsn.layer2_types.h DESTINATION include)\n\ninclude_directories(${DSN_PROJECT_DIR}/include)\ninclude_directories(${DSN_PROJECT_DIR}/include/dsn/cpp/serialization_helper)\ninclude_directories(${DSN_PROJECT_DIR}/src)\n\nadd_subdirectory(src)\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "<!--\nLicensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements.  See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership.  The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License.  You may obtain a copy of the License at\n\n  http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing,\nsoftware distributed under the License is distributed on an\n\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\nKIND, either express or implied.  See the License for the\nspecific language governing permissions and limitations\nunder the License.\n-->\n\n# Contributing to Pegasus\n\nThis is a rough outline of what a contributor's workflow looks like:\n\n- Create a topic branch from where you want to base your work. This is usually master.\n- Make commits of logical units and add test case if the change fixes a bug or adds new functionality.\n- Run tests and make sure all the tests are passed.\n- Make sure your commit messages are in the proper format (see below).\n- Push your changes to a topic branch in your fork of the repository.\n- Submit a pull request.\n- Your PR must receive LGTMs from two maintainers.\n\nThanks for your contributions!\n\n### Format of the Commit Message\n\nWe follow a rough convention for commit messages that is designed to answer two\nquestions: what changed and why. The subject line should feature the what and\nthe body of the commit should describe the why.\n\n```\nutility: introduce rpc_holder\n\nThis is a wrapper of dsn_message_t. 
It manages the lifetime following RAII.\n\nFix #10\n```\n\nThe format can be described more formally as follows:\n\n```\n<subsystem>: <what changed>\n<BLANK LINE>\n<why this change was made>\n<BLANK LINE>\n<footer>(optional)\n```\n\nThe first line is the subject and should be no longer than 70 characters, the\nsecond line is always blank, and other lines should be wrapped at 80 characters.\nThis allows the message to be easier to read on GitHub as well as in various\ngit tools.\n\nIf the change affects more than one subsystem, you can use a comma to separate them like `util/codec,util/types:`.\n\nIf the change affects many subsystems, you can use ```*``` instead, like ```*:```.\n\nFor the why part, if there is no specific reason for the change,\nyou can use one of the generic reasons like \"Improve documentation.\",\n\"Improve performance.\", \"Improve robustness.\", \"Improve test coverage.\"\n"
  },
  {
    "path": "LICENSE",
    "content": "The MIT License (MIT)\n\nCopyright (c) Microsoft Corporation\n\nAll rights reserved.\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n\n--------------------------------------------------------------------------------\n\ninclude/dsn/utility/TokenBucket.h - Apache License, Version 2.0\nsrc/utils/test/TokenBucketTest.cpp\nsrc/utils/test/TokenBucketTest.h\n\nCopyright (c) Facebook, Inc. 
and its affiliates.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n    http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n\n--------------------------------------------------------------------------------\n\ninclude/dsn/utility/hpc_locks/autoreseteventcondvar.h - zlib License\ninclude/dsn/utility/hpc_locks/rwlock.h\ninclude/dsn/utility/hpc_locks/autoresetevent.h\ninclude/dsn/utility/hpc_locks/sema.h\ninclude/dsn/utility/hpc_locks/bitfield.h\ninclude/dsn/utility/hpc_locks/benaphore.h\n\nCopyright (c) 2015 Jeff Preshing\n\nThis software is provided 'as-is', without any express or implied\nwarranty. In no event will the authors be held liable for any damages\narising from the use of this software.\n\nPermission is granted to anyone to use this software for any purpose,\nincluding commercial applications, and to alter it and redistribute it\nfreely, subject to the following restrictions:\n\n1. The origin of this software must not be misrepresented; you must not\n   claim that you wrote the original software. If you use this software\n   in a product, an acknowledgement in the product documentation would be\n   appreciated but is not required.\n2. Altered source versions must be plainly marked as such, and must not be\n   misrepresented as being the original software.\n3. 
This notice may not be removed or altered from any source distribution.\n\n--------------------------------------------------------------------------------\n\nsrc/runtime/build_config.h - BSD-3-Clause license\nsrc/utils/test/autoref_ptr_test.cpp\ninclude/dsn/utility/safe_strerror_posix.h\nsrc/runtime/build_config.h\nsrc/utils/test/autoref_ptr_test.cpp\nsrc/utils/safe_strerror_posix.cpp\n\nCopyright (c) 2012 The Chromium Authors. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are\nmet:\n\n   * Redistributions of source code must retain the above copyright\nnotice, this list of conditions and the following disclaimer.\n   * Redistributions in binary form must reproduce the above\ncopyright notice, this list of conditions and the following disclaimer\nin the documentation and/or other materials provided with the\ndistribution.\n   * Neither the name of Google Inc. nor the names of its\ncontributors may be used to endorse or promote products derived from\nthis software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\nLIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\nA PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT\nOWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\nSPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\nLIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\nDATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\nTHEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n--------------------------------------------------------------------------------\n\ninclude/dsn/utility/absl/base/internal/invoke.h - Apache License, Version 2.0\ninclude/dsn/utility/absl/utility/utility.h\ninclude/dsn/utility/smart_pointers.h\ninclude/dsn/utility/string_view.h\nsrc/utils/test/memutil_test.cpp\nsrc/utils/test/string_view_test.cpp\nsrc/utils/test/smart_pointers_test.cpp\nsrc/utils/memutil.h\nsrc/utils/string_view.cpp\n\nCopyright 2017 The Abseil Authors.\n\nLicensed under the Apache License, Version 2.0 (the \"License\");\nyou may not use this file except in compliance with the License.\nYou may obtain a copy of the License at\n\n     http://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.\n\n--------------------------------------------------------------------------------\n\nbin/FindRT.cmake - BSD-3-Clause license\nbin/FindDL.cmake\n\nCopyright (c) 2010-2011, Rob Jansen\n\nTo the extent that a federal employee is an author of a portion of\nthis software or a derivative work thereof, no copyright is claimed by\nthe United States Government, as represented by the Secretary of the\nNavy (\"GOVERNMENT\") under Title 17, U.S. Code. 
All Other Rights\nReserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are\nmet:\n    * Redistributions of source code must retain the above copyright\nnotice, this list of conditions and the following disclaimer.\n    * Redistributions in binary form must reproduce the above\ncopyright notice, this list of conditions and the following disclaimer\nin the documentation and/or other materials provided with the\ndistribution.\n    * Neither the names of the copyright owners nor the names of its\ncontributors may be used to endorse or promote products derived from\nthis software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\nLIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\nA PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\nOWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\nSPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\nLIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\nDATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\nTHEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\nGOVERNMENT ALLOWS FREE USE OF THIS SOFTWARE IN ITS \"AS IS\" CONDITION\nAND DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER\nRESULTING FROM THE USE OF THIS SOFTWARE.\n\n--------------------------------------------------------------------------------\n\nscripts/linux/run-clang-format.py - MIT License\n\nMIT License\n\nCopyright (c) 2017 Guillaume Papin\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the 
\"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "<!--\n  ~ The MIT License (MIT)\n  ~\n  ~ Copyright (c) 2015 Microsoft Corporation\n  ~\n  ~ -=- Robust Distributed System Nucleus (rDSN) -=-\n  ~\n  ~ Permission is hereby granted, free of charge, to any person obtaining a copy\n  ~ of this software and associated documentation files (the \"Software\"), to deal\n  ~ in the Software without restriction, including without limitation the rights\n  ~ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n  ~ copies of the Software, and to permit persons to whom the Software is\n  ~ furnished to do so, subject to the following conditions:\n  ~\n  ~ The above copyright notice and this permission notice shall be included in\n  ~ all copies or substantial portions of the Software.\n  ~\n  ~ THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n  ~ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n  ~ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n  ~ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n  ~ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n  ~ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n  ~ THE SOFTWARE.\n-->\n\n[![Build Status](https://travis-ci.org/XiaoMi/rdsn.svg?branch=master)](https://travis-ci.org/XiaoMi/rdsn)\n\nAll pull requests please now go to https://github.com/imzhenyu/rdsn for automatic integration with latest version. We will periodically update this repo. 
Thank you.\n\n### Top Links\n * [[Case](https://github.com/imzhenyu/rocksdb)] RocksDB made replicated using rDSN!\n * [[Tutorial](https://github.com/Microsoft/rDSN/wiki/Tutorial:-Build-A-Single-Node-Counter-Service)] Build a counter service with built-in tools (e.g., codegen, auto-test, fault injection, bug replay, tracing)\n * [[Tutorial](https://github.com/Microsoft/rDSN/wiki/Tutorial:-Build-A-Scalable-and-Reliable-Counter-Service)] Build a scalable and reliable counter service with built-in replication support\n * [[Tutorial](https://github.com/Microsoft/rDSN/wiki/Tutorial:-Perfect-Failure-Detector)] Build a perfect failure detector with progressively added system complexity\n * [[Tutorial](https://github.com/Microsoft/rDSN/wiki/Tutorial:-Plugin-A-New-Network-Implementation)] Plugin my own network implementation for higher performance\n * [Installation](https://github.com/Microsoft/rDSN/wiki/Installation)\n \n<hr>\n\n**Robust Distributed System Nucleus (rDSN)** is a framework for quickly building robust distributed systems. It has a microkernel for pluggable components, including applications, distributed frameworks, devops tools, and local runtime/resource providers, enabling their independent development and seamless integration. The project was originally developed for Microsoft Bing, and now has been adopted in production both inside and outside Microsoft. \n\n* [How does rDSN build robustness?](#novel)\n* [What can I do with rDSN?](#cando)\n* [What are the existing modules I can immediately use?](#existing)\n\n### <a name=\"cando\"> What can I do with rDSN? 
</a>\n\n * an enhanced event-driven RPC library such as libevent, Thrift, and GRPC\n * a production Paxos framework to quickly turn a local component (e.g., rocksdb) into an online service with replication, partition, failure recovery, and reconfiguration supports\n * a scale-out and fail-over framework for stateless services such as Memcached\n * more as you can imagine.\n\n### <a name=\"novel\"> How does rDSN build robustness? </a> \n\n * **reduced system complexity via microkernel architecture**: applications, frameworks (e.g., replication, scale-out, fail-over), local runtime libraries (e.g., network libraries, locks), and tools are all pluggable modules into a microkernel to enable independent development and seamless integration (therefore modules are reusable and transparently benefit each other) \n ![rDSN Architecture](resources/arch.png)\n * **auto-handled distributed system challenges**: built-in frameworks to achieve scalability, reliability, availability, and consistency etc. 
for the applications\n ![rDSN service model](resources/rdsn-layer2.jpg)\n * **transparent tooling support**: dedicated tool API for tool development; built-in plugged tools for understanding, testing, debugging, and monitoring the upper applications and frameworks \n ![rDSN Architecture](resources/viz.png)\n * **late resource binding with global deploy-time view**: tailor the module instances and their connections on demand with controllable system complexity and resource mapping (e.g., run all nodes in one simulator for testing, allocate CPU resources appropriately for avoiding resource contention, debug with progressively added system complexity) \n ![rDSN Configuration](resources/config.png)\n \n \n### <a name=\"existing\">Existing pluggable modules (and growing) </a>\n\n##### Distributed frameworks\n\n * a production Paxos framework to quickly turn a local component (e.g., rocksdb) into an online service with replication, partition, failure recovery, and reconfiguration supports\n * a scale-out and fail-over framework for stateless services such as Memcached\n\n##### Local runtime libraries \n\n * network libraries on Linux/Windows supporting rDSN/Thrift/HTTP messages at the same time\n * asynchronous disk IO on Linux/Windows\n * locks, rwlocks, semaphores\n * task queues \n * timer services\n * performance counters\n * loggers (high-perf, screen)\n\n##### Devops tools\n\n * nativerun and fastrun enable native deployment on Windows and Linux \n * simulator debugs multiple nodes in one single process without worrying about timeouts\n * explorer extracts task-level dependencies automatically\n * tracer dumps logs for how requests are processed across tasks/nodes\n * profiler shows detailed task-level performance data (e.g., queue-time, exec-time)\n * fault-injector mimics data center failures to expose bugs early\n * global-checker enables cross-node assertion \n * replayer reproduces the bugs for easier root cause analysis\n * built-in web studio to visualize 
task-level performance and dependency information\n\n##### Other distributed providers and libraries\n\n * remote file copy \n * perfect failure detector\n * multi-master perfect failure detector \n\n### License and Support\n\nrDSN is provided on Windows and Linux, with the MIT open source license. You can use the \"issues\" tab in GitHub to report bugs. \n\n"
  },
  {
    "path": "bin/FindDL.cmake",
    "content": "# Copyright (c) 2010-2011, Rob Jansen\n\n# To the extent that a federal employee is an author of a portion of\n# this software or a derivative work thereof, no copyright is claimed by\n# the United States Government, as represented by the Secretary of the\n# Navy (\"GOVERNMENT\") under Title 17, U.S. Code. All Other Rights \n# Reserved.\n\n# Redistribution and use in source and binary forms, with or without\n# modification, are permitted provided that the following conditions are\n# met:\n#     * Redistributions of source code must retain the above copyright\n# notice, this list of conditions and the following disclaimer.\n#     * Redistributions in binary form must reproduce the above\n# copyright notice, this list of conditions and the following disclaimer\n# in the documentation and/or other materials provided with the\n# distribution.\n#     * Neither the names of the copyright owners nor the names of its\n# contributors may be used to endorse or promote products derived from\n# this software without specific prior written permission.\n\n# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n# \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT\n# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n# GOVERNMENT ALLOWS FREE USE OF THIS SOFTWARE IN ITS \"AS IS\" CONDITION\n# AND DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER\n# RESULTING FROM THE USE OF THIS SOFTWARE.\n\n# - Check for the presence of DL\n#\n# The following variables are set when DL is found:\n#  HAVE_DL       = Set to true, if all components of DL\n#                          have been found.\n#  DL_INCLUDES   = Include path for the header files of DL\n#  DL_LIBRARIES  = Link these to use DL\n\n## -----------------------------------------------------------------------------\n## Check for the header files\n\nfind_path (DL_INCLUDES dlfcn.h\n  PATHS /usr/local/include /usr/include ${CMAKE_EXTRA_INCLUDES}\n  )\n\n## -----------------------------------------------------------------------------\n## Check for the library\n\nfind_library (DL_LIBRARIES dl\n  PATHS /usr/local/lib64 /usr/lib64 /lib64 ${CMAKE_EXTRA_LIBRARIES}\n  )\n\n## -----------------------------------------------------------------------------\n## Actions taken when all components have been found\n\nif (DL_INCLUDES AND DL_LIBRARIES)\n  set (HAVE_DL TRUE)\nelse (DL_INCLUDES AND DL_LIBRARIES)\n  if (NOT DL_FIND_QUIETLY)\n    if (NOT DL_INCLUDES)\n      message (STATUS \"Unable to find DL header files!\")\n    endif (NOT DL_INCLUDES)\n    if (NOT DL_LIBRARIES)\n      message (STATUS \"Unable to find DL library files!\")\n    endif (NOT DL_LIBRARIES)\n  endif (NOT DL_FIND_QUIETLY)\nendif (DL_INCLUDES 
AND DL_LIBRARIES)\n\nif (HAVE_DL)\n  if (NOT DL_FIND_QUIETLY)\n    message (STATUS \"Found components for DL\")\n    message (STATUS \"DL_INCLUDES = ${DL_INCLUDES}\")\n    message (STATUS \"DL_LIBRARIES = ${DL_LIBRARIES}\")\n  endif (NOT DL_FIND_QUIETLY)\nelse (HAVE_DL)\n  if (DL_FIND_REQUIRED)\n    message (FATAL_ERROR \"Could not find DL!\")\n  endif (DL_FIND_REQUIRED)\nendif (HAVE_DL)\n\nmark_as_advanced (\n  HAVE_DL\n  DL_LIBRARIES\n  DL_INCLUDES\n  )"
  },
  {
    "path": "bin/FindJemalloc.cmake",
    "content": "##############################################################################\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n##############################################################################\n\nfind_path(Jemalloc_INCLUDE_DIRS\n    NAMES jemalloc/jemalloc.h\n    PATHS ${DSN_THIRDPARTY_ROOT}/include\n    NO_DEFAULT_PATH\n)\n\nfind_library(Jemalloc_SHARED_LIBRARIES\n    NAMES jemalloc\n    PATHS ${DSN_THIRDPARTY_ROOT}/lib\n    NO_DEFAULT_PATH\n)\n\nfind_library(Jemalloc_STATIC_LIBRARIES\n    NAMES libjemalloc_pic.a\n    PATHS ${DSN_THIRDPARTY_ROOT}/lib\n    NO_DEFAULT_PATH\n)\n\nif(Jemalloc_INCLUDE_DIRS AND Jemalloc_SHARED_LIBRARIES AND Jemalloc_STATIC_LIBRARIES)\n    set(Jemalloc_FOUND TRUE)\nelse()\n    set(Jemalloc_FOUND FALSE)\nendif()\n\nif(Jemalloc_FOUND)\n    message(STATUS \"Found jemalloc header files: ${Jemalloc_INCLUDE_DIRS}\")\n    message(STATUS \"Found jemalloc shared libs: ${Jemalloc_SHARED_LIBRARIES}\")\n    message(STATUS \"Found jemalloc static libs: ${Jemalloc_STATIC_LIBRARIES}\")\nelse()\n    if(Jemalloc_FIND_REQUIRED)\n        message(FATAL_ERROR \"Not found jemalloc in ${DSN_THIRDPARTY_ROOT}\")\n    endif()\nendif()\n\nmark_as_advanced(\n    Jemalloc_INCLUDE_DIRS\n    
Jemalloc_SHARED_LIBRARIES\n    Jemalloc_STATIC_LIBRARIES\n)\n\nif(Jemalloc_FOUND AND NOT (TARGET JeMalloc::JeMalloc))\n    if(\"${JEMALLOC_LIB_TYPE}\" STREQUAL \"SHARED\")\n        add_library(JeMalloc::JeMalloc SHARED IMPORTED)\n        set_target_properties(JeMalloc::JeMalloc PROPERTIES\n            INTERFACE_INCLUDE_DIRECTORIES ${Jemalloc_INCLUDE_DIRS}\n            IMPORTED_LOCATION ${Jemalloc_SHARED_LIBRARIES}\n        )\n        message(STATUS \"Use jemalloc lib type: ${JEMALLOC_LIB_TYPE}\")\n        message(STATUS \"Use jemalloc lib: ${Jemalloc_SHARED_LIBRARIES}\")\n    elseif(\"${JEMALLOC_LIB_TYPE}\" STREQUAL \"STATIC\")\n        add_library(JeMalloc::JeMalloc STATIC IMPORTED)\n        set_target_properties(JeMalloc::JeMalloc PROPERTIES\n            INTERFACE_INCLUDE_DIRECTORIES ${Jemalloc_INCLUDE_DIRS}\n            IMPORTED_LINK_INTERFACE_LANGUAGES \"C;CXX\"\n            IMPORTED_LOCATION ${Jemalloc_STATIC_LIBRARIES}\n        )\n        message(STATUS \"Use jemalloc lib type: ${JEMALLOC_LIB_TYPE}\")\n        message(STATUS \"Use jemalloc lib: ${Jemalloc_STATIC_LIBRARIES}\")\n    else()\n        message(FATAL_ERROR \"Invalid jemalloc lib type: ${JEMALLOC_LIB_TYPE}\")\n    endif()\nendif()\n"
  },
  {
    "path": "bin/FindRT.cmake",
    "content": "# Copyright (c) 2010-2011, Rob Jansen\n\n# To the extent that a federal employee is an author of a portion of\n# this software or a derivative work thereof, no copyright is claimed by\n# the United States Government, as represented by the Secretary of the\n# Navy (\"GOVERNMENT\") under Title 17, U.S. Code. All Other Rights \n# Reserved.\n\n# Redistribution and use in source and binary forms, with or without\n# modification, are permitted provided that the following conditions are\n# met:\n#     * Redistributions of source code must retain the above copyright\n# notice, this list of conditions and the following disclaimer.\n#     * Redistributions in binary form must reproduce the above\n# copyright notice, this list of conditions and the following disclaimer\n# in the documentation and/or other materials provided with the\n# distribution.\n#     * Neither the names of the copyright owners nor the names of its\n# contributors may be used to endorse or promote products derived from\n# this software without specific prior written permission.\n\n# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n# \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT\n# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n# GOVERNMENT ALLOWS FREE USE OF THIS SOFTWARE IN ITS \"AS IS\" CONDITION\n# AND DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER\n# RESULTING FROM THE USE OF THIS SOFTWARE.\n\n# - Check for the presence of RT\n#\n# The following variables are set when RT is found:\n#  HAVE_RT       = Set to true, if all components of RT\n#                          have been found.\n#  RT_INCLUDES   = Include path for the header files of RT\n#  RT_LIBRARIES  = Link these to use RT\n\n## -----------------------------------------------------------------------------\n## Check for the header files\n\nfind_path (RT_INCLUDES time.h\n  PATHS /usr/local/include /usr/include ${CMAKE_EXTRA_INCLUDES}\n  )\n\n## -----------------------------------------------------------------------------\n## Check for the library\n\nfind_library (RT_LIBRARIES rt\n  PATHS /usr/local/lib64 /usr/lib64 /lib64 ${CMAKE_EXTRA_LIBRARIES}\n  )\n\n## -----------------------------------------------------------------------------\n## Actions taken when all components have been found\n\nif (RT_INCLUDES AND RT_LIBRARIES)\n  set (HAVE_RT TRUE)\nelse (RT_INCLUDES AND RT_LIBRARIES)\n  if (NOT RT_FIND_QUIETLY)\n    if (NOT RT_INCLUDES)\n      message (STATUS \"Unable to find RT header files!\")\n    endif (NOT RT_INCLUDES)\n    if (NOT RT_LIBRARIES)\n      message (STATUS \"Unable to find RT library files!\")\n    endif (NOT RT_LIBRARIES)\n  endif (NOT RT_FIND_QUIETLY)\nendif (RT_INCLUDES 
AND RT_LIBRARIES)\n\nif (HAVE_RT)\n  if (NOT RT_FIND_QUIETLY)\n    message (STATUS \"Found components for RT\")\n    message (STATUS \"RT_INCLUDES = ${RT_INCLUDES}\")\n    message (STATUS \"RT_LIBRARIES = ${RT_LIBRARIES}\")\n  endif (NOT RT_FIND_QUIETLY)\nelse (HAVE_RT)\n  if (RT_FIND_REQUIRED)\n    message (FATAL_ERROR \"Could not find RT!\")\n  endif (RT_FIND_REQUIRED)\nendif (HAVE_RT)\n\nmark_as_advanced (\n  HAVE_RT\n  RT_LIBRARIES\n  RT_INCLUDES\n  )"
  },
  {
    "path": "bin/compiler_info.cmake",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n#\n# Sets COMPILER_FAMILY to 'clang' or 'gcc'\n# Sets COMPILER_VERSION to the version\n\ninclude(CMakeDetermineCXXCompiler)\nmessage(\"Running rdsn/bin/compiler_info.cmake\")\nexecute_process(COMMAND env LANG=C \"${CMAKE_CXX_COMPILER}\" -v\n                ERROR_VARIABLE COMPILER_VERSION_FULL)\nmessage(${COMPILER_VERSION_FULL})\n\n# clang on Linux and Mac OS X before 10.9\nif(\"${COMPILER_VERSION_FULL}\" MATCHES \".*clang version.*\")\n  set(COMPILER_FAMILY \"clang\")\n  string(REGEX REPLACE \".*clang version ([0-9]+\\\\.[0-9]+).*\" \"\\\\1\"\n    COMPILER_VERSION \"${COMPILER_VERSION_FULL}\")\n\n# gcc\nelseif(\"${COMPILER_VERSION_FULL}\" MATCHES \".*gcc version.*\")\n  set(COMPILER_FAMILY \"gcc\")\n  string(REGEX REPLACE \".*gcc version ([0-9\\\\.]+).*\" \"\\\\1\"\n    COMPILER_VERSION \"${COMPILER_VERSION_FULL}\")\nelse()\n  message(FATAL_ERROR \"Unknown compiler. 
Version info:\\n${COMPILER_VERSION_FULL}\")\nendif()\nmessage(\"Selected compiler ${COMPILER_FAMILY} ${COMPILER_VERSION}\")\n\n# gcc (and some varieties of clang) mention the path prefix where system headers\n# and libraries are located.\nif(\"${COMPILER_VERSION_FULL}\" MATCHES \"Configured with: .* --prefix=([^ ]*)\")\n  set(COMPILER_SYSTEM_PREFIX_PATH ${CMAKE_MATCH_1})\n  message(\"Selected compiler built with --prefix=${COMPILER_SYSTEM_PREFIX_PATH}\")\nendif()"
  },
  {
    "path": "bin/dsn.cmake",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\ninclude(${CMAKE_CURRENT_LIST_DIR}/compiler_info.cmake)\nset(CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR};${CMAKE_MODULE_PATH}) # TODO: move all Find*.cmake into cmake/\n\n# Always generate the compilation database file (compile_commands.json) for use\n# with various development tools, such as IWYU and Vim's YouCompleteMe plugin.\n# See http://clang.llvm.org/docs/JSONCompilationDatabase.html\nset(CMAKE_EXPORT_COMPILE_COMMANDS TRUE)\n\n# Set DSN_PROJECT_DIR to rdsn/\nset(DSN_PROJECT_DIR ${CMAKE_CURRENT_LIST_DIR})\nget_filename_component(DSN_PROJECT_DIR ${DSN_PROJECT_DIR} DIRECTORY)\n\n# Set DSN_THIRDPARTY_ROOT to rdsn/thirdparty/output\nset(DSN_THIRDPARTY_ROOT ${DSN_PROJECT_DIR}/thirdparty/output)\nmessage(STATUS \"DSN_THIRDPARTY_ROOT = ${DSN_THIRDPARTY_ROOT}\")\n\n# 
Set DSN_ROOT to rdsn/DSN_ROOT, this is where rdsn will be installed\nset(DSN_ROOT ${DSN_PROJECT_DIR}/DSN_ROOT)\nmessage(STATUS \"DSN_ROOT = ${DSN_ROOT}\")\n\noption(BUILD_TEST \"build unit test\" ON)\nmessage(STATUS \"BUILD_TEST = ${BUILD_TEST}\")\n\noption(ENABLE_GCOV \"Enable gcov (for code coverage analysis)\" OFF)\nmessage(STATUS \"ENABLE_GCOV = ${ENABLE_GCOV}\")\n\n# Disable this option before running valgrind.\noption(ENABLE_GPERF \"Enable gperftools (for tcmalloc)\" ON)\nmessage(STATUS \"ENABLE_GPERF = ${ENABLE_GPERF}\")\n\noption(USE_JEMALLOC \"Use jemalloc\" OFF)\nmessage(STATUS \"USE_JEMALLOC = ${USE_JEMALLOC}\")\n\nif(ENABLE_GPERF AND USE_JEMALLOC)\n    message(FATAL_ERROR \"cannot enable both gperftools and jemalloc simultaneously\")\nendif()\n\nif(USE_JEMALLOC)\n    set(JEMALLOC_LIB_TYPE \"SHARED\")\nendif()\n\n# ================================================================== #\n\n# Helper function to add preprocesor definition of FILE_BASENAME\n# to pass the filename without directory path for debugging use.\n#\n# Note that in header files this is not consistent with\n# __FILE__ and __LINE__ since FILE_BASENAME will be the\n# compilation unit source file name (.c/.cpp).\n#\n# Example:\n#\n#   define_file_basename_for_sources(my_target)\n#\n# Will add -DFILE_BASENAME=\"filename\" for each source file depended on\n# by my_target, where filename is the name of the file.\n#\nfunction(define_file_basename_for_sources targetname)\n    get_target_property(source_files \"${targetname}\" SOURCES)\n    foreach(sourcefile ${source_files})\n        # Add the FILE_BASENAME=filename compile definition to the list.\n        get_filename_component(basename \"${sourcefile}\" NAME)\n        # Set the updated compile definitions on the source file.\n        set_property(\n            SOURCE \"${sourcefile}\" APPEND\n            PROPERTY COMPILE_DEFINITIONS \"__FILENAME__=\\\"${basename}\\\"\")\n    endforeach()\nendfunction()\n\n# Install this target into 
${CMAKE_INSTALL_PREFIX}/lib\nfunction(dsn_install_library)\n    install(TARGETS ${MY_PROJ_NAME} DESTINATION \"lib\")\nendfunction()\n\n# Install this target into ${CMAKE_INSTALL_PREFIX}/bin/${PROJ_NAME}\nfunction(dsn_install_executable)\n    set(MY_PROJ_TYPE \"EXECUTABLE\")\n    set(INSTALL_DIR \"bin/${MY_PROJ_NAME}\")\n    install(TARGETS ${MY_PROJ_NAME} DESTINATION \"${INSTALL_DIR}\")\n\n    # install the extra files together with the executable\n    if(NOT (MY_BINPLACES STREQUAL \"\"))\n        foreach(BF ${MY_BINPLACES})\n            install(FILES ${BF} DESTINATION \"${INSTALL_DIR}\")\n        endforeach()\n    endif()\nendfunction()\n\nfunction(ms_add_project PROJ_TYPE PROJ_NAME PROJ_SRC PROJ_LIBS PROJ_BINPLACES)\n    if(NOT((PROJ_TYPE STREQUAL \"STATIC\") OR (PROJ_TYPE STREQUAL \"SHARED\") OR\n           (PROJ_TYPE STREQUAL \"EXECUTABLE\") OR (PROJ_TYPE STREQUAL \"OBJECT\")))\n        message(FATAL_ERROR \"Invalid project type.\")\n    endif()\n\n    if(PROJ_SRC STREQUAL \"\")\n        message(FATAL_ERROR \"No source files.\")\n    endif()\n\n    if((PROJ_TYPE STREQUAL \"STATIC\") OR (PROJ_TYPE STREQUAL \"OBJECT\"))\n        add_library(${PROJ_NAME} ${PROJ_TYPE} ${PROJ_SRC})\n    elseif(PROJ_TYPE STREQUAL \"SHARED\")\n        add_library(${PROJ_NAME} ${PROJ_TYPE} ${PROJ_SRC})\n    elseif(PROJ_TYPE STREQUAL \"EXECUTABLE\")\n        add_executable(${PROJ_NAME} ${PROJ_SRC})\n    endif()\n\n    if((PROJ_TYPE STREQUAL \"SHARED\") OR (PROJ_TYPE STREQUAL \"EXECUTABLE\"))\n        if(PROJ_TYPE STREQUAL \"SHARED\")\n            set(LINK_MODE PRIVATE)\n        else()\n            set(LINK_MODE PUBLIC)\n        endif()\n        target_link_libraries(${PROJ_NAME} \"${LINK_MODE}\" ${PROJ_LIBS})\n    endif()\nendfunction(ms_add_project)\n\n\n# Parameters:\n# - MY_PROJ_TYPE\n# - MY_PROJ_NAME\n# - MY_SRC_SEARCH_MODE\n#     Search mode for source files under current project directory\n#     \"GLOB_RECURSE\" for recursive search\n#     \"GLOB\" for non-recursive search\n# - 
MY_PROJ_SRC\n# - MY_PROJ_LIBS\n# - MY_BINPLACES\n#     Extra files that will be installed\n# - MY_BOOST_LIBS\nfunction(dsn_add_project)\n    if((NOT DEFINED MY_PROJ_TYPE) OR (MY_PROJ_TYPE STREQUAL \"\"))\n        message(FATAL_ERROR \"MY_PROJ_TYPE is empty.\")\n    endif()\n    if((NOT DEFINED MY_PROJ_NAME) OR (MY_PROJ_NAME STREQUAL \"\"))\n        message(FATAL_ERROR \"MY_PROJ_NAME is empty.\")\n    endif()\n    if(NOT DEFINED MY_SRC_SEARCH_MODE)\n        set(MY_SRC_SEARCH_MODE \"GLOB\")\n    endif()\n\n    # find source files from current directory\n    if(NOT DEFINED MY_PROJ_SRC)\n        set(MY_PROJ_SRC \"\")\n    endif()\n    set(TEMP_SRC \"\")\n    # We restrict the file suffix to keep our codes consistent.\n    file(${MY_SRC_SEARCH_MODE} TEMP_SRC\n         \"${CMAKE_CURRENT_SOURCE_DIR}/*.cpp\"\n         \"${CMAKE_CURRENT_SOURCE_DIR}/*.c\"\n         )\n    set(MY_PROJ_SRC ${TEMP_SRC} ${MY_PROJ_SRC})\n\n    if(NOT DEFINED MY_PROJ_LIBS)\n        set(MY_PROJ_LIBS \"\")\n    endif()\n    if(NOT DEFINED MY_BINPLACES)\n        set(MY_BINPLACES \"\")\n    endif()\n\n    if(NOT DEFINED MY_BOOST_LIBS)\n        set(MY_BOOST_LIBS \"\")\n    endif()\n\n    if((MY_PROJ_TYPE STREQUAL \"SHARED\") OR (MY_PROJ_TYPE STREQUAL \"EXECUTABLE\"))\n        set(MY_PROJ_LIBS ${MY_PROJ_LIBS} ${DEFAULT_THIRDPARTY_LIBS} ${MY_BOOST_LIBS} ${DSN_SYSTEM_LIBS})\n    endif()\n    ms_add_project(\"${MY_PROJ_TYPE}\" \"${MY_PROJ_NAME}\" \"${MY_PROJ_SRC}\" \"${MY_PROJ_LIBS}\" \"${MY_BINPLACES}\")\n    define_file_basename_for_sources(${MY_PROJ_NAME})\nendfunction(dsn_add_project)\n\nfunction(dsn_add_static_library)\n    set(MY_PROJ_TYPE \"STATIC\")\n    dsn_add_project()\n    dsn_install_library()\nendfunction(dsn_add_static_library)\n\nfunction(dsn_add_shared_library)\n    set(MY_PROJ_TYPE \"SHARED\")\n    dsn_add_project()\n    dsn_install_library()\nendfunction(dsn_add_shared_library)\n\nfunction(dsn_add_executable)\n    set(MY_PROJ_TYPE \"EXECUTABLE\")\n    
dsn_add_project()\nendfunction(dsn_add_executable)\n\nfunction(dsn_add_object)\n    set(MY_PROJ_TYPE \"OBJECT\")\n    dsn_add_project()\nendfunction(dsn_add_object)\n\nfunction(dsn_add_test)\n    if(${BUILD_TEST})\n        set(MY_EXECUTABLE_IS_TEST TRUE)\n        dsn_add_executable()\n\n        file(MAKE_DIRECTORY \"${CMAKE_BINARY_DIR}/bin\")\n        execute_process(COMMAND ln -sf ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_BINARY_DIR}/bin/${MY_PROJ_NAME})\n\n        # copy the extra files together with the executable\n        if(NOT (MY_BINPLACES STREQUAL \"\"))\n            foreach(BF ${MY_BINPLACES})\n                FILE(COPY ${BF} DESTINATION \"${CMAKE_BINARY_DIR}/bin/${MY_PROJ_NAME}\")\n            endforeach()\n        endif()\n    endif()\nendfunction()\n\nfunction(dsn_setup_compiler_flags)\n    if(CMAKE_BUILD_TYPE STREQUAL \"Debug\")\n        add_definitions(-DDSN_BUILD_TYPE=Debug)\n        add_definitions(-g)\n    else()\n        add_definitions(-g)\n        add_definitions(-O2)\n        add_definitions(-DDSN_BUILD_TYPE=Release)\n    endif()\n    cmake_host_system_information(RESULT BUILD_HOSTNAME QUERY HOSTNAME)\n    add_definitions(-DDSN_BUILD_HOSTNAME=${BUILD_HOSTNAME})\n\n    # We want access to the PRI* print format macros.\n    add_definitions(-D__STDC_FORMAT_MACROS)\n\n    set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -std=c++1y\" CACHE STRING \"\" FORCE)\n\n    #  -Wall: Enable all warnings.\n    add_compile_options(-Wall)\n    add_compile_options(-Werror)\n    #  -Wno-sign-compare: suppress warnings for comparison between signed and unsigned integers\n    add_compile_options(-Wno-sign-compare)\n    add_compile_options(-Wno-strict-aliasing)\n    add_compile_options(-Wuninitialized)\n    add_compile_options(-Wno-unused-result)\n    add_compile_options(-Wno-unused-variable)\n    add_compile_options(-Wno-deprecated-declarations)\n    add_compile_options(-Wno-inconsistent-missing-override)\n    add_compile_options(-Wno-attributes)\n    # -fno-omit-frame-pointer\n 
   #   use frame pointers to allow simple stack frame walking for backtraces.\n    #   This has a small perf hit but worth it for the ability to profile in production\n    add_compile_options( -fno-omit-frame-pointer)\n    # -Wno-deprecated-register\n    #   kbr5.h uses the legacy 'register' keyword.\n    add_compile_options(-Wno-deprecated-register)\n    # -Wno-implicit-float-conversion\n    #   Poco/Dynamic/VarHolder.h uses 'unsigned long' to 'float' conversion\n    add_compile_options(-Wno-implicit-float-conversion)\n\n    find_program(CCACHE_FOUND ccache)\n    if(CCACHE_FOUND)\n        set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)\n        set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)\n        if (\"${COMPILER_FAMILY}\" STREQUAL \"clang\")\n            add_compile_options(-Qunused-arguments)\n        endif()\n        message(STATUS \"use ccache to speed up compilation\")\n    endif(CCACHE_FOUND)\n\n    # add sanitizer check\n    if(DEFINED SANITIZER)\n        if(NOT ((\"${COMPILER_FAMILY}\" STREQUAL \"clang\") OR\n        (\"${COMPILER_FAMILY}\" STREQUAL \"gcc\" AND \"${COMPILER_VERSION}\" VERSION_GREATER \"5.4.0\")))\n            message(SEND_ERROR \"Cannot use sanitizer without clang or gcc >= 5.4.0\")\n        endif()\n\n        message(STATUS \"Running cmake with sanitizer=${SANITIZER}\")\n        set(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} -fsanitize=${SANITIZER}\" CACHE STRING \"\" FORCE)\n        set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -fsanitize=${SANITIZER}\" CACHE STRING \"\" FORCE)\n    endif()\n\n    set(CMAKE_EXE_LINKER_FLAGS\n        \"${CMAKE_EXE_LINKER_FLAGS} -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free\"\n        CACHE\n        STRING\n        \"\"\n        FORCE)\n    set(CMAKE_SHARED_LINKER_FLAGS \"${CMAKE_SHARED_LINKER_FLAGS} -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free\"\n        CACHE\n        STRING\n        \"\"\n        
FORCE)\nendfunction(dsn_setup_compiler_flags)\n\n# find necessary system libs\nfunction(dsn_setup_system_libs)\n    find_package(Threads REQUIRED)\n\n    if(CMAKE_SIZEOF_VOID_P EQUAL 8)\n        set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS ON)\n        message(STATUS \"FIND_LIBRARY_USE_LIB64_PATHS = ON\")\n    endif()\n\n    set(DSN_SYSTEM_LIBS \"\")\n\n    if (NOT APPLE)\n        find_package(RT REQUIRED)\n        set(DSN_SYSTEM_LIBS ${DSN_SYSTEM_LIBS} ${RT_LIBRARIES})\n    endif()\n\n    find_package(DL REQUIRED)\n    set(DSN_SYSTEM_LIBS ${DSN_SYSTEM_LIBS} ${DL_LIBRARIES})\n\n    # for md5 calculation\n    find_package(OpenSSL REQUIRED)\n    set(DSN_SYSTEM_LIBS ${DSN_SYSTEM_LIBS} ${OPENSSL_CRYPTO_LIBRARY})\n    if (APPLE)\n        include_directories(SYSTEM ${OPENSSL_ROOT_DIR}/include)\n        link_directories(\"${OPENSSL_ROOT_DIR}/lib\")\n    endif()\n\n    if (NOT APPLE)\n        if(ENABLE_GPERF)\n            set(DSN_SYSTEM_LIBS ${DSN_SYSTEM_LIBS} tcmalloc_and_profiler)\n            add_definitions(-DDSN_ENABLE_GPERF)\n        endif()\n    endif()\n\n    if(USE_JEMALLOC)\n        find_package(Jemalloc REQUIRED)\n        # also use cpu profiler provided by gperftools\n        set(DSN_SYSTEM_LIBS ${DSN_SYSTEM_LIBS} JeMalloc::JeMalloc profiler)\n        add_definitions(-DDSN_USE_JEMALLOC)\n    endif()\n\n    set(DSN_SYSTEM_LIBS\n        ${DSN_SYSTEM_LIBS}\n        ${CMAKE_THREAD_LIBS_INIT} # the thread library found by FindThreads\n        CACHE STRING \"rDSN system libs\" FORCE\n    )\nendfunction(dsn_setup_system_libs)\n\nfunction(dsn_setup_include_path)#TODO(huangwei5): remove this\n    include_directories(${DSN_THIRDPARTY_ROOT}/include)\nendfunction(dsn_setup_include_path)\n\nfunction(dsn_setup_thirdparty_libs)\n    set(Boost_USE_MULTITHREADED ON)\n    set(Boost_USE_STATIC_LIBS OFF)\n    set(Boost_USE_STATIC_RUNTIME OFF)\n    set(BOOST_ROOT ${DSN_THIRDPARTY_ROOT})\n    set(Boost_NO_SYSTEM_PATHS ON)\n    set(Boost_NO_BOOST_CMAKE ON)\n\n    
set(CMAKE_PREFIX_PATH ${DSN_THIRDPARTY_ROOT};${CMAKE_PREFIX_PATH})\n    find_package(Boost COMPONENTS system filesystem regex REQUIRED)\n    include_directories(${Boost_INCLUDE_DIRS})\n\n    find_library(THRIFT_LIB NAMES libthrift.a PATHS ${DSN_THIRDPARTY_ROOT}/lib NO_DEFAULT_PATH)\n    if(NOT THRIFT_LIB)\n        message(FATAL_ERROR \"thrift library not found in ${DSN_THIRDPARTY_ROOT}/lib\")\n    endif()\n    find_package(fmt REQUIRED)\n    set(DEFAULT_THIRDPARTY_LIBS ${THRIFT_LIB} fmt::fmt CACHE STRING \"default thirdparty libs\" FORCE)\n\n    # rocksdb\n    file(GLOB ROCKSDB_DEPENDS_MODULE_PATH ${DSN_PROJECT_DIR}/thirdparty/build/Source/rocksdb/cmake/modules)\n    if(NOT ROCKSDB_DEPENDS_MODULE_PATH)\n        message(WARNING \"Cannot find RocksDB depends cmake modules path, might not find snappy, zstd, lz4\")\n    endif()\n    list(APPEND CMAKE_MODULE_PATH \"${ROCKSDB_DEPENDS_MODULE_PATH}\")\n    find_package(snappy)\n    find_package(zstd)\n    find_package(lz4)\n    if(USE_JEMALLOC)\n        find_package(Jemalloc REQUIRED)\n    endif()\n    find_package(RocksDB REQUIRED)\n\n    # libhdfs\n    find_package(JNI REQUIRED)\n    message (STATUS \"JAVA_JVM_LIBRARY=${JAVA_JVM_LIBRARY}\")\n    link_libraries(${JAVA_JVM_LIBRARY})\n\n    link_directories(${DSN_THIRDPARTY_ROOT}/lib)\n    if (NOT APPLE)\n        link_directories(${DSN_THIRDPARTY_ROOT}/lib64)\n    endif()\nendfunction(dsn_setup_thirdparty_libs)\n\nfunction(dsn_common_setup)\n    if(NOT (UNIX))\n        message(FATAL_ERROR \"Only Unix are supported.\")\n    endif()\n\n    if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR)\n        message(FATAL_ERROR \"In-source builds are not allowed.\")\n    endif()\n\n    if(NOT DEFINED DSN_BUILD_RUNTIME)\n        set(DSN_BUILD_RUNTIME FALSE)\n    endif()\n\n    set(BUILD_SHARED_LIBS OFF)\n\n    include(CheckCXXCompilerFlag)\n    CHECK_CXX_COMPILER_FLAG(\"-std=c++1y\" COMPILER_SUPPORTS_CXX1Y)\n    if(NOT ${COMPILER_SUPPORTS_CXX1Y})\n        message(FATAL_ERROR \"You need a 
compiler with C++1y support.\")\n    endif()\n\n    dsn_setup_system_libs()\n    dsn_setup_compiler_flags()\n    dsn_setup_include_path()\n    dsn_setup_thirdparty_libs()\n\n    include(${DSN_PROJECT_DIR}/bin/thrift_utils.cmake)\n\nendfunction(dsn_common_setup)\n"
  },
  {
    "path": "bin/thrift_utils.cmake",
    "content": "##############################################################################\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n##############################################################################\n\nfind_program(THRIFT_COMPILER\n    NAME\n        thrift\n    PATHS\n        ${DSN_THIRDPARTY_ROOT}/bin\n    NO_DEFAULT_PATH\n)\n\nset(THRIFT_GENERATED_FILE_PATH ${CMAKE_BINARY_DIR}/thrift-gen CACHE INTERNAL \"Where the thrift generated sources locate\")\nif(NOT EXISTS ${THRIFT_GENERATED_FILE_PATH})\n    file(MAKE_DIRECTORY ${THRIFT_GENERATED_FILE_PATH})\nendif()\nmessage(STATUS \"THRIFT_GENERATED_FILE_PATH=${THRIFT_GENERATED_FILE_PATH}\")\ninclude_directories(${THRIFT_GENERATED_FILE_PATH})\n\n# THRIFT_GENERATE_CPP is used to generate sources using the thrift compiler.\n#\n# Example:\n#\n# thrift_generate_cpp(\n#     REQUEST_META_THRIFT_SRCS\n#     REQUEST_META_THRIFT_HDRS\n#     ${CMAKE_CURRENT_SOURCE_DIR}/request_meta.thrift\n# )\n# add_library(\n#     dsn_rpc\n#     ${REQUEST_META_THRIFT_SRCS}\n#     ...\n# )\nfunction(THRIFT_GENERATE_CPP SRCS HDRS thrift_file)\n    if(NOT EXISTS ${thrift_file})\n        message(FATAL_ERROR \"thrift file ${thrift_file} does not exist\")\n    endif()\n\n    
message(STATUS \"THRIFT_GENERATE_CPP: ${thrift_file}\")\n\n    exec_program(${THRIFT_COMPILER}\n        ARGS -gen cpp:moveable_types --out ${THRIFT_GENERATED_FILE_PATH} --gen cpp ${thrift_file}\n        OUTPUT_VARIABLE __thrift_OUT\n        RETURN_VALUE THRIFT_RETURN)\n    if(NOT ${THRIFT_RETURN} EQUAL \"0\")\n        message(STATUS \"COMMAND: ${THRIFT_COMPILER} -gen cpp:moveable_types --out ${THRIFT_GENERATED_FILE_PATH} --gen cpp ${thrift_file}\")\n        message(FATAL_ERROR \"thrift-compiler exits with \" ${THRIFT_RETURN} \": \" ${__thrift_OUT})\n    endif()\n\n    get_filename_component(__thrift_name ${thrift_file} NAME_WE)\n\n    set(${SRCS})\n    set(${HDRS})\n    file(GLOB __result_src \"${THRIFT_GENERATED_FILE_PATH}/${__thrift_name}_types.cpp\")\n    file(GLOB __result_hdr \"${THRIFT_GENERATED_FILE_PATH}/${__thrift_name}_types.h\")\n    list(APPEND ${SRCS} ${__result_src})\n    list(APPEND ${HDRS} ${__result_hdr})\n    # Sets the variables in global scope.\n    set(${SRCS} ${${SRCS}} PARENT_SCOPE)\n    set(${HDRS} ${${HDRS}} PARENT_SCOPE)\n\n    # install the thrift generated headers to include/\n    install(FILES ${__result_hdr} DESTINATION include)\nendfunction()\n"
  },
  {
    "path": "compile_thrift.py",
    "content": "#!/usr/bin/env python2\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nimport os\nimport sys\nimport platform\nimport re\n\n'''\nthe default thrift generator\n'''\n\nthrift_description = [\n    {\n        \"name\": \"dsn.layer2\",\n        \"path\": \"src\",\n        \"include_fix\": {\n            \"_types.h\": {\n                \"add\": [\"<dsn/cpp/serialization_helper/dsn_types.h>\"],\n                \"remove\": [\"\\\"dsn_types.h\\\"\"]\n            },\n            \"_types.cpp\": {\n                \"add\": [\"<dsn/cpp/serialization_helper/dsn.layer2_types.h>\"],\n                \"remove\": [\"\\\"dsn.layer2_types.h\\\"\"]\n            }\n        },\n        \"file_move\": {\n            \"_types.h\": \"include/dsn/cpp/serialization_helper\",\n            \"_types.cpp\": \"src/runtime\"\n        }\n   
 },\n]\n\n\nclass CompileError(Exception):\n    \"\"\" Raised when dealing with thrift idl have errors\"\"\"\n\n    def __init__(self, msg):\n        self.msg = msg\n\n    def __str__(self):\n        return self.msg\n\n\ndef fix_include_file(filename, fix_commands):\n    tmp_result = filename + \".swapfile\"\n    from_fd, to_fd = open(filename, \"r\"), open(tmp_result, \"w\")\n\n    add_ok = not \"add\" in fix_commands\n\n    for line in from_fd:\n        include_statement = False\n        if len(line.strip()) > 0:\n            stripped_line = line.strip()\n            if stripped_line[0] == \"#\" and \"include\" in stripped_line:\n                include_statement = True\n\n        if include_statement == True and add_ok == False:\n            add_includes = \"\\n\".join([\"#include %s\" % (s)\n                                      for s in fix_commands[\"add\"]])\n            to_fd.write(add_includes + \"\\n\")\n            add_ok = True\n\n        if include_statement == True and (\"remove\" in fix_commands):\n            if len(filter(lambda x: x in line, fix_commands[\"remove\"])) == 0:\n                to_fd.write(line)\n        else:\n            to_fd.write(line)\n\n    from_fd.close()\n    to_fd.close()\n\n    os.remove(filename)\n    os.rename(tmp_result, filename)\n\n\ndef fix_include(thrift_name, include_fix_dict):\n    # current dir is thrift file dir\n    os.chdir(\"output\")\n\n    for pair in include_fix_dict.iteritems():\n        filename = thrift_name + pair[0]\n        fix_include_file(filename, pair[1])\n\n    os.chdir(\"..\")\n\n\ndef compile_thrift_file(thrift_info):\n    thrift_name = thrift_info[\"name\"]\n    print \"\\n>>> compiling thrift file %s.thrift ...\" % (thrift_name)\n\n    if \"path\" not in thrift_info:\n        raise CompileError(\"can't find thrift file\")\n\n    # ensure <name>.thrift exists\n    os.chdir(root_dir + \"/\" + thrift_info[\"path\"])\n    if os.path.isfile(thrift_name+\".thrift\") == False:\n        raise 
CompileError(\"can't find thrift file\")\n\n    # create tmp directory: <thrift_info[\"path\"]>/output\n    os.system(\"rm -rf output\")\n    os.system(\"mkdir output\")\n    print \"mkdir {}/output\".format(os.getcwd())\n\n    # generate files\n    cmd = \"{} -gen cpp:moveable_types -out output {}.thrift\".format(\n        thrift_exe, thrift_name)\n    os.system(cmd)\n    print cmd\n\n    # TODO(wutao1): code format files\n    # os.system(\"clang-format-3.9 -i output/*\")\n\n    if \"include_fix\" in thrift_info:\n        fix_include(thrift_name, thrift_info[\"include_fix\"])\n\n    if \"hook\" in thrift_info:\n        os.chdir(\"output\")\n        for hook_func, args in thrift_info[\"hook\"]:\n            hook_func(args)\n        os.chdir(\"..\")\n\n    if \"file_move\" in thrift_info:\n        for pair in thrift_info[\"file_move\"].iteritems():\n            dest_path = root_dir + \"/\" + pair[1]\n            for postfix in pair[0].split():\n                src_path = \"output/%s%s\" % (thrift_name, postfix)\n                cmd = \"mv %s %s\" % (src_path, dest_path)\n                os.system(cmd)\n                print cmd\n\n    os.system(\"rm -rf output\")\n    print \"rm -rf {}/output\".format(os.getcwd())\n\n    os.chdir(root_dir)\n\n\n# special hooks for thrift, all these are executed in the output dir\n\n\ndef constructor_hook(args):\n    generated_fname = args[0]\n    class_name = args[1]\n    add_code = args[2]\n\n    target_fname = generated_fname + \".swapfile\"\n    src_fd, dst_fd = open(generated_fname, \"r\"), open(target_fname, \"w\")\n\n    in_class = 0\n    for line in src_fd:\n        if in_class == 1:\n            if \"public:\" in line:\n                line = line + add_code + \"\\n\"\n            elif \"bool operator <\" in line:\n                line = \"\"\n            # this may not be right\n            elif line.startswith(\"};\"):\n                in_class = 2\n        elif in_class == 0 and line.startswith(\"class \" + class_name + 
\" {\"):\n            in_class = 1\n        dst_fd.write(line)\n\n    src_fd.close()\n    dst_fd.close()\n\n    os.remove(generated_fname)\n    os.rename(target_fname, generated_fname)\n\n\ndef replace_hook(args):\n    generated_fname = args[0]\n    replace_map = args[1]\n\n    target_fname = generated_fname + \".swapfile\"\n    src_fd, dst_fd = open(generated_fname, \"r\"), open(target_fname, \"w\")\n\n    for line in src_fd:\n        for key, value in replace_map.items():\n            line = re.sub(key, value, line)\n        dst_fd.write(line)\n\n    src_fd.close()\n    dst_fd.close()\n\n    os.remove(generated_fname)\n    os.rename(target_fname, generated_fname)\n\n\ndef add_hook(name, path, func, args):\n    for i in thrift_description:\n        if name == i[\"name\"] and path == i[\"path\"]:\n            if \"hook\" not in i:\n                i[\"hook\"] = [(func, args)]\n            else:\n                i[\"hook\"].append((func, args))\n\n\nif __name__ == \"__main__\":\n    thrift_exe = os.getcwd() + \"/thirdparty/output/bin/thrift\"\n    root_dir = os.getcwd()\n    print \"thrift_exe = \" + thrift_exe\n    print \"root_dir = \" + root_dir\n\n    if not os.path.isfile(thrift_exe):\n        print \"Error: can't find compiler %s\\nPlease build thrift in thirdparty/\" % thrift_exe\n        sys.exit()\n\n    ctor_kv_pair = \"  kv_pair(const std::string& _key, const std::string& _val): key(_key), value(_val) {\\n  }\"\n    ctor_configuration_proposal_action = \"  configuration_proposal_action(::dsn::rpc_address t, ::dsn::rpc_address n, config_type::type tp): target(t), node(n), type(tp) {}\"\n    add_hook(\"simple_kv\", \"src/replica/storage/simple_kv\", constructor_hook,\n             [\"simple_kv_types.h\", \"kv_pair\", ctor_kv_pair])\n    add_hook(\"replication\", \"src/\", constructor_hook,\n             [\"replication_types.h\", \"configuration_proposal_action\", ctor_configuration_proposal_action])\n    add_hook(\"dsn.layer2\", \"src\", replace_hook, 
[\"dsn.layer2_types.h\", {\n             r\"dsn\\.layer2_TYPES_H\": 'dsn_layer2_TYPES_H'}])\n\n    for i in thrift_description:\n        compile_thrift_file(i)\n"
  },
  {
    "path": "include/dsn/c/api_common.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     basic data structures and macros for rDSN service API\n *\n * Revision history:\n *     Feb., 2016, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <stdint.h>\n#include <stddef.h>\n#include <stdarg.h>\n#include <dsn/utility/dlib.h>\n\n#ifdef __cplusplus\n#define DEFAULT(value) = value\n#define NORETURN [[noreturn]]\n#else\n#define DEFAULT(value)\n#define NORETURN\n#include <stdbool.h>\n#endif\n\n#define DSN_MAX_TASK_CODE_NAME_LENGTH 48\n#define DSN_MAX_ERROR_CODE_NAME_LENGTH 48\n#define DSN_MAX_ADDRESS_NAME_LENGTH 48\n#define DSN_MAX_BUFFER_COUNT_IN_MESSAGE 64\n#define DSN_MAX_APP_TYPE_NAME_LENGTH 32\n#define DSN_MAX_CALLBAC_COUNT 
32\n#define DSN_MAX_APP_COUNT_IN_SAME_PROCESS 256\n#define DSN_MAX_PATH 1024\n\ntypedef void *dsn_handle_t;\n"
  },
  {
    "path": "include/dsn/c/api_layer1.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/c/api_common.h>\n#include <dsn/c/api_task.h>\n#include <dsn/tool-api/gpid.h>\n#include <dsn/tool-api/rpc_address.h>\n#include <dsn/tool-api/task_tracker.h>\n\n/*!\n @defgroup service-api-c Core Service API\n\n @ingroup service-api\n\n  Core service API for building applications and distributed frameworks, which\n  covers the major categories that a server application may use, shown in the modules below.\n\n @{\n */\n\n/*!\n @defgroup task-common Common Task Operations\n\n Common Task/Event Operations\n\n rDSN adopts the event-driven programming model, where all computations (event handlers) are\n represented as individual tasks; each is the execution of a sequential piece of code in one thread.\n 
Specifically, rDSN categorizes the tasks into four types, as defined in \\ref dsn_task_type_t.\n\nUnlike the traditional event-driven programming, rDSN enhances the model in the following ways,\nwith which they control the application in many aspects in a declarative approach.\n\n- each task is labeled with a task code, with which developers can configure many aspects in config\nfiles.\n  Developers can define new task code using \\ref DEFINE_TASK_CODE, or \\ref dsn_task_code_register.\n\n  <PRE>\n  [task..default]\n  ; allow task executed in other thread pools or tasks\n  ; for TASK_TYPE_COMPUTE - allow-inline allows a task being executed in its caller site\n  ; for other tasks - allow-inline allows a task being execution in io-thread\n  allow_inline = false\n\n  ; group rpc mode with group address: GRPC_TO_LEADER, GRPC_TO_ALL, GRPC_TO_ANY\n  grpc_mode = GRPC_TO_LEADER\n\n  ; when toollet profiler is enabled\n  is_profile = true\n\n  ; when toollet tracer is enabled\n  is_trace = true\n\n  ; thread pool to execute the task\n  pool_code = THREAD_POOL_DEFAULT\n\n  ; task priority\n  priority = TASK_PRIORITY_COMMON\n\n  ; whether to randomize the timer delay to random(0, timer_interval),\n  ; if the initial delay is zero, to avoid multiple timers executing at the same time (e.g.,\ncheckpointing)\n  randomize_timer_delay_if_zero = false\n\n  ; what kind of network channel for this kind of rpc calls\n  rpc_call_channel = RPC_CHANNEL_TCP\n\n  ; what kind of header format for this kind of rpc calls\n  rpc_call_header_format = NET_HDR_DSN\n\n  ; how many milliseconds to delay recving rpc session for\n  ; when queue length ~= [1.0, 1.2, 1.4, 1.6, 1.8, >=2.0] x pool.queue_length_throttling_threshold,\n  ; e.g., 0, 0, 1, 2, 5, 10\n  rpc_request_delays_milliseconds = 0, 0, 1, 2, 5, 10\n\n  ; whether to drop a request right before execution when its queueing time\n  ; is already greater than its timeout value\n  rpc_request_dropped_before_execution_when_timeout = false\n\n  ; 
for how long (ms) the request will be resent if no response\n  ; is received yet, 0 for disable this feature\n  rpc_request_resend_timeout_milliseconds = 0\n\n  ; throttling mode for rpc requets: TM_NONE, TM_REJECT, TM_DELAY when\n  ; queue length > pool.queue_length_throttling_threshold\n  rpc_request_throttling_mode = TM_NONE\n\n  ; what is the default timeout (ms) for this kind of rpc calls\n  rpc_timeout_milliseconds = 5000\n\n  [task.LPC_AIO_IMMEDIATE_CALLBACK]\n  ; override the option in [task..default]\n  allow_inline = true\n  </PRE>\n\n- each task code is bound to a thread pool, which can be customized as follows.\n  Developers can define new thread pools using \\ref DEFINE_THREAD_POOL_CODE, or \\ref\ndsn_threadpool_code_register.\n\n  <PRE>\n  [threadpool..default]\n\n  ; how many tasks (if available) should be returned for\n  ; one dequeue call for best batching performance\n  dequeue_batch_size = 5\n\n  ; throttling: whether to enable throttling with virtual queues\n  enable_virtual_queue_throttling = false\n\n  ; thread pool name\n  name = THREAD_POOL_INVALID\n\n  ; whethe the threads share a single queue(partitioned=false) or not;\n  ; the latter is usually for workload hash partitioning for avoiding locking\n  partitioned = false\n\n  ; task queue aspects names, usually for tooling purpose\n  queue_aspects =\n\n  ; task queue provider name\n  queue_factory_name = dsn::tools::hpc_concurrent_task_queue\n\n  ; throttling: throttling threshold above which rpc requests will be dropped\n  queue_length_throttling_threshold = 1000000\n\n  ; what CPU cores are assigned to this pool, 0 for all\n  worker_affinity_mask = 0\n\n  ; task aspects names, usually for tooling purpose\n  worker_aspects =\n\n  ; thread/worker count\n  worker_count = 2\n\n  ; task worker provider name\n  worker_factory_name =\n\n  ; thread priority\n  worker_priority = THREAD_xPRIORITY_NORMAL\n\n  ; whether the threads share all assigned cores\n  worker_share_core = true\n\n  
[threadpool.THREAD_POOL_DEFAULT]\n  ; override default options in [threadpool..default]\n  dequeue_batch_size = 5\n\n  </PRE>\n-\n\n @{\n */\n/*! cancel the later execution of the timer task inside the timer */\nextern DSN_API void dsn_task_cancel_current_timer();\n\n/*!\n check whether the task is currently running inside the given task\n\n \\param t the given task handle\n\n \\return true if it is.\n */\nextern DSN_API bool dsn_task_is_running_inside(dsn::task *t);\n\n/*@}*/\n\n/*!\n @defgroup tasking Asynchronous Tasks and Timers\n\n Asynchronous Tasks and Timers\n\n @{\n */\n\n/*!\n@defgroup rpc Remote Procedure Call (RPC)\n\nRemote Procedure Call (RPC)\n\nNote developers can easily plugin their own implementation to\nreplace the underneath implementation of the network (e.g., RDMA, simulated network)\n@{\n*/\n\n/*!\n@{\n*/\n\nextern DSN_API dsn::rpc_address dsn_primary_address();\n\n/*!\n@defgroup rpc-server Server-Side RPC Primitives\n\nServer-Side RPC Primitives\n@{\n */\n\n/*! register callback to handle RPC request */\nextern DSN_API bool dsn_rpc_register_handler(dsn::task_code code,\n                                             const char *extra_name,\n                                             const dsn::rpc_request_handler &cb);\n\n/*! unregister callback to handle RPC request, returns true if unregister ok, false if no handler\n    was registered */\nextern DSN_API bool dsn_rpc_unregiser_handler(dsn::task_code code);\n\n/*! reply with a response which is created using dsn::message_ex::create_response */\nextern DSN_API void dsn_rpc_reply(dsn::message_ex *response,\n                                  dsn::error_code err DEFAULT(dsn::ERR_OK));\n\n/*! forward the request to another server instead */\nextern DSN_API void dsn_rpc_forward(dsn::message_ex *request, dsn::rpc_address addr);\n\n/*@}*/\n\n/*!\n@defgroup rpc-client Client-Side RPC Primitives\n\nClient-Side RPC Primitives\n@{\n*/\n\n/*! 
client invokes the RPC call */\nextern DSN_API void dsn_rpc_call(dsn::rpc_address server, dsn::rpc_response_task *rpc_call);\n\n/*!\n   client invokes the RPC call and waits for its response, note\n   returned msg must be explicitly released using \\ref dsn::message_ex::release_ref\n */\nextern DSN_API dsn::message_ex *dsn_rpc_call_wait(dsn::rpc_address server,\n                                                  dsn::message_ex *request);\n\n/*! one-way RPC from client, no rpc response is expected */\nextern DSN_API void dsn_rpc_call_one_way(dsn::rpc_address server, dsn::message_ex *request);\n\n/*@}*/\n\n/*@}*/\n\nextern DSN_API uint64_t dsn_now_ns();\n\n__inline uint64_t dsn_now_us() { return dsn_now_ns() / 1000; }\n__inline uint64_t dsn_now_ms() { return dsn_now_ns() / 1000000; }\n__inline uint64_t dsn_now_s() { return dsn_now_ns() / 1000000000; }\n\n/*@}*/\n\n/*@}*/\n\n/*@}*/\n"
  },
  {
    "path": "include/dsn/c/api_task.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     task and execution model\n *\n * Revision history:\n *     Feb., 2016, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/c/api_common.h>\n#include <dsn/utility/error_code.h>\n#include <dsn/tool-api/threadpool_code.h>\n#include <dsn/tool-api/task_code.h>\n\n/*!\n@addtogroup task-common\n@{\n */\n\nnamespace dsn {\nclass message_ex;\n\ntypedef std::function<void()> task_handler;\n\n/// A callback to handle rpc requests.\n///\n/// Parameters:\n///  - dsn::message_ex*: the received rpc request\ntypedef std::function<void(dsn::message_ex *)> rpc_request_handler;\n\n/// A callback to handle rpc responses.\n///\n/// Parameters:\n///  - 
error_code\n///  - message_ex: the sent rpc request\n///  - message_ex: the received rpc response\ntypedef std::function<void(dsn::error_code, dsn::message_ex *, dsn::message_ex *)>\n    rpc_response_handler;\n\n/// Parameters:\n///  - error_code\n///  - size_t: the read or written size of bytes from file.\ntypedef std::function<void(dsn::error_code, size_t)> aio_handler;\n\nclass task;\nclass raw_task;\nclass rpc_request_task;\nclass rpc_response_task;\nclass aio_task;\n}\n/*!\napps update the value at dsn_task_queue_virtual_length_ptr(..) to control\nthe length of a virtual queue (bound to current code + hash) to\nenable customized throttling, see spec of thread pool for more information\n*/\nextern DSN_API volatile int *dsn_task_queue_virtual_length_ptr(dsn::task_code code,\n                                                               int hash DEFAULT(0));\n\n/*@}*/\n"
  },
  {
    "path": "include/dsn/c/api_utilities.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     useful utilities in rDSN exposed via C API\n *\n * Revision history:\n *     Feb., 2016, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/c/api_common.h>\n#include <dsn/utility/ports.h>\n\n/*!\n@defgroup logging Logging Service\n@ingroup service-api-utilities\n\n Logging Service\n\n Note developers can plug into rDSN their own logging libraryS\n implementation, so as to integrate rDSN logs into\n their own cluster operation systems.\n@{\n*/\n\ntypedef enum dsn_log_level_t {\n    LOG_LEVEL_INFORMATION,\n    LOG_LEVEL_DEBUG,\n    LOG_LEVEL_WARNING,\n    LOG_LEVEL_ERROR,\n    LOG_LEVEL_FATAL,\n    LOG_LEVEL_COUNT,\n    
LOG_LEVEL_INVALID\n} dsn_log_level_t;\n\n// logs with level smaller than this start_level will not be logged\nextern DSN_API dsn_log_level_t dsn_log_start_level;\nextern DSN_API dsn_log_level_t dsn_log_get_start_level();\nextern DSN_API void dsn_log_set_start_level(dsn_log_level_t level);\nextern DSN_API void dsn_logv(const char *file,\n                             const char *function,\n                             const int line,\n                             dsn_log_level_t log_level,\n                             const char *fmt,\n                             va_list args);\nextern DSN_API void dsn_logf(const char *file,\n                             const char *function,\n                             const int line,\n                             dsn_log_level_t log_level,\n                             const char *fmt,\n                             ...);\nextern DSN_API void dsn_log(const char *file,\n                            const char *function,\n                            const int line,\n                            dsn_log_level_t log_level,\n                            const char *str);\nextern DSN_API void dsn_coredump();\n\n// __FILENAME__ macro comes from the cmake, in which we calculate a filename without path.\n#define dlog(level, ...)                                                                           \\\n    do {                                                                                           \\\n        if (level >= dsn_log_start_level)                                                          \\\n            dsn_logf(__FILENAME__, __FUNCTION__, __LINE__, level, __VA_ARGS__);                    \\\n    } while (false)\n#define dinfo(...) dlog(LOG_LEVEL_INFORMATION, __VA_ARGS__)\n#define ddebug(...) dlog(LOG_LEVEL_DEBUG, __VA_ARGS__)\n#define dwarn(...) dlog(LOG_LEVEL_WARNING, __VA_ARGS__)\n#define derror(...) dlog(LOG_LEVEL_ERROR, __VA_ARGS__)\n#define dfatal(...) dlog(LOG_LEVEL_FATAL, __VA_ARGS__)\n#define dassert(x, ...)         
                                                                   \\\n    do {                                                                                           \\\n        if (dsn_unlikely(!(x))) {                                                                  \\\n            dlog(LOG_LEVEL_FATAL, \"assertion expression: \" #x);                                    \\\n            dlog(LOG_LEVEL_FATAL, __VA_ARGS__);                                                    \\\n            dsn_coredump();                                                                        \\\n        }                                                                                          \\\n    } while (false)\n\n#define dreturn_not_ok_logged(err, ...)                                                            \\\n    do {                                                                                           \\\n        if (dsn_unlikely((err) != dsn::ERR_OK)) {                                                  \\\n            derror(__VA_ARGS__);                                                                   \\\n            return err;                                                                            \\\n        }                                                                                          \\\n    } while (0)\n\n#ifndef NDEBUG\n#define dbg_dassert dassert\n#else\n#define dbg_dassert(x, ...)\n#endif\n\n#ifdef DSN_MOCK_TEST\n#define mock_private public\n#define mock_virtual virtual\n#else\n#define mock_private private\n#define mock_virtual\n#endif\n\n/*@}*/\n\n#define dverify(exp)                                                                               \\\n    if (dsn_unlikely(!(exp)))                                                                      \\\n    return false\n\n#define dverify_exception(exp)                                                                     \\\n    do {                                                             
                              \\\n        try {                                                                                      \\\n            exp;                                                                                   \\\n        } catch (...) {                                                                            \\\n            return false;                                                                          \\\n        }                                                                                          \\\n    } while (0)\n\n#define dverify_logged(exp, level, ...)                                                            \\\n    do {                                                                                           \\\n        if (dsn_unlikely(!(exp))) {                                                                \\\n            dlog(level, __VA_ARGS__);                                                              \\\n            return false;                                                                          \\\n        }                                                                                          \\\n    } while (0)\n\n#define dstop_on_false(exp)                                                                        \\\n    if (dsn_unlikely(!(exp)))                                                                      \\\n    return\n#define dstop_on_false_logged(exp, level, ...)                                                     
\\\n    do {                                                                                           \\\n        if (dsn_unlikely(!(exp))) {                                                                \\\n            dlog(level, __VA_ARGS__);                                                              \\\n            return;                                                                                \\\n        }                                                                                          \\\n    } while (0)\n/*@}*/\n"
  },
  {
    "path": "include/dsn/c/app_model.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     application model in rDSN\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/c/api_common.h>\n\n/*!\n mimic an app as if the following execution in the current thread are\n executed in the target app's threads.\n\n \\param app_name name of the application, note it is not the type name\n \\param index    one-based index of the application instances\n\n \\return true if it succeeds, false if it fails.\n\n This is useful when we want to leverage 3rd party library into rDSN\n application and call rDSN service API in the threads that are created\n by the 3rd party code.\n\n For cases we simply 
want to use a rDSN-based client library in a non-rDSN\n application, developers can simply set [core] enable_default_app_mimic = true\n in configuration file. See more details at \\ref enable_default_app_mimic.\n\n */\nextern DSN_API bool dsn_mimic_app(const char *app_role, int index);\n\n/*!\n start the system with given configuration\n\n \\param config           the configuration file for this run\n \\param is_server whether it is server or not, default is false\n\n \\return true if it succeeds, false if it fails.\n */\nextern DSN_API bool dsn_run_config(const char *config, bool is_server DEFAULT(false));\n\n/*!\n start the system with given arguments\n\n \\param argc             argc in C main convention\n \\param argv             argv in C main convention\n \\param is_server whether it is server or not, default is false\n\n \\return true if it succeeds, false if it fails.\n\n Usage:\n   config-file [-cargs k1=v1;k2=v2] [-app_list app_name1@index1;app_name2@index]\n\n Examples:\n - config.ini -app_list replica@1 to start the first replica as a new process\n - config.ini -app_list replica to start ALL replicas (count specified in config) as a new\n process\n - config.ini -app_list replica -cargs replica-port=34556 to start ALL replicas\n   with given port variable specified in config.ini\n - config.ini to start ALL apps as a new process\n\n Note the argc, argv follow the C main convention that argv[0] is the executable name.\n */\nextern DSN_API void dsn_run(int argc, char **argv, bool is_server DEFAULT(false));\n\n/*!\n exit the process with the given exit code\n\n \\param code exit code for the process\n\n rDSN runtime does not provide elegant exit routines. Therefore, developers call dsn_exit\n to exit the current process to avoid exceptions happening during normal exit.\n */\nNORETURN extern DSN_API void dsn_exit(int code);\n"
  },
  {
    "path": "include/dsn/cpp/json_helper.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#pragma once\n\n#include <vector>\n#include <map>\n#include <unordered_map>\n#include <set>\n#include <sstream>\n#include <string>\n#include <type_traits>\n#include <cctype>\n\n#include <rapidjson/ostreamwrapper.h>\n#include <rapidjson/prettywriter.h>\n#include <rapidjson/writer.h>\n#include <rapidjson/document.h>\n\n#include <boost/lexical_cast.hpp>\n\n#include <dsn/utility/autoref_ptr.h>\n#include <dsn/utility/utils.h>\n#include <dsn/tool-api/auto_codes.h>\n#include <dsn/dist/replication/replication_types.h>\n#include <dsn/dist/replication/replication_enums.h>\n\n#define JSON_ENCODE_ENTRY(out, prefix, T)                                                          \\\n    out.Key(#T);                                                       
                            \\\n    ::dsn::json::json_forwarder<std::decay<decltype((prefix).T)>::type>::encode(out, (prefix).T)\n#define JSON_ENCODE_ENTRIES2(out, prefix, T1, T2)                                                  \\\n    JSON_ENCODE_ENTRY(out, prefix, T1);                                                            \\\n    JSON_ENCODE_ENTRY(out, prefix, T2)\n#define JSON_ENCODE_ENTRIES3(out, prefix, T1, T2, T3)                                              \\\n    JSON_ENCODE_ENTRIES2(out, prefix, T1, T2);                                                     \\\n    JSON_ENCODE_ENTRY(out, prefix, T3)\n#define JSON_ENCODE_ENTRIES4(out, prefix, T1, T2, T3, T4)                                          \\\n    JSON_ENCODE_ENTRIES3(out, prefix, T1, T2, T3);                                                 \\\n    JSON_ENCODE_ENTRY(out, prefix, T4)\n#define JSON_ENCODE_ENTRIES5(out, prefix, T1, T2, T3, T4, T5)                                      \\\n    JSON_ENCODE_ENTRIES4(out, prefix, T1, T2, T3, T4);                                             \\\n    JSON_ENCODE_ENTRY(out, prefix, T5)\n#define JSON_ENCODE_ENTRIES6(out, prefix, T1, T2, T3, T4, T5, T6)                                  \\\n    JSON_ENCODE_ENTRIES5(out, prefix, T1, T2, T3, T4, T5);                                         \\\n    JSON_ENCODE_ENTRY(out, prefix, T6)\n#define JSON_ENCODE_ENTRIES7(out, prefix, T1, T2, T3, T4, T5, T6, T7)                              \\\n    JSON_ENCODE_ENTRIES6(out, prefix, T1, T2, T3, T4, T5, T6);                                     \\\n    JSON_ENCODE_ENTRY(out, prefix, T7)\n#define JSON_ENCODE_ENTRIES8(out, prefix, T1, T2, T3, T4, T5, T6, T7, T8)                          \\\n    JSON_ENCODE_ENTRIES7(out, prefix, T1, T2, T3, T4, T5, T6, T7);                                 \\\n    JSON_ENCODE_ENTRY(out, prefix, T8)\n#define JSON_ENCODE_ENTRIES9(out, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9)                      \\\n    JSON_ENCODE_ENTRIES8(out, prefix, T1, 
T2, T3, T4, T5, T6, T7, T8);                             \\\n    JSON_ENCODE_ENTRY(out, prefix, T9)\n#define JSON_ENCODE_ENTRIES10(out, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)                \\\n    JSON_ENCODE_ENTRIES9(out, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9);                         \\\n    JSON_ENCODE_ENTRY(out, prefix, T10)\n#define JSON_ENCODE_ENTRIES11(out, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)           \\\n    JSON_ENCODE_ENTRIES10(out, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10);                   \\\n    JSON_ENCODE_ENTRY(out, prefix, T11)\n#define JSON_ENCODE_ENTRIES12(out, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)      \\\n    JSON_ENCODE_ENTRIES11(out, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11);              \\\n    JSON_ENCODE_ENTRY(out, prefix, T12)\n#define JSON_ENCODE_ENTRIES13(out, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13) \\\n    JSON_ENCODE_ENTRIES12(out, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12);         \\\n    JSON_ENCODE_ENTRY(out, prefix, T13)\n#define JSON_ENCODE_ENTRIES14(                                                                     \\\n    out, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14)                      \\\n    JSON_ENCODE_ENTRIES13(out, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13);    \\\n    JSON_ENCODE_ENTRY(out, prefix, T14)\n\n#define JSON_DECODE_ENTRY(in, prefix, T)                                                           \\\n    do {                                                                                           \\\n        dverify(in.HasMember(#T));                                                                 \\\n        dverify(::dsn::json::json_forwarder<std::decay<decltype((prefix).T)>::type>::decode(       \\\n            in[#T], (prefix).T));                                                                  \\\n    } while (0)\n\n#define 
JSON_TRY_DECODE_ENTRY(in, prefix, T)                                                       \\\n    do {                                                                                           \\\n        ++arguments_count;                                                                         \\\n        if (in.HasMember(#T)) {                                                                    \\\n            dverify(::dsn::json::json_forwarder<std::decay<decltype((prefix).T)>::type>::decode(   \\\n                in[#T], (prefix).T));                                                              \\\n            ++parsed_count;                                                                        \\\n        }                                                                                          \\\n    } while (0)\n\n#define JSON_DECODE_ENTRIES2(in, prefix, T1, T2)                                                   \\\n    JSON_TRY_DECODE_ENTRY(in, prefix, T1);                                                         \\\n    JSON_TRY_DECODE_ENTRY(in, prefix, T2)\n#define JSON_DECODE_ENTRIES3(in, prefix, T1, T2, T3)                                               \\\n    JSON_DECODE_ENTRIES2(in, prefix, T1, T2);                                                      \\\n    JSON_TRY_DECODE_ENTRY(in, prefix, T3)\n#define JSON_DECODE_ENTRIES4(in, prefix, T1, T2, T3, T4)                                           \\\n    JSON_DECODE_ENTRIES3(in, prefix, T1, T2, T3);                                                  \\\n    JSON_TRY_DECODE_ENTRY(in, prefix, T4)\n#define JSON_DECODE_ENTRIES5(in, prefix, T1, T2, T3, T4, T5)                                       \\\n    JSON_DECODE_ENTRIES4(in, prefix, T1, T2, T3, T4);                                              \\\n    JSON_TRY_DECODE_ENTRY(in, prefix, T5)\n#define JSON_DECODE_ENTRIES6(in, prefix, T1, T2, T3, T4, T5, T6)                                   \\\n    JSON_DECODE_ENTRIES5(in, prefix, T1, T2, T3, T4, T5);         
                                 \\\n    JSON_TRY_DECODE_ENTRY(in, prefix, T6)\n#define JSON_DECODE_ENTRIES7(in, prefix, T1, T2, T3, T4, T5, T6, T7)                               \\\n    JSON_DECODE_ENTRIES6(in, prefix, T1, T2, T3, T4, T5, T6);                                      \\\n    JSON_TRY_DECODE_ENTRY(in, prefix, T7)\n#define JSON_DECODE_ENTRIES8(in, prefix, T1, T2, T3, T4, T5, T6, T7, T8)                           \\\n    JSON_DECODE_ENTRIES7(in, prefix, T1, T2, T3, T4, T5, T6, T7);                                  \\\n    JSON_TRY_DECODE_ENTRY(in, prefix, T8)\n#define JSON_DECODE_ENTRIES9(in, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9)                       \\\n    JSON_DECODE_ENTRIES8(in, prefix, T1, T2, T3, T4, T5, T6, T7, T8);                              \\\n    JSON_TRY_DECODE_ENTRY(in, prefix, T9)\n#define JSON_DECODE_ENTRIES10(in, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10)                 \\\n    JSON_DECODE_ENTRIES9(in, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9);                          \\\n    JSON_TRY_DECODE_ENTRY(in, prefix, T10)\n#define JSON_DECODE_ENTRIES11(in, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11)            \\\n    JSON_DECODE_ENTRIES10(in, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10);                    \\\n    JSON_TRY_DECODE_ENTRY(in, prefix, T11)\n#define JSON_DECODE_ENTRIES12(in, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12)       \\\n    JSON_DECODE_ENTRIES11(in, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11);               \\\n    JSON_TRY_DECODE_ENTRY(in, prefix, T12)\n#define JSON_DECODE_ENTRIES13(in, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13)  \\\n    JSON_DECODE_ENTRIES12(in, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12);          \\\n    JSON_TRY_DECODE_ENTRY(in, prefix, T13)\n#define JSON_DECODE_ENTRIES14(                                                                     \\\n    in, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, 
T13, T14)                       \\\n    JSON_DECODE_ENTRIES13(in, prefix, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13);     \\\n    JSON_TRY_DECODE_ENTRY(in, prefix, T14)\n\n#define JSON_ENTRIES_GET_MACRO(                                                                    \\\n    ph1, ph2, ph3, ph4, ph5, ph6, ph7, ph8, ph9, ph10, ph11, ph12, ph13, ph14, NAME, ...)          \\\n    NAME\n// workaround due to the way VC handles \"...\"\n#define JSON_ENTRIES_GET_MACRO_(tuple) JSON_ENTRIES_GET_MACRO tuple\n\n#define JSON_ENCODE_ENTRIES(out, prefix, ...)                                                      \\\n    out.StartObject();                                                                             \\\n    JSON_ENTRIES_GET_MACRO_((__VA_ARGS__,                                                          \\\n                             JSON_ENCODE_ENTRIES14,                                                \\\n                             JSON_ENCODE_ENTRIES13,                                                \\\n                             JSON_ENCODE_ENTRIES12,                                                \\\n                             JSON_ENCODE_ENTRIES11,                                                \\\n                             JSON_ENCODE_ENTRIES10,                                                \\\n                             JSON_ENCODE_ENTRIES9,                                                 \\\n                             JSON_ENCODE_ENTRIES8,                                                 \\\n                             JSON_ENCODE_ENTRIES7,                                                 \\\n                             JSON_ENCODE_ENTRIES6,                                                 \\\n                             JSON_ENCODE_ENTRIES5,                                                 \\\n                             JSON_ENCODE_ENTRIES4,                                                 \\\n                             
JSON_ENCODE_ENTRIES3,                                                 \\\n                             JSON_ENCODE_ENTRIES2,                                                 \\\n                             JSON_ENCODE_ENTRY))                                                   \\\n    (out, prefix, __VA_ARGS__);                                                                    \\\n    out.EndObject()\n\n#define JSON_DECODE_ENTRIES(in, prefix, ...)                                                       \\\n    dverify(in.IsObject());                                                                        \\\n    int arguments_count = 0;                                                                       \\\n    int parsed_count = 0;                                                                          \\\n    JSON_ENTRIES_GET_MACRO_((__VA_ARGS__,                                                          \\\n                             JSON_DECODE_ENTRIES14,                                                \\\n                             JSON_DECODE_ENTRIES13,                                                \\\n                             JSON_DECODE_ENTRIES12,                                                \\\n                             JSON_DECODE_ENTRIES11,                                                \\\n                             JSON_DECODE_ENTRIES10,                                                \\\n                             JSON_DECODE_ENTRIES9,                                                 \\\n                             JSON_DECODE_ENTRIES8,                                                 \\\n                             JSON_DECODE_ENTRIES7,                                                 \\\n                             JSON_DECODE_ENTRIES6,                                                 \\\n                             JSON_DECODE_ENTRIES5,                                                 \\\n                             
JSON_DECODE_ENTRIES4,                                                 \\\n                             JSON_DECODE_ENTRIES3,                                                 \\\n                             JSON_DECODE_ENTRIES2,                                                 \\\n                             JSON_DECODE_ENTRY))                                                   \\\n    (in, prefix, __VA_ARGS__);                                                                     \\\n    return parsed_count == arguments_count || parsed_count == in.MemberCount();\n\n#define DEFINE_JSON_SERIALIZATION(...)                                                             \\\n    void encode_json_state(std::ostream &os)                                                       \\\n    {                                                                                              \\\n        rapidjson::OStreamWrapper wrapper(os);                                                     \\\n        dsn::json::JsonWriter w(wrapper);                                                          \\\n        encode_json_state(w);                                                                      \\\n    }                                                                                              \\\n    void encode_json_state(dsn::json::JsonWriter &out) const                                       \\\n    {                                                                                              \\\n        JSON_ENCODE_ENTRIES(out, *this, __VA_ARGS__);                                              \\\n    }                                                                                              \\\n    bool decode_json_state(const dsn::json::JsonObject &in)                                        \\\n    {                                                                                              \\\n        JSON_DECODE_ENTRIES(in, *this, __VA_ARGS__);                                        
       \\\n    }\n\n#define NON_MEMBER_JSON_SERIALIZATION(type, ...)                                                   \\\n    inline void json_encode(dsn::json::JsonWriter &output, const type &t)                          \\\n    {                                                                                              \\\n        JSON_ENCODE_ENTRIES(output, t, __VA_ARGS__);                                               \\\n    }                                                                                              \\\n    inline bool json_decode(const dsn::json::JsonObject &input, type &t)                           \\\n    {                                                                                              \\\n        JSON_DECODE_ENTRIES(input, t, __VA_ARGS__);                                                \\\n    }\n\nnamespace dsn {\nnamespace json {\n\ntypedef rapidjson::GenericValue<rapidjson::UTF8<>> JsonObject;\ntypedef rapidjson::Writer<rapidjson::OStreamWrapper> JsonWriter;\ntypedef rapidjson::PrettyWriter<rapidjson::OStreamWrapper> PrettyJsonWriter;\n\ntemplate <typename>\nclass json_forwarder;\n\n// json serialization for string types.\n// please notice when we call rapidjson::Writer::String, with 3rd parameter with \"true\",\n// which means that we will COPY string to writer\ntemplate <typename Writer>\nvoid json_encode(Writer &out, const std::string &str)\n{\n    out.String(str.c_str(), str.length(), true);\n}\ninline void json_encode(JsonWriter &out, const char *str) { out.String(str, strlen(str), true); }\ninline bool json_decode(const JsonObject &in, std::string &str)\n{\n    dverify(in.IsString());\n    str = in.GetString();\n    return true;\n}\n\ninline void json_encode(JsonWriter &out, const error_code &err)\n{\n    const char *str = err.to_string();\n    out.String(str, strlen(str), true);\n}\ninline bool json_decode(const JsonObject &in, error_code &err)\n{\n    dverify(in.IsString());\n    err = 
error_code(in.GetString());\n    return true;\n}\n\n// json serialization for bool types.\n// for compatibility, we treat bool as integers, which is not this case in json standard\ninline void json_encode(JsonWriter &out, bool t) { out.Int(t ? 1 : 0); }\ninline bool json_decode(const JsonObject &in, bool &t)\n{\n    if (in.IsInt()) {\n        int ans = in.GetInt();\n        t = (ans != 0);\n        return true;\n    } else if (in.IsBool()) {\n        t = in.GetBool();\n        return true;\n    }\n    return false;\n}\n\n// json serialization for double types\ninline void json_encode(JsonWriter &out, double d) { out.Double(d); }\ninline bool json_decode(const JsonObject &in, double &t)\n{\n    if (in.IsDouble()) {\n        t = in.GetDouble();\n        return true;\n    } else if (in.IsInt64()) {\n        t = (double)in.GetInt64();\n        return true;\n    } else if (in.IsUint64()) {\n        t = (double)in.GetUint64();\n        return true;\n    }\n    return false;\n}\n\n// json serialization for int types\n#define INT_TYPE_SERIALIZATION(TName)                                                              \\\n    inline void json_encode(JsonWriter &out, TName t) { out.Int64((int64_t)t); }                   \\\n    inline bool json_decode(const JsonObject &in, TName &t)                                        \\\n    {                                                                                              \\\n        dverify(in.IsInt64());                                                                     \\\n        int64_t ans = in.GetInt64();                                                               \\\n        dverify(ans >= std::numeric_limits<TName>::min() &&                                        \\\n                ans <= std::numeric_limits<TName>::max());                                         \\\n        t = (TName)ans;                                                                            \\\n        return true;                           
                                                    \\\n    }\n\nINT_TYPE_SERIALIZATION(int8_t)\nINT_TYPE_SERIALIZATION(int16_t)\nINT_TYPE_SERIALIZATION(int32_t)\nINT_TYPE_SERIALIZATION(int64_t)\n\n// json serialization for uint types\n#define UINT_TYPE_SERIALIZATION(TName)                                                             \\\n    inline void json_encode(JsonWriter &out, TName t) { out.Uint64((uint64_t)t); }                 \\\n    inline bool json_decode(const JsonObject &in, TName &t)                                        \\\n    {                                                                                              \\\n        dverify(in.IsUint64());                                                                    \\\n        int64_t ans = in.GetUint64();                                                              \\\n        dverify(ans >= std::numeric_limits<TName>::min() &&                                        \\\n                ans <= std::numeric_limits<TName>::max());                                         \\\n        t = (TName)ans;                                                                            \\\n        return true;                                                                               \\\n    }\n\nUINT_TYPE_SERIALIZATION(uint8_t)\nUINT_TYPE_SERIALIZATION(uint16_t)\nUINT_TYPE_SERIALIZATION(uint32_t)\nUINT_TYPE_SERIALIZATION(uint64_t)\n\n// helper macro for enum types, we treat all enums as string\n#define ENUM_TYPE_SERIALIZATION(EnumType, InvalidEnum)                                             \\\n    inline void json_encode(dsn::json::JsonWriter &out, const EnumType &enum_variable)             \\\n    {                                                                                              \\\n        dsn::json::json_encode(out, enum_to_string(enum_variable));                                \\\n    }                                                                                              \\\n    
inline bool json_decode(const dsn::json::JsonObject &in, EnumType &enum_variable)              \\\n    {                                                                                              \\\n        std::string status_message;                                                                \\\n        dverify(dsn::json::json_decode(in, status_message));                                       \\\n        enum_variable = enum_from_string(status_message.c_str(), InvalidEnum);                     \\\n        return true;                                                                               \\\n    }\n\nENUM_TYPE_SERIALIZATION(dsn::replication::partition_status::type,\n                        dsn::replication::partition_status::PS_INVALID)\nENUM_TYPE_SERIALIZATION(dsn::app_status::type, dsn::app_status::AS_INVALID)\nENUM_TYPE_SERIALIZATION(dsn::replication::bulk_load_status::type,\n                        dsn::replication::bulk_load_status::BLS_INVALID)\n\n// json serialization for gpid, we treat it as string: \"app_id.partition_id\"\ninline void json_encode(JsonWriter &out, const dsn::gpid &pid)\n{\n    json_encode(out, pid.to_string());\n}\ninline bool json_decode(const dsn::json::JsonObject &in, dsn::gpid &pid)\n{\n    std::string gpid_message;\n    dverify(json_decode(in, gpid_message));\n    return pid.parse_from(gpid_message.c_str());\n}\n\n// json serialization for rpc address, we use the string representation of a address\ninline void json_encode(JsonWriter &out, const dsn::rpc_address &address)\n{\n    json_encode(out, address.to_string());\n}\ninline bool json_decode(const dsn::json::JsonObject &in, dsn::rpc_address &address)\n{\n    std::string rpc_address_string;\n    dverify(json_decode(in, rpc_address_string));\n    if (rpc_address_string == \"invalid address\") {\n        return true;\n    }\n    return address.from_string_ipv4(rpc_address_string.c_str());\n}\n\ninline void json_encode(JsonWriter &out, const dsn::partition_configuration 
&config);\ninline bool json_decode(const JsonObject &in, dsn::partition_configuration &config);\ninline void json_encode(JsonWriter &out, const dsn::app_info &info);\ninline bool json_decode(const JsonObject &in, dsn::app_info &info);\ninline void json_encode(JsonWriter &out, const dsn::replication::file_meta &f_meta);\ninline bool json_decode(const JsonObject &in, dsn::replication::file_meta &f_meta);\ninline void json_encode(JsonWriter &out, const dsn::replication::bulk_load_metadata &metadata);\ninline bool json_decode(const JsonObject &in, dsn::replication::bulk_load_metadata &metadata);\n\ntemplate <typename T>\ninline void json_encode_iterable(JsonWriter &out, const T &t)\n{\n    out.StartArray();\n    for (auto it = t.begin(); it != t.end(); ++it) {\n        json_forwarder<typename std::decay<decltype(*it)>::type>::encode(out, *it);\n    }\n    out.EndArray();\n}\n\ntemplate <typename T>\ninline void json_encode_map(JsonWriter &out, const T &t)\n{\n    out.StartObject();\n    for (auto it = t.begin(); it != t.end(); ++it) {\n        // please notice that in json's standard, all keys must be string\n        std::string key_string = boost::lexical_cast<std::string>(it->first);\n        out.Key(key_string.c_str(), key_string.size(), true);\n        json_forwarder<typename std::decay<decltype(it->second)>::type>::encode(out, it->second);\n    }\n    out.EndObject();\n}\n\ntemplate <typename TMap>\ninline bool json_decode_map(const JsonObject &in, TMap &t)\n{\n    dverify(in.IsObject());\n    t.clear();\n    for (rapidjson::Value::ConstMemberIterator it = in.MemberBegin(); it != in.MemberEnd(); ++it) {\n        typename TMap::key_type key;\n        dverify_exception(key = boost::lexical_cast<typename TMap::key_type>(it->name.GetString()));\n        typename TMap::mapped_type value;\n        dverify(json_forwarder<decltype(value)>::decode(it->value, value));\n        if (!t.emplace(key, value).second)\n            return false;\n    }\n    return 
true;\n}\n\ntemplate <typename T>\ninline void json_encode(JsonWriter &out, const std::vector<T> &t)\n{\n    json_encode_iterable(out, t);\n}\n\ntemplate <typename T>\ninline bool json_decode(const JsonObject &in, std::vector<T> &t)\n{\n    dverify(in.IsArray());\n    t.clear();\n    t.reserve(in.Size());\n\n    for (rapidjson::Value::ConstValueIterator it = in.Begin(); it != in.End(); ++it) {\n        T value;\n        dverify(json_forwarder<T>::decode(*it, value));\n        t.emplace_back(std::move(value));\n    }\n    return true;\n}\n\ntemplate <typename T>\ninline void json_encode(JsonWriter &out, const std::set<T> &t)\n{\n    json_encode_iterable(out, t);\n}\n\ntemplate <typename T>\ninline bool json_decode(const JsonObject &in, std::set<T> &t)\n{\n    dverify(in.IsArray());\n    t.clear();\n\n    for (rapidjson::Value::ConstValueIterator it = in.Begin(); it != in.End(); ++it) {\n        T value;\n        dverify(json_forwarder<T>::decode(*it, value));\n        dverify(t.emplace(std::move(value)).second);\n    }\n    return true;\n}\n\ntemplate <typename T1, typename T2>\ninline void json_encode(JsonWriter &out, const std::unordered_map<T1, T2> &t)\n{\n    json_encode_map(out, t);\n}\n\ntemplate <typename T1, typename T2>\ninline bool json_decode(const JsonObject &in, std::unordered_map<T1, T2> &t)\n{\n    return json_decode_map(in, t);\n}\n\ntemplate <typename T1, typename T2>\ninline void json_encode(JsonWriter &out, const std::map<T1, T2> &t)\n{\n    json_encode_map(out, t);\n}\n\ntemplate <typename T1, typename T2>\ninline bool json_decode(const JsonObject &in, std::map<T1, T2> &t)\n{\n    return json_decode_map(in, t);\n}\n\ntemplate <typename T>\ninline void json_encode(JsonWriter &out, const dsn::ref_ptr<T> &t)\n{\n    // when a smart ptr is encoded, caller should ensure the ptr is not nullptr\n    // TODO: encoded to null?\n    assert(t.get() != nullptr);\n    json_encode(out, *t);\n}\n\ntemplate <typename T>\ninline bool json_decode(const JsonObject 
&in, dsn::ref_ptr<T> &t)\n{\n    t = new T();\n    return json_decode(in, *t);\n}\n\ntemplate <typename T>\ninline void json_encode(JsonWriter &out, const std::shared_ptr<T> &t)\n{\n    // when a smart ptr is encoded, caller should ensure the ptr is not nullptr\n    // TODO: encoded to null?\n    assert(t.get() != nullptr);\n    json_encode(out, *t);\n}\n\ntemplate <typename T>\ninline bool json_decode(const JsonObject &in, std::shared_ptr<T> &t)\n{\n    t.reset(new T());\n    return json_decode(in, *t);\n}\n\ntemplate <typename T>\nclass json_forwarder\n{\nprivate:\n    // check if C has C.encode_json_state(dsn::json::JsonWriter&) function\n    template <typename C>\n    static auto check_json_state(C *) -> typename std::is_same<\n        decltype(std::declval<C>().encode_json_state(std::declval<dsn::json::JsonWriter &>())),\n        void>::type;\n\n    template <typename>\n    static std::false_type check_json_state(...);\n\n    // check if C has C->json_state(dsn::json::JsonWriter&) function\n    template <typename C>\n    static auto p_check_json_state(C *) -> typename std::is_same<\n        decltype(std::declval<C>()->encode_json_state(std::declval<dsn::json::JsonWriter &>())),\n        void>::type;\n\n    template <typename>\n    static std::false_type p_check_json_state(...);\n\n    typedef decltype(check_json_state<T>(0)) has_json_state;\n    typedef decltype(p_check_json_state<T>(0)) p_has_json_state;\n\n    // internal serialization\n    static void encode_inner(JsonWriter &out, const T &t, std::true_type, std::false_type)\n    {\n        t.encode_json_state(out);\n    }\n    static void encode_inner(JsonWriter &out, const T &t, std::false_type, std::true_type)\n    {\n        t->encode_json_state(out);\n    }\n    static void encode_inner(JsonWriter &out, const T &t, std::true_type, std::true_type)\n    {\n        t->encode_json_state(out);\n    }\n    static void encode_inner(JsonWriter &out, const T &t, std::false_type, std::false_type)\n    {\n        
json_encode(out, t);\n    }\n\n    // internal deserialization\n    static bool decode_inner(const JsonObject &in, T &t, std::true_type, std::false_type)\n    {\n        return t.decode_json_state(in);\n    }\n    static bool decode_inner(const JsonObject &in, T &t, std::false_type, std::true_type)\n    {\n        return t->decode_json_state(in);\n    }\n    static bool decode_inner(const JsonObject &in, T &t, std::true_type, std::true_type)\n    {\n        return t->decode_json_state(in);\n    }\n    static bool decode_inner(const JsonObject &in, T &t, std::false_type, std::false_type)\n    {\n        return json_decode(in, t);\n    }\n\npublic:\n    static void encode(JsonWriter &out, const T &t)\n    {\n        encode_inner(out, t, has_json_state{}, p_has_json_state{});\n    }\n    static void encode(std::ostream &os, const T &t)\n    {\n        rapidjson::OStreamWrapper wrapper(os);\n        JsonWriter writer(wrapper);\n        encode(writer, t);\n    }\n    static dsn::blob encode(const T &t)\n    {\n        std::ostringstream os;\n        encode(os, t);\n        return blob::create_from_bytes(os.str());\n    }\n\n    static bool decode(const JsonObject &in, T &t)\n    {\n        return decode_inner(in, t, has_json_state{}, p_has_json_state{});\n    }\n    static bool decode(const dsn::blob &bb, T &t)\n    {\n        rapidjson::Document doc;\n        dverify(!doc.Parse(bb.data(), bb.length()).HasParseError());\n        return decode(doc, t);\n    }\n\n    // decode the member that's const qualified.\n    static bool decode(const JsonObject &in, const T &t)\n    {\n        using MutableT = typename std::remove_const<T>::type;\n        return decode(in, const_cast<MutableT &>(t));\n    }\n    static bool decode(const dsn::blob &bb, const T &t)\n    {\n        using MutableT = typename std::remove_const<T>::type;\n        return decode(bb, const_cast<MutableT &>(t));\n    }\n};\n\nNON_MEMBER_JSON_SERIALIZATION(dsn::partition_configuration,\n                       
       pid,\n                              ballot,\n                              max_replica_count,\n                              primary,\n                              secondaries,\n                              last_drops,\n                              last_committed_decree,\n                              partition_flags)\n\nNON_MEMBER_JSON_SERIALIZATION(dsn::app_info,\n                              status,\n                              app_type,\n                              app_name,\n                              app_id,\n                              partition_count,\n                              envs,\n                              is_stateful,\n                              max_replica_count,\n                              expire_second,\n                              create_second,\n                              drop_second,\n                              duplicating,\n                              init_partition_count,\n                              is_bulk_loading)\n\nNON_MEMBER_JSON_SERIALIZATION(dsn::replication::file_meta, name, size, md5)\n\nNON_MEMBER_JSON_SERIALIZATION(dsn::replication::bulk_load_metadata, files, file_total_size)\n} // namespace json\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/cpp/message_utils.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/utility/string_view.h>\n#include <dsn/utility/binary_writer.h>\n#include <dsn/utility/binary_reader.h>\n#include <dsn/cpp/rpc_stream.h>\n#include <dsn/cpp/serialization.h>\n#include <dsn/tool-api/rpc_message.h>\n#include <dsn/cpp/serialization_helper/dsn.layer2_types.h>\n\nnamespace dsn {\n\n/// Move the content inside message `m` into a blob.\ninline blob move_message_to_blob(message_ex *m)\n{\n    rpc_read_stream reader(m);\n    return reader.get_buffer();\n}\n\n/// Convert a blob into a message for reading(unmarshalling).\n/// This function is identical with dsn::message_ex::create_received_request,\n/// however it passes a blob to ensure ownership safety instead of\n/// passing simply a constant view.\n/// MUST released manually later using dsn::message_ex::release_ref.\nextern message_ex *\nfrom_blob_to_received_msg(task_code rpc_code,\n                          const blob &bb,\n                          int thread_hash = 0,\n                          uint64_t partition_hash = 0,\n                          dsn_msg_serialize_format serialization_type = DSF_THRIFT_BINARY);\ninline message_ex 
*\nfrom_blob_to_received_msg(task_code rpc_code,\n                          blob &&bb,\n                          int thread_hash = 0,\n                          uint64_t partition_hash = 0,\n                          dsn_msg_serialize_format serialization_type = DSF_THRIFT_BINARY)\n{\n    return from_blob_to_received_msg(rpc_code, bb, thread_hash, partition_hash, serialization_type);\n}\n\n/// Convert a thrift request into a dsn message (using binary encoding).\n/// It's useful for unit test, especially when we need to create a fake message\n/// as test input.\ntemplate <typename T>\ninline message_ex *from_thrift_request_to_received_message(const T &thrift_request, task_code tc)\n{\n    binary_writer writer;\n    marshall_thrift_binary(writer, thrift_request);\n    return from_blob_to_received_msg(tc, writer.get_buffer());\n}\n\n/// Convert a blob into a thrift object.\ntemplate <typename T>\ninline void from_blob_to_thrift(const blob &data, T &thrift_obj)\n{\n    binary_reader reader(data);\n    unmarshall_thrift_binary(reader, thrift_obj);\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/cpp/pipeline.h",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#pragma once\n\n#include <dsn/tool-api/task_code.h>\n#include <dsn/tool-api/task_tracker.h>\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/utility/chrono_literals.h>\n#include <dsn/utility/apply.h>\n\nnamespace dsn {\nnamespace pipeline {\n\n// The environment for execution.\nstruct environment\n{\n    template <typename F>\n    void schedule(F &&f, std::chrono::milliseconds delay_ms = 0_ms) const\n    {\n        tasking::enqueue(__conf.thread_pool_code,\n                         __conf.tracker,\n                         std::forward<F>(f),\n                         __conf.thread_hash,\n                         delay_ms);\n    }\n\n    /// Fluent APIs to specify the environment configuration.\n    environment &thread_pool(task_code tc)\n    {\n        __conf.thread_pool_code = tc;\n        return *this;\n    }\n    environment &thread_hash(int hash)\n    {\n        __conf.thread_hash = hash;\n        return *this;\n    }\n    environment &task_tracker(dsn::task_tracker *tracker)\n    {\n        __conf.tracker = tracker;\n        return *this;\n    }\n\n    struct\n    {\n        task_code thread_pool_code;\n        
dsn::task_tracker *tracker{nullptr};\n        int thread_hash{0};\n    } __conf;\n};\n\ntemplate <typename... Args>\nstruct result\n{\n    typedef std::tuple<Args...> ArgsTupleType;\n\n    // Step down to next stage.\n    // NOTE: Remember to exit from caller function after `step_down_next_stage`.\n    // For example:\n    //\n    // ```\n    //   pipeline::base base;\n    //   ping_rpc rpc = ...;\n    //\n    //   pipeline::do_when<> ping([&ping]() {\n    //        bool ok = rpc.call();\n    //        if(ok) {\n    //            step_down_next_stage();\n    //            // when steps out, it goes to repeat this stage round and round.\n    //        }\n    //        repeat(1_s); // will repeat even after stepping down to next stage.\n    //   });\n    //\n    //   base.thread_pool(LPC_DUPLICATE_MUTATIONS).task_tracker(&tracker).from(&s1);\n    //   base.run_pipeline();\n    //   base.wait_all();\n    // ```\n    //\n    // To fix the problem, return immediately after `step_down_next_stage`.\n    //\n    // ```\n    //   pipeline::do_when<> ping([&ping]() {\n    //        bool ok = rpc.call();\n    //        if(ok) {\n    //            step_down_next_stage();\n    //            return;\n    //        }\n    //        repeat(1_s); // will repeat even after stepping down to next stage.\n    //   });\n    // ```\n    //\n    void step_down_next_stage(Args &&... 
args)\n    {\n        dassert(__func != nullptr, \"no next stage is linked\");\n        __func(std::make_tuple(std::forward<Args>(args)...));\n    }\n\n    std::function<void(ArgsTupleType &&)> __func;\n};\n\n//\n// Example:\n//\n// ```\n//   pipeline::base base;\n//\n//   pipeline::do_when<> s1([&s1]() { s1.repeat(1_s); });\n//   base.thread_pool(LPC_DUPLICATE_MUTATIONS).task_tracker(&tracker).from(&s1);\n//\n//   base.run_pipeline();\n//   base.pause();\n//   base.wait_all();\n// ```\n//\nstruct base : environment\n{\n    // Start this pipeline.\n    // NOTE: Be careful when pipeline starting and pausing are running concurrently,\n    //       though it's internally synchronized, the actual order is still non-deterministic\n    //       from the user's view.\n    //\n    // ```\n    //   base.schedule([&base]() { base.run_pipeline(); });\n    //   base.pause();\n    //   base.wait_all(); // the pipeline won't stop.\n    // ```\n    //\n    void run_pipeline();\n\n    void pause() { _paused.store(true, std::memory_order_release); }\n\n    bool paused() const { return _paused.load(std::memory_order_acquire); }\n\n    // Await for all running tasks to complete.\n    void wait_all() { __conf.tracker->wait_outstanding_tasks(); }\n    void cancel_all() { __conf.tracker->cancel_outstanding_tasks(); }\n\n    /// === Pipeline Declaration === ///\n    /// Declaration of pipeline is not thread-safe.\n\n    template <typename Stage>\n    struct node\n    {\n        // pipeline supports cyclic execution.\n        // For example in \"data verifier\", we insert data into database, and verify\n        // that it is applied successfully. 
After verification we make next insert.\n        //\n        // ```\n        //      _insert = dsn::make_unique<insert_data>(...);\n        //      _verify = dsn::make_unique<verify_data>(...);\n        //      link(*_insert).link(*_verify).link(*_insert);\n        // ```\n        //\n        // Here we construct a infinite loop.\n        // When first `_insert` steps down to `_verify`, it directly calls the function\n        // `_verify->run(...)`.\n        // However when `_verify` is stepping down, in order to avoid infinite recursion\n        // which will cause stack overflow, it calls `_insert->async(..)`, which enqueues\n        // a new task into rdsn task engine.\n        template <typename NextStage>\n        node<NextStage> link(NextStage &next)\n        {\n            using ArgsTupleType = typename Stage::ArgsTupleType;\n\n            // link to node of existing pipeline\n            if (next.__pipeline != nullptr) {\n                this_stage->__func = [next_ptr = &next](ArgsTupleType && args) mutable\n                {\n                    dsn::apply(&NextStage::async,\n                               std::tuple_cat(std::make_tuple(next_ptr), std::move(args)));\n                };\n            } else {\n                next.__conf = this_stage->__conf;\n                next.__pipeline = this_stage->__pipeline;\n                this_stage->__func = [next_ptr = &next](ArgsTupleType && args) mutable\n                {\n                    if (next_ptr->paused()) {\n                        return;\n                    }\n                    dsn::apply(&NextStage::run,\n                               std::tuple_cat(std::make_tuple(next_ptr), std::move(args)));\n                };\n            }\n            return node<NextStage>(&next);\n        }\n\n        explicit node(Stage *s) : this_stage(s) {}\n\n    private:\n        Stage *this_stage;\n    };\n\n    template <typename Stage>\n    node<Stage> from(Stage &start)\n    {\n        start.__conf = 
__conf;\n        start.__pipeline = this;\n        _root_stage = &start;\n        return node<Stage>(&start);\n    }\n\n    // Create a fork of the pipeline, which shares the same task tracker,\n    // but with different thread pool, thread hash.\n    template <typename NextStage>\n    node<NextStage> fork(NextStage &next, task_code tc, int thread_hash)\n    {\n        next.__conf.thread_pool_code = tc;\n        next.__conf.thread_hash = thread_hash;\n        next.__conf.tracker = __conf.tracker;\n\n        next.__pipeline = this;\n        return node<NextStage>(&next);\n    }\n\nprivate:\n    environment *_root_stage{nullptr};\n    std::atomic_bool _paused{true};\n};\n\n// A piece of execution, receiving argument `Args`, running in the environment\n// created by `pipeline::base`.\ntemplate <typename... Args>\nstruct when : environment\n{\n    /// Run this stage within current context.\n    virtual void run(Args &&... in) = 0;\n\n    void repeat(Args &&... in, std::chrono::milliseconds delay_ms = 0_ms)\n    {\n        auto arg_tuple = std::make_tuple(this, std::forward<Args>(in)...);\n        schedule([ this, args = std::move(arg_tuple) ]() mutable {\n            if (paused()) {\n                return;\n            }\n            dsn::apply(&when<Args...>::run, std::move(args));\n        },\n                 delay_ms);\n    }\n\n    /// Run this stage asynchronously in its environment.\n    void async(Args &&... 
in) { repeat(std::forward<Args>(in)...); }\n\n    bool paused() const { return __pipeline->paused(); }\n\n    base *__pipeline{nullptr};\n};\n\ninline void base::run_pipeline()\n{\n    dassert(__conf.tracker != nullptr, \"must configure task tracker\");\n\n    _paused.store(false, std::memory_order_release);\n\n    schedule([stage = static_cast<when<> *>(_root_stage)]() {\n        // static_cast for downcast, but completely safe.\n        stage->run();\n    });\n}\n\n/// A simple utility for definition of a `when` using lambda.\n/// It's useful for unit test.\ntemplate <typename... Args>\nstruct do_when : when<Args...>\n{\n    explicit do_when(std::function<void(Args &&... args)> &&func) : _cb(std::move(func)) {}\n\n    void run(Args &&... args) override { _cb(std::forward<Args>(args)...); }\n\n    virtual ~do_when() = default;\n\nprivate:\n    std::function<void(Args &&...)> _cb;\n};\n\n/// Runnable must extend from pipeline::environment and implement\n/// a public method: `void run();`\ntemplate <typename Runnable>\nstatic void repeat(Runnable &&r, std::chrono::milliseconds delay_ms = 0_ms)\n{\n    environment env = r;\n    env.schedule([r = std::move(r)]() mutable { r.run(); }, delay_ms);\n}\n\n} // namespace pipeline\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/cpp/rpc_holder.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/c/api_common.h>\n#include <dsn/c/api_layer1.h>\n#include <dsn/service_api_cpp.h>\n#include <dsn/tool-api/rpc_message.h>\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/tool-api/task_tracker.h>\n#include <dsn/utility/smart_pointers.h>\n#include <dsn/utility/chrono_literals.h>\n#include <dsn/dist/replication/partition_resolver.h>\n\nnamespace dsn {\n\nusing literals::chrono_literals::operator\"\" _ms;\n\n//\n// rpc_holder is mainly designed for RAII of message_ex*.\n// Since the request message will be automatically released after the rpc ends,\n// it will become inaccessible when you use it in an async call (probably via tasking::enqueue).\n// So in rpc_holder we hold another reference of the message, preventing it to be deleted.\n//\n// rpc_holder also provides a simple approach for rpc mocking.\n// For example:\n//\n//   typedef rpc_holder<write_request, write_response> write_rpc;\n//\n//   void write() {\n//       ....\n//       auto request = make_unique<write_request>();\n//       request->data = \"abc\";\n//       request->timestamp = 12;\n//       write_rpc rpc(std::move(request), RPC_WRITE);\n//      
 rpc.call(rpc_address(\"10.57.223.31\", 12321), nullptr, on_write_rpc_reply);\n//       ...\n//   }\n//\n//   RPC_MOCKING(write_rpc) {\n//       write();\n//       ASSERT_EQ(1, write_rpc::mail_box().size());\n//   }\n//\n// Here in the instance of `RPC_MOCKING`, we call a function `write`, in which a write_rpc\n// was sent to \"10.57.223.31:12321\". However, since we are in the mock mode, every `write_request`\n// message will be dropped into `write_rpc::mail_box` without going through network.\n//\n\ntemplate <typename TRequest, typename TResponse>\nclass rpc_holder\n{\npublic:\n    using request_type = TRequest;\n    using response_type = TResponse;\n\npublic:\n    explicit rpc_holder(message_ex *req = nullptr)\n    {\n        if (req != nullptr) {\n            _i = std::make_shared<internal>(req);\n        }\n    }\n\n    rpc_holder(std::unique_ptr<TRequest> req,\n               task_code code,\n               std::chrono::milliseconds timeout = 0_ms,\n               uint64_t partition_hash = 0,\n               int thread_hash = 0)\n        : _i(new internal(req, code, timeout, partition_hash, thread_hash))\n    {\n    }\n\n    // copyable and movable\n    // Copying an rpc_holder doesn't produce a deep copy, the new instance will\n    // reference the same rpc internal data. 
So, just feel free to copy :)\n    rpc_holder(const rpc_holder &) = default;\n    rpc_holder(rpc_holder &&) noexcept = default;\n    rpc_holder &operator=(const rpc_holder &) = default;\n    rpc_holder &operator=(rpc_holder &&) noexcept = default;\n\n    bool is_initialized() const { return bool(_i); }\n\n    const TRequest &request() const\n    {\n        dassert(_i, \"rpc_holder is uninitialized\");\n        return *(_i->thrift_request);\n    }\n\n    TRequest *mutable_request() const\n    {\n        dassert(_i, \"rpc_holder is uninitialized\");\n        return _i->thrift_request.get();\n    }\n\n    TResponse &response() const\n    {\n        dassert(_i, \"rpc_holder is uninitialized\");\n        return _i->thrift_response;\n    }\n\n    dsn::error_code &error() const\n    {\n        dassert(_i, \"rpc_holder is uninitialized\");\n        return _i->rpc_error;\n    }\n\n    message_ex *dsn_request() const\n    {\n        dassert(_i, \"rpc_holder is uninitialized\");\n        return _i->dsn_request;\n    }\n\n    // the remote address that the request is received from and the response is sent to.\n    rpc_address remote_address() const { return dsn_request()->header->from_address; }\n\n    // TCallback = void(error_code)\n    // NOTE that the `error_code` is not the error carried by response. Users should
Users should\n    // check the responded error themselves.\n    template <typename TCallback>\n    task_ptr call(const rpc_address &server,\n                  task_tracker *tracker,\n                  TCallback &&callback,\n                  int reply_thread_hash = 0)\n    {\n        // ensures that TCallback receives exactly one argument, which must be a error_code.\n        static_assert(function_traits<TCallback>::arity == 1,\n                      \"TCallback must receive exactly one argument\");\n        static_assert(\n            std::is_same<typename function_traits<TCallback>::template arg_t<0>, error_code>::value,\n            \"the first argument of TCallback must be error_code\");\n\n        if (dsn_unlikely(_mail_box != nullptr)) {\n            _mail_box->emplace_back(*this);\n            return nullptr;\n        }\n\n        rpc_response_task_ptr t = rpc::create_rpc_response_task(\n            dsn_request(),\n            tracker,\n            [ cb_fwd = std::forward<TCallback>(callback),\n              rpc = *this ](error_code err, message_ex * req, message_ex * resp) mutable {\n                if (err == ERR_OK) {\n                    unmarshall(resp, rpc.response());\n                }\n                cb_fwd(err);\n            },\n            reply_thread_hash);\n        dsn_rpc_call(server, t);\n        return t;\n    }\n\n    template <typename TCallback>\n    task_ptr call(replication::partition_resolver_ptr &resolver,\n                  task_tracker *tracker,\n                  TCallback &&callback,\n                  int reply_thread_hash = 0)\n    {\n        static_assert(function_traits<TCallback>::arity == 1,\n                      \"TCallback must receive exactly one argument\");\n        static_assert(\n            std::is_same<typename function_traits<TCallback>::template arg_t<0>, error_code>::value,\n            \"the first argument of TCallback must be error_code\");\n\n        if (dsn_unlikely(_mail_box != nullptr)) {\n            
_mail_box->emplace_back(*this);\n            return nullptr;\n        }\n\n        rpc_response_task_ptr t = rpc::create_rpc_response_task(\n            dsn_request(),\n            tracker,\n            [ cb_fwd = std::forward<TCallback>(callback),\n              rpc = *this ](error_code err, message_ex * req, message_ex * resp) mutable {\n                if (err == ERR_OK) {\n                    unmarshall(resp, rpc.response());\n                }\n                cb_fwd(err);\n            },\n            reply_thread_hash);\n        resolver->call_task(t);\n        return t;\n    }\n\n    void forward(const rpc_address &addr)\n    {\n        _i->auto_reply = false;\n        if (dsn_unlikely(_forward_mail_box != nullptr)) {\n            dsn_request()->header->from_address = addr;\n            _forward_mail_box->emplace_back(*this);\n            return;\n        }\n\n        dsn_rpc_forward(dsn_request(), addr);\n    }\n\n    // Returns an rpc_holder that will reply the request after its lifetime ends.\n    // By default rpc_holder never replies.\n    // SEE: serverlet<T>::register_rpc_handler_with_rpc_holder\n    static inline rpc_holder auto_reply(message_ex *req)\n    {\n        rpc_holder rpc(req);\n        rpc._i->auto_reply = true;\n        return rpc;\n    }\n\n    // Only use this function when testing.\n    // In mock mode, all messages will be dropped into mail_box without going through network,\n    // and response callbacks will never be called.\n    // This function is not thread-safe.\n    using mail_box_t = std::vector<rpc_holder<TRequest, TResponse>>;\n    using mail_box_u_ptr = std::unique_ptr<mail_box_t>;\n    static void enable_mocking()\n    {\n        dassert(_mail_box == nullptr && _forward_mail_box == nullptr,\n                \"remember to call clear_mocking_env after testing\");\n        _mail_box = make_unique<mail_box_t>();\n        _forward_mail_box = make_unique<mail_box_t>();\n    }\n\n    // Only use this function when testing.\n    
// Remember to call it after test finishes, or it may effect the results of other tests.\n    // This function is not thread-safe.\n    static void clear_mocking_env()\n    {\n        _mail_box.reset(nullptr);\n        _forward_mail_box.reset(nullptr);\n    }\n\n    static mail_box_t &mail_box()\n    {\n        dassert(_mail_box != nullptr, \"call this function only when you are in mock mode\");\n        return *_mail_box.get();\n    }\n\n    static mail_box_t &forward_mail_box()\n    {\n        dassert(_forward_mail_box != nullptr, \"call this function only when you are in mock mode\");\n        return *_forward_mail_box.get();\n    }\n\n    friend bool operator<(const rpc_holder &lhs, const rpc_holder &rhs) { return lhs._i < rhs._i; }\n\nprivate:\n    friend class rpc_holder_test;\n\n    struct internal\n    {\n        explicit internal(message_ex *req)\n            : dsn_request(req), thrift_request(make_unique<TRequest>()), auto_reply(false)\n        {\n            // we must hold one reference for the request, or rdsn will delete it after\n            // the rpc call ends.\n            dsn_request->add_ref();\n            unmarshall(req, *thrift_request);\n        }\n\n        internal(std::unique_ptr<TRequest> &req,\n                 task_code code,\n                 std::chrono::milliseconds timeout,\n                 uint64_t partition_hash,\n                 int thread_hash)\n            : thrift_request(std::move(req)), auto_reply(false)\n        {\n            dassert(thrift_request != nullptr, \"req should not be null\");\n\n            dsn_request = message_ex::create_request(\n                code, static_cast<int>(timeout.count()), thread_hash, partition_hash);\n            dsn_request->add_ref();\n            marshall(dsn_request, *thrift_request);\n        }\n\n        void reply()\n        {\n            if (dsn_unlikely(_mail_box != nullptr)) {\n                rpc_holder<TRequest, TResponse> rpc(std::move(thrift_request),\n                       
                             dsn_request->rpc_code());\n                rpc.response() = std::move(thrift_response);\n                _mail_box->emplace_back(std::move(rpc));\n                return;\n            }\n\n            message_ex *dsn_response = dsn_request->create_response();\n            marshall(dsn_response, thrift_response);\n            dsn_rpc_reply(dsn_response, rpc_error);\n        }\n\n        ~internal()\n        {\n            if (auto_reply) {\n                reply();\n            }\n            dsn_request->release_ref();\n        }\n\n        message_ex *dsn_request;\n        std::unique_ptr<TRequest> thrift_request;\n        TResponse thrift_response;\n        dsn::error_code rpc_error = dsn::ERR_OK;\n\n        bool auto_reply;\n    };\n\n    std::shared_ptr<internal> _i;\n\n    static mail_box_u_ptr _mail_box;\n    static mail_box_u_ptr _forward_mail_box;\n};\n\n// ======== type traits ========\n\n// check if a given type is rpc_holder.\n// is_rpc_holder<T>::value = true indicates that type T is an rpc_holder.\n\ntemplate <typename T>\nstruct is_rpc_holder : public std::false_type\n{\n};\n\ntemplate <typename TRequest, typename TResponse>\nstruct is_rpc_holder<rpc_holder<TRequest, TResponse>> : public std::true_type\n{\n};\n\n// ======== utilities ========\n\nnamespace rpc {\n\n// call an RPC specified by rpc_holder.\n// TCallback = void(error_code)\n\ntemplate <typename TCallback, typename TRpcHolder>\ntask_ptr call(rpc_address server,\n              TRpcHolder rpc,\n              task_tracker *tracker,\n              TCallback &&callback,\n              int reply_thread_hash = 0)\n{\n    static_assert(is_rpc_holder<TRpcHolder>::value, \"TRpcHolder must be an rpc_holder\");\n    return rpc.call(server, tracker, std::forward<TCallback &&>(callback), reply_thread_hash);\n}\n\n} // namespace rpc\n\n// ======== rpc mock ========\n\ntemplate <typename TRequest, typename TResponse>\ntypename rpc_holder<TRequest, TResponse>::mail_box_u_ptr 
rpc_holder<TRequest, TResponse>::_mail_box;\ntemplate <typename TRequest, typename TResponse>\ntypename rpc_holder<TRequest, TResponse>::mail_box_u_ptr\n    rpc_holder<TRequest, TResponse>::_forward_mail_box;\n\ntemplate <typename TRpcHolder>\nstruct rpc_mock_wrapper\n{\n    rpc_mock_wrapper() { TRpcHolder::enable_mocking(); }\n    ~rpc_mock_wrapper() { TRpcHolder::clear_mocking_env(); }\n    int counter = 0;\n};\n\n#define RPC_MOCKING(__rpc_type__)                                                                  \\\n    for (::dsn::rpc_mock_wrapper<__rpc_type__> __rpc_type__##_mocking__;                           \\\n         __rpc_type__##_mocking__.counter != 1;                                                    \\\n         __rpc_type__##_mocking__.counter++)\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/cpp/rpc_stream.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/utils.h>\n#include <dsn/utility/binary_reader.h>\n#include <dsn/utility/binary_writer.h>\n\n#include <dsn/tool-api/rpc_message.h>\n#include <dsn/tool-api/auto_codes.h>\n#include <dsn/service_api_c.h>\n\nnamespace dsn {\n\n// rpc_read_stream is a bridge between binary_reader and rpc_message, with which you can\n// easily visit rpc_message's buffer in binary_reader's manner.\nclass rpc_read_stream : public binary_reader\n{\npublic:\n    rpc_read_stream(message_ex *msg) { set_read_msg(msg); }\n\n    rpc_read_stream() : _msg(nullptr) {}\n\n    void set_read_msg(message_ex *msg)\n    {\n        _msg = msg;\n        if (nullptr != _msg) {\n            ::dsn::blob bb;\n            bool r = 
((::dsn::message_ex *)_msg)->read_next(bb);\n            dassert(r, \"read msg must have one segment of buffer ready\");\n\n            init(std::move(bb));\n        }\n    }\n\n    int read(char *buffer, int sz) { return inner_read(buffer, sz); }\n\n    int read(blob &blob, int len) { return inner_read(blob, len); }\n\n    ~rpc_read_stream()\n    {\n        if (_msg) {\n            _msg->read_commit((size_t)(total_size() - get_remaining_size()));\n        }\n    }\n\nprivate:\n    dsn::message_ex *_msg;\n};\ntypedef ::dsn::ref_ptr<rpc_read_stream> rpc_read_stream_ptr;\n\n// rpc_write_stream is a bridge between binary_writer and rpc_message, with which you can\n// easily store data to rpc_message's buffer in binary_writer's manner.\nclass rpc_write_stream : public binary_writer\n{\npublic:\n    rpc_write_stream(message_ex *msg)\n        : _msg(msg), _last_write_next_committed(true), _last_write_next_total_size(0)\n    {\n    }\n\n    // write buffer for rpc_write_stream is allocated from\n    // a per-thread pool, and it is expected that\n    // the per-thread pool cannot allocated two outstanding\n    // buffers at the same time.\n    // e.g., alloc1, commit1, alloc2, commit2 is ok\n    // while alloc1, alloc2, commit2, commit 1 is invalid\n    // TODO(zhaoliwei): remove functions and variables below, because msg is not implemented by tls\n    // memory now\n    void commit_buffer()\n    {\n        if (!_last_write_next_committed) {\n            _msg->write_commit((size_t)(total_size() - _last_write_next_total_size));\n            _last_write_next_committed = true;\n        }\n    }\n\n    virtual ~rpc_write_stream() { flush(); }\n\n    virtual void flush() override\n    {\n        binary_writer::flush();\n        commit_buffer();\n    }\n\nprivate:\n    virtual void create_new_buffer(size_t size, /*out*/ blob &bb) override\n    {\n        commit_buffer();\n\n        void *ptr;\n        size_t sz;\n        _msg->write_next(&ptr, &sz, size);\n        dbg_dassert(sz 
>= size, \"allocated buffer size must be not less than the required size\");\n        bb.assign((const char *)ptr, 0, (int)sz);\n\n        _last_write_next_total_size = total_size();\n        _last_write_next_committed = false;\n    }\n\nprivate:\n    message_ex *_msg;\n    bool _last_write_next_committed;\n    int _last_write_next_total_size;\n};\ntypedef ::dsn::ref_ptr<rpc_write_stream> rpc_write_stream_ptr;\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/cpp/serialization.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/utils.h>\n#include <dsn/tool-api/rpc_address.h>\n#include <dsn/cpp/rpc_stream.h>\n#include <dsn/cpp/serialization_helper/thrift_helper.h>\n\nnamespace dsn {\nnamespace serialization {\n\ntemplate <typename T>\nstd::string no_registered_function_error_notice(const T &t, dsn_msg_serialize_format fmt)\n{\n    std::stringstream ss;\n    ss << \"This error occurs because someone is trying to \";\n    ss << \"serialize/deserialize an object of the type \";\n    ss << typeid(t).name();\n    ss << \" but has not registered corresponding serialization/deserialization function for the \"\n          \"format of \";\n    //           ss << enum_to_string(fmt) << \".\";\n    ss << fmt << \".\";\n    return 
ss.str();\n}\n\n} // namespace serialization\n\ntemplate <typename ThriftType>\ninline void marshall(binary_writer &writer, const ThriftType &value, dsn_msg_serialize_format fmt)\n{\n    switch (fmt) {\n    case DSF_THRIFT_BINARY:\n        marshall_thrift_binary(writer, value);\n        break;\n    case DSF_THRIFT_JSON:\n        marshall_thrift_json(writer, value);\n        break;\n    default:\n        dassert(false, serialization::no_registered_function_error_notice(value, fmt).c_str());\n    }\n}\n\ntemplate <typename ThriftType>\ninline void unmarshall(binary_reader &reader, ThriftType &value, dsn_msg_serialize_format fmt)\n{\n    switch (fmt) {\n    case DSF_THRIFT_BINARY:\n        unmarshall_thrift_binary(reader, value);\n        break;\n    case DSF_THRIFT_JSON:\n        unmarshall_thrift_json(reader, value);\n        break;\n    default:\n        dassert(false, serialization::no_registered_function_error_notice(value, fmt).c_str());\n    }\n}\n\ntemplate <typename T>\ninline void marshall(dsn::message_ex *msg, const T &val)\n{\n    ::dsn::rpc_write_stream writer(msg);\n    marshall(writer, val, (dsn_msg_serialize_format)msg->header->context.u.serialize_format);\n}\n\ntemplate <typename T>\ninline void marshall(dsn::message_ex *msg, const T &val, dsn_msg_serialize_format fmt)\n{\n    ::dsn::rpc_write_stream writer(msg);\n    marshall(writer, val, fmt);\n}\n\ntemplate <typename T>\ninline void unmarshall(dsn::message_ex *msg, /*out*/ T &val)\n{\n    ::dsn::rpc_read_stream reader(msg);\n    unmarshall(reader, val, (dsn_msg_serialize_format)msg->header->context.u.serialize_format);\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/cpp/serialization_helper/dsn.layer2_types.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/**\n * Autogenerated by Thrift Compiler (0.9.3)\n *\n * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING\n *  @generated\n */\n#ifndef dsn_layer2_TYPES_H\n#define dsn_layer2_TYPES_H\n\n#include <dsn/cpp/serialization_helper/dsn_types.h>\n#include <iosfwd>\n\n#include <thrift/Thrift.h>\n#include <thrift/TApplicationException.h>\n#include <thrift/protocol/TProtocol.h>\n#include <thrift/transport/TTransport.h>\n\n#include <thrift/cxxfunctional.h>\n\nnamespace dsn {\n\nstruct app_status\n{\n    enum type\n    {\n        AS_INVALID = 0,\n        AS_AVAILABLE = 1,\n        AS_CREATING = 2,\n        AS_CREATE_FAILED = 3,\n        AS_DROPPING = 4,\n        AS_DROP_FAILED = 5,\n        AS_DROPPED = 6,\n        AS_RECALLING = 
7\n    };\n};\n\nextern const std::map<int, const char *> _app_status_VALUES_TO_NAMES;\n\nclass partition_configuration;\n\nclass configuration_query_by_index_request;\n\nclass configuration_query_by_index_response;\n\nclass app_info;\n\ntypedef struct _partition_configuration__isset\n{\n    _partition_configuration__isset()\n        : pid(false),\n          ballot(false),\n          max_replica_count(false),\n          primary(false),\n          secondaries(false),\n          last_drops(false),\n          last_committed_decree(false),\n          partition_flags(false)\n    {\n    }\n    bool pid : 1;\n    bool ballot : 1;\n    bool max_replica_count : 1;\n    bool primary : 1;\n    bool secondaries : 1;\n    bool last_drops : 1;\n    bool last_committed_decree : 1;\n    bool partition_flags : 1;\n} _partition_configuration__isset;\n\nclass partition_configuration\n{\npublic:\n    partition_configuration(const partition_configuration &);\n    partition_configuration(partition_configuration &&);\n    partition_configuration &operator=(const partition_configuration &);\n    partition_configuration &operator=(partition_configuration &&);\n    partition_configuration()\n        : ballot(0), max_replica_count(0), last_committed_decree(0), partition_flags(0)\n    {\n    }\n\n    virtual ~partition_configuration() throw();\n    ::dsn::gpid pid;\n    int64_t ballot;\n    int32_t max_replica_count;\n    ::dsn::rpc_address primary;\n    std::vector<::dsn::rpc_address> secondaries;\n    std::vector<::dsn::rpc_address> last_drops;\n    int64_t last_committed_decree;\n    int32_t partition_flags;\n\n    _partition_configuration__isset __isset;\n\n    void __set_pid(const ::dsn::gpid &val);\n\n    void __set_ballot(const int64_t val);\n\n    void __set_max_replica_count(const int32_t val);\n\n    void __set_primary(const ::dsn::rpc_address &val);\n\n    void __set_secondaries(const std::vector<::dsn::rpc_address> &val);\n\n    void __set_last_drops(const 
std::vector<::dsn::rpc_address> &val);\n\n    void __set_last_committed_decree(const int64_t val);\n\n    void __set_partition_flags(const int32_t val);\n\n    bool operator==(const partition_configuration &rhs) const\n    {\n        if (!(pid == rhs.pid))\n            return false;\n        if (!(ballot == rhs.ballot))\n            return false;\n        if (!(max_replica_count == rhs.max_replica_count))\n            return false;\n        if (!(primary == rhs.primary))\n            return false;\n        if (!(secondaries == rhs.secondaries))\n            return false;\n        if (!(last_drops == rhs.last_drops))\n            return false;\n        if (!(last_committed_decree == rhs.last_committed_decree))\n            return false;\n        if (!(partition_flags == rhs.partition_flags))\n            return false;\n        return true;\n    }\n    bool operator!=(const partition_configuration &rhs) const { return !(*this == rhs); }\n\n    bool operator<(const partition_configuration &) const;\n\n    uint32_t read(::apache::thrift::protocol::TProtocol *iprot);\n    uint32_t write(::apache::thrift::protocol::TProtocol *oprot) const;\n\n    virtual void printTo(std::ostream &out) const;\n};\n\nvoid swap(partition_configuration &a, partition_configuration &b);\n\ninline std::ostream &operator<<(std::ostream &out, const partition_configuration &obj)\n{\n    obj.printTo(out);\n    return out;\n}\n\ntypedef struct _configuration_query_by_index_request__isset\n{\n    _configuration_query_by_index_request__isset() : app_name(false), partition_indices(false) {}\n    bool app_name : 1;\n    bool partition_indices : 1;\n} _configuration_query_by_index_request__isset;\n\nclass configuration_query_by_index_request\n{\npublic:\n    configuration_query_by_index_request(const configuration_query_by_index_request &);\n    configuration_query_by_index_request(configuration_query_by_index_request &&);\n    configuration_query_by_index_request &operator=(const 
configuration_query_by_index_request &);\n    configuration_query_by_index_request &operator=(configuration_query_by_index_request &&);\n    configuration_query_by_index_request() : app_name() {}\n\n    virtual ~configuration_query_by_index_request() throw();\n    std::string app_name;\n    std::vector<int32_t> partition_indices;\n\n    _configuration_query_by_index_request__isset __isset;\n\n    void __set_app_name(const std::string &val);\n\n    void __set_partition_indices(const std::vector<int32_t> &val);\n\n    bool operator==(const configuration_query_by_index_request &rhs) const\n    {\n        if (!(app_name == rhs.app_name))\n            return false;\n        if (!(partition_indices == rhs.partition_indices))\n            return false;\n        return true;\n    }\n    bool operator!=(const configuration_query_by_index_request &rhs) const\n    {\n        return !(*this == rhs);\n    }\n\n    bool operator<(const configuration_query_by_index_request &) const;\n\n    uint32_t read(::apache::thrift::protocol::TProtocol *iprot);\n    uint32_t write(::apache::thrift::protocol::TProtocol *oprot) const;\n\n    virtual void printTo(std::ostream &out) const;\n};\n\nvoid swap(configuration_query_by_index_request &a, configuration_query_by_index_request &b);\n\ninline std::ostream &operator<<(std::ostream &out, const configuration_query_by_index_request &obj)\n{\n    obj.printTo(out);\n    return out;\n}\n\ntypedef struct _configuration_query_by_index_response__isset\n{\n    _configuration_query_by_index_response__isset()\n        : err(false), app_id(false), partition_count(false), is_stateful(false), partitions(false)\n    {\n    }\n    bool err : 1;\n    bool app_id : 1;\n    bool partition_count : 1;\n    bool is_stateful : 1;\n    bool partitions : 1;\n} _configuration_query_by_index_response__isset;\n\nclass configuration_query_by_index_response\n{\npublic:\n    configuration_query_by_index_response(const configuration_query_by_index_response &);\n    
configuration_query_by_index_response(configuration_query_by_index_response &&);\n    configuration_query_by_index_response &operator=(const configuration_query_by_index_response &);\n    configuration_query_by_index_response &operator=(configuration_query_by_index_response &&);\n    configuration_query_by_index_response() : app_id(0), partition_count(0), is_stateful(0) {}\n\n    virtual ~configuration_query_by_index_response() throw();\n    ::dsn::error_code err;\n    int32_t app_id;\n    int32_t partition_count;\n    bool is_stateful;\n    std::vector<partition_configuration> partitions;\n\n    _configuration_query_by_index_response__isset __isset;\n\n    void __set_err(const ::dsn::error_code &val);\n\n    void __set_app_id(const int32_t val);\n\n    void __set_partition_count(const int32_t val);\n\n    void __set_is_stateful(const bool val);\n\n    void __set_partitions(const std::vector<partition_configuration> &val);\n\n    bool operator==(const configuration_query_by_index_response &rhs) const\n    {\n        if (!(err == rhs.err))\n            return false;\n        if (!(app_id == rhs.app_id))\n            return false;\n        if (!(partition_count == rhs.partition_count))\n            return false;\n        if (!(is_stateful == rhs.is_stateful))\n            return false;\n        if (!(partitions == rhs.partitions))\n            return false;\n        return true;\n    }\n    bool operator!=(const configuration_query_by_index_response &rhs) const\n    {\n        return !(*this == rhs);\n    }\n\n    bool operator<(const configuration_query_by_index_response &) const;\n\n    uint32_t read(::apache::thrift::protocol::TProtocol *iprot);\n    uint32_t write(::apache::thrift::protocol::TProtocol *oprot) const;\n\n    virtual void printTo(std::ostream &out) const;\n};\n\nvoid swap(configuration_query_by_index_response &a, configuration_query_by_index_response &b);\n\ninline std::ostream &operator<<(std::ostream &out, const 
configuration_query_by_index_response &obj)\n{\n    obj.printTo(out);\n    return out;\n}\n\ntypedef struct _app_info__isset\n{\n    _app_info__isset()\n        : status(true),\n          app_type(false),\n          app_name(false),\n          app_id(false),\n          partition_count(false),\n          envs(false),\n          is_stateful(false),\n          max_replica_count(false),\n          expire_second(false),\n          create_second(false),\n          drop_second(false),\n          duplicating(true),\n          init_partition_count(true),\n          is_bulk_loading(true)\n    {\n    }\n    bool status : 1;\n    bool app_type : 1;\n    bool app_name : 1;\n    bool app_id : 1;\n    bool partition_count : 1;\n    bool envs : 1;\n    bool is_stateful : 1;\n    bool max_replica_count : 1;\n    bool expire_second : 1;\n    bool create_second : 1;\n    bool drop_second : 1;\n    bool duplicating : 1;\n    bool init_partition_count : 1;\n    bool is_bulk_loading : 1;\n} _app_info__isset;\n\nclass app_info\n{\npublic:\n    app_info(const app_info &);\n    app_info(app_info &&);\n    app_info &operator=(const app_info &);\n    app_info &operator=(app_info &&);\n    app_info()\n        : status((app_status::type)0),\n          app_type(),\n          app_name(),\n          app_id(0),\n          partition_count(0),\n          is_stateful(0),\n          max_replica_count(0),\n          expire_second(0),\n          create_second(0),\n          drop_second(0),\n          duplicating(false),\n          init_partition_count(-1),\n          is_bulk_loading(false)\n    {\n        status = (app_status::type)0;\n    }\n\n    virtual ~app_info() throw();\n    app_status::type status;\n    std::string app_type;\n    std::string app_name;\n    int32_t app_id;\n    int32_t partition_count;\n    std::map<std::string, std::string> envs;\n    bool is_stateful;\n    int32_t max_replica_count;\n    int64_t expire_second;\n    int64_t create_second;\n    int64_t drop_second;\n    bool 
duplicating;\n    int32_t init_partition_count;\n    bool is_bulk_loading;\n\n    _app_info__isset __isset;\n\n    void __set_status(const app_status::type val);\n\n    void __set_app_type(const std::string &val);\n\n    void __set_app_name(const std::string &val);\n\n    void __set_app_id(const int32_t val);\n\n    void __set_partition_count(const int32_t val);\n\n    void __set_envs(const std::map<std::string, std::string> &val);\n\n    void __set_is_stateful(const bool val);\n\n    void __set_max_replica_count(const int32_t val);\n\n    void __set_expire_second(const int64_t val);\n\n    void __set_create_second(const int64_t val);\n\n    void __set_drop_second(const int64_t val);\n\n    void __set_duplicating(const bool val);\n\n    void __set_init_partition_count(const int32_t val);\n\n    void __set_is_bulk_loading(const bool val);\n\n    bool operator==(const app_info &rhs) const\n    {\n        if (!(status == rhs.status))\n            return false;\n        if (!(app_type == rhs.app_type))\n            return false;\n        if (!(app_name == rhs.app_name))\n            return false;\n        if (!(app_id == rhs.app_id))\n            return false;\n        if (!(partition_count == rhs.partition_count))\n            return false;\n        if (!(envs == rhs.envs))\n            return false;\n        if (!(is_stateful == rhs.is_stateful))\n            return false;\n        if (!(max_replica_count == rhs.max_replica_count))\n            return false;\n        if (!(expire_second == rhs.expire_second))\n            return false;\n        if (!(create_second == rhs.create_second))\n            return false;\n        if (!(drop_second == rhs.drop_second))\n            return false;\n        if (__isset.duplicating != rhs.__isset.duplicating)\n            return false;\n        else if (__isset.duplicating && !(duplicating == rhs.duplicating))\n            return false;\n        if (!(init_partition_count == rhs.init_partition_count))\n            return false;\n 
       if (__isset.is_bulk_loading != rhs.__isset.is_bulk_loading)\n            return false;\n        else if (__isset.is_bulk_loading && !(is_bulk_loading == rhs.is_bulk_loading))\n            return false;\n        return true;\n    }\n    bool operator!=(const app_info &rhs) const { return !(*this == rhs); }\n\n    bool operator<(const app_info &) const;\n\n    uint32_t read(::apache::thrift::protocol::TProtocol *iprot);\n    uint32_t write(::apache::thrift::protocol::TProtocol *oprot) const;\n\n    virtual void printTo(std::ostream &out) const;\n};\n\nvoid swap(app_info &a, app_info &b);\n\ninline std::ostream &operator<<(std::ostream &out, const app_info &obj)\n{\n    obj.printTo(out);\n    return out;\n}\n\n} // namespace\n\n#endif\n"
  },
  {
    "path": "include/dsn/cpp/serialization_helper/dsn_types.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/cpp/serialization.h>"
  },
  {
    "path": "include/dsn/cpp/serialization_helper/thrift_helper.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     2016-02-24, Weijie Sun(sunweijie[at]xiaomi.com), add support for serialization in thrift\n *     2016-03-01, Weijie Sun(sunweijie[at]xiaomi.com), add support for rpc in thrift\n */\n\n#pragma once\n\n#include <dsn/tool_api.h>\n#include <dsn/cpp/rpc_stream.h>\n\n#include <thrift/Thrift.h>\n#include <thrift/protocol/TBinaryProtocol.h>\n#include <thrift/protocol/TJSONProtocol.h>\n#include <thrift/protocol/TVirtualProtocol.h>\n#include <thrift/transport/TVirtualTransport.h>\n#include <thrift/TApplicationException.h>\n#include <type_traits>\n\nusing namespace ::apache::thrift::transport;\nnamespace dsn 
{\n\nclass binary_reader_transport : public TVirtualTransport<binary_reader_transport>\n{\npublic:\n    binary_reader_transport(binary_reader &reader) : _reader(reader) {}\n\n    bool isOpen() { return true; }\n\n    void open() {}\n\n    void close() {}\n\n    uint32_t read(uint8_t *buf, uint32_t len)\n    {\n        int l = _reader.read((char *)buf, static_cast<int>(len));\n        if (dsn_unlikely(l <= 0)) {\n            throw TTransportException(TTransportException::END_OF_FILE,\n                                      \"no more data to read after end-of-buffer\");\n        }\n        return (uint32_t)l;\n    }\n\nprivate:\n    binary_reader &_reader;\n};\n\nclass binary_writer_transport : public TVirtualTransport<binary_writer_transport>\n{\npublic:\n    binary_writer_transport(binary_writer &writer) : _writer(writer) {}\n\n    bool isOpen() { return true; }\n\n    void open() {}\n\n    void close() {}\n\n    void write(const uint8_t *buf, uint32_t len)\n    {\n        _writer.write((const char *)buf, static_cast<int>(len));\n    }\n\nprivate:\n    binary_writer &_writer;\n};\n\n#define DEFINE_THRIFT_BASE_TYPE_SERIALIZATION(TName, TRealName, TTag, TMethod)                     \\\n    inline uint32_t write_base(::apache::thrift::protocol::TProtocol *proto, const TName &val)     \\\n    {                                                                                              \\\n        return proto->write##TMethod((const TRealName &)val);                                      \\\n    }                                                                                              \\\n    inline uint32_t read_base(::apache::thrift::protocol::TProtocol *proto, /*out*/ TName &val)    \\\n    {                                                                                              \\\n        return proto->read##TMethod((TRealName &)val);                                             \\\n    }\n\nDEFINE_THRIFT_BASE_TYPE_SERIALIZATION(bool, bool, BOOL, 
Bool)\nDEFINE_THRIFT_BASE_TYPE_SERIALIZATION(int8_t, int8_t, I08, Byte)\nDEFINE_THRIFT_BASE_TYPE_SERIALIZATION(int16_t, int16_t, I16, I16)\nDEFINE_THRIFT_BASE_TYPE_SERIALIZATION(int32_t, int32_t, I32, I32)\nDEFINE_THRIFT_BASE_TYPE_SERIALIZATION(int64_t, int64_t, I64, I64)\nDEFINE_THRIFT_BASE_TYPE_SERIALIZATION(uint8_t, int8_t, I08, Byte)\nDEFINE_THRIFT_BASE_TYPE_SERIALIZATION(uint16_t, int16_t, I16, I16)\nDEFINE_THRIFT_BASE_TYPE_SERIALIZATION(uint32_t, int32_t, I32, I32)\nDEFINE_THRIFT_BASE_TYPE_SERIALIZATION(uint64_t, int64_t, I64, I64)\nDEFINE_THRIFT_BASE_TYPE_SERIALIZATION(double, double, DOUBLE, Double)\nDEFINE_THRIFT_BASE_TYPE_SERIALIZATION(std::string, std::string, STRING, String)\n\ntemplate <typename T>\nuint32_t marshall_base(::apache::thrift::protocol::TProtocol *oproto, const T &val);\ntemplate <typename T>\nuint32_t unmarshall_base(::apache::thrift::protocol::TProtocol *iproto, T &val);\n\ntemplate <typename T>\ninline uint32_t write_base(::apache::thrift::protocol::TProtocol *oprot, const std::vector<T> &val)\n{\n    uint32_t xfer = oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT,\n                                          static_cast<uint32_t>(val.size()));\n    for (auto iter = val.begin(); iter != val.end(); ++iter) {\n        marshall_base(oprot, *iter);\n    }\n    xfer += oprot->writeListEnd();\n    return xfer;\n}\n\ntemplate <typename T>\ninline uint32_t read_base(::apache::thrift::protocol::TProtocol *iprot, std::vector<T> &val)\n{\n    uint32_t xfer = 0;\n\n    val.clear();\n    uint32_t size;\n    ::apache::thrift::protocol::TType element_type;\n    xfer += iprot->readListBegin(element_type, size);\n    val.resize(size);\n    for (uint32_t i = 0; i != size; ++i) {\n        xfer += unmarshall_base(iprot, val[i]);\n    }\n    xfer += iprot->readListEnd();\n\n    return xfer;\n}\n\ntemplate <typename T_KEY, typename T_VALUE>\ninline uint32_t write_base(::apache::thrift::protocol::TProtocol *oprot,\n                           const 
std::map<T_KEY, T_VALUE> &val)\n{\n    uint32_t xfer = 0;\n\n    xfer += oprot->writeMapBegin(::apache::thrift::protocol::T_STRUCT,\n                                 ::apache::thrift::protocol::T_STRUCT,\n                                 static_cast<uint32_t>(val.size()));\n    for (auto iter = val.begin(); iter != val.end(); ++iter) {\n        xfer += marshall_base(oprot, iter->first);\n        xfer += marshall_base(oprot, iter->second);\n    }\n    xfer += oprot->writeMapEnd();\n\n    return xfer;\n}\n\ntemplate <typename T_KEY, typename T_VALUE>\ninline uint32_t read_base(::apache::thrift::protocol::TProtocol *iprot,\n                          std::map<T_KEY, T_VALUE> &val)\n{\n    int xfer = 0;\n\n    uint32_t size;\n    ::apache::thrift::protocol::TType ktype;\n    ::apache::thrift::protocol::TType vtype;\n    xfer += iprot->readMapBegin(ktype, vtype, size);\n    for (uint32_t i = 0; i < size; ++i) {\n        T_KEY mkey;\n        xfer += unmarshall_base(iprot, mkey);\n        T_VALUE &mval = val[mkey];\n        xfer += unmarshall_base(iprot, mval);\n    }\n    xfer += iprot->readMapEnd();\n\n    return xfer;\n}\n\nclass blob_string\n{\nprivate:\n    blob &_buffer;\n\npublic:\n    blob_string(blob &bb) : _buffer(bb) {}\n\n    void clear() { _buffer.assign(std::shared_ptr<char>(nullptr), 0, 0); }\n    void resize(std::size_t new_size)\n    {\n        std::shared_ptr<char> b(dsn::utils::make_shared_array<char>(new_size));\n        _buffer.assign(b, 0, static_cast<int>(new_size));\n    }\n    void assign(const char *ptr, std::size_t size)\n    {\n        std::shared_ptr<char> b(dsn::utils::make_shared_array<char>(size));\n        memcpy(b.get(), ptr, size);\n        _buffer.assign(b, 0, static_cast<int>(size));\n    }\n    const char *data() const { return _buffer.data(); }\n    size_t size() const { return _buffer.length(); }\n\n    char &operator[](int pos) { return const_cast<char *>(_buffer.data())[pos]; }\n};\n\ninline uint32_t 
rpc_address::read(apache::thrift::protocol::TProtocol *iprot)\n{\n    apache::thrift::protocol::TBinaryProtocol *binary_proto =\n        dynamic_cast<apache::thrift::protocol::TBinaryProtocol *>(iprot);\n    if (binary_proto != nullptr) {\n        // the protocol is binary protocol\n        auto r = iprot->readI64(reinterpret_cast<int64_t &>(_addr.value));\n        dassert(_addr.v4.type == HOST_TYPE_INVALID || _addr.v4.type == HOST_TYPE_IPV4,\n                \"only invalid or ipv4 can be deserialized from binary\");\n        return r;\n    } else {\n        // the protocol is json protocol\n        std::string host;\n        int port = 0;\n\n        uint32_t xfer = 0;\n        std::string fname;\n        ::apache::thrift::protocol::TType ftype;\n        int16_t fid;\n\n        xfer += iprot->readStructBegin(fname);\n\n        using ::apache::thrift::protocol::TProtocolException;\n\n        while (true) {\n            xfer += iprot->readFieldBegin(fname, ftype, fid);\n            if (ftype == ::apache::thrift::protocol::T_STOP) {\n                break;\n            }\n            switch (fid) {\n            case 1:\n                if (ftype == ::apache::thrift::protocol::T_STRING) {\n                    xfer += iprot->readString(host);\n                } else {\n                    xfer += iprot->skip(ftype);\n                }\n                break;\n            case 2:\n                if (ftype == ::apache::thrift::protocol::T_I32) {\n                    xfer += iprot->readI32(port);\n                } else {\n                    xfer += iprot->skip(ftype);\n                }\n                break;\n            default:\n                xfer += iprot->skip(ftype);\n                break;\n            }\n            xfer += iprot->readFieldEnd();\n        }\n\n        xfer += iprot->readStructEnd();\n\n        // currently only support ipv4 format\n        this->assign_ipv4(host.c_str(), port);\n\n        return xfer;\n    }\n}\n\ninline uint32_t 
rpc_address::write(apache::thrift::protocol::TProtocol *oprot) const\n{\n    apache::thrift::protocol::TBinaryProtocol *binary_proto =\n        dynamic_cast<apache::thrift::protocol::TBinaryProtocol *>(oprot);\n    if (binary_proto != nullptr) {\n        // the protocol is binary protocol\n        dassert(_addr.v4.type == HOST_TYPE_INVALID || _addr.v4.type == HOST_TYPE_IPV4,\n                \"only invalid or ipv4 can be serialized to binary\");\n        return oprot->writeI64((int64_t)_addr.value);\n    } else {\n        // the protocol is json protocol\n        std::string host(this->to_string());\n        int port = 0;\n        size_t sep_index = host.find(':');\n        if (sep_index != std::string::npos) {\n            port = std::stoi(host.substr(sep_index + 1));\n            host = host.substr(0, sep_index);\n        }\n\n        uint32_t xfer = 0;\n\n        xfer += oprot->writeStructBegin(\"rpc_address\");\n\n        xfer += oprot->writeFieldBegin(\"host\", ::apache::thrift::protocol::T_STRING, 1);\n        xfer += oprot->writeString(host);\n        xfer += oprot->writeFieldEnd();\n\n        xfer += oprot->writeFieldBegin(\"port\", ::apache::thrift::protocol::T_I32, 2);\n        xfer += oprot->writeI32(port);\n        xfer += oprot->writeFieldEnd();\n\n        xfer += oprot->writeFieldStop();\n        xfer += oprot->writeStructEnd();\n        return xfer;\n    }\n}\n\ninline uint32_t gpid::read(apache::thrift::protocol::TProtocol *iprot)\n{\n    apache::thrift::protocol::TBinaryProtocol *binary_proto =\n        dynamic_cast<apache::thrift::protocol::TBinaryProtocol *>(iprot);\n    if (binary_proto != nullptr) {\n        // the protocol is binary protocol\n        return iprot->readI64(reinterpret_cast<int64_t &>(_value.value));\n    } else {\n        // the protocol is json protocol\n        uint32_t xfer = 0;\n        std::string fname;\n        ::apache::thrift::protocol::TType ftype;\n        int16_t fid;\n\n        xfer += 
iprot->readStructBegin(fname);\n\n        using ::apache::thrift::protocol::TProtocolException;\n\n        while (true) {\n            xfer += iprot->readFieldBegin(fname, ftype, fid);\n            if (ftype == ::apache::thrift::protocol::T_STOP) {\n                break;\n            }\n            switch (fid) {\n            case 1:\n                if (ftype == ::apache::thrift::protocol::T_I64) {\n                    xfer += iprot->readI64(reinterpret_cast<int64_t &>(_value.value));\n                } else {\n                    xfer += iprot->skip(ftype);\n                }\n                break;\n            default:\n                xfer += iprot->skip(ftype);\n                break;\n            }\n            xfer += iprot->readFieldEnd();\n        }\n\n        xfer += iprot->readStructEnd();\n\n        return xfer;\n    }\n}\n\ninline uint32_t gpid::write(apache::thrift::protocol::TProtocol *oprot) const\n{\n    apache::thrift::protocol::TBinaryProtocol *binary_proto =\n        dynamic_cast<apache::thrift::protocol::TBinaryProtocol *>(oprot);\n    if (binary_proto != nullptr) {\n        // the protocol is binary protocol\n        return oprot->writeI64((int64_t)_value.value);\n    } else {\n        // the protocol is json protocol\n        uint32_t xfer = 0;\n\n        xfer += oprot->writeStructBegin(\"gpid\");\n\n        xfer += oprot->writeFieldBegin(\"id\", ::apache::thrift::protocol::T_I64, 1);\n        xfer += oprot->writeI64((int64_t)_value.value);\n        xfer += oprot->writeFieldEnd();\n\n        xfer += oprot->writeFieldStop();\n        xfer += oprot->writeStructEnd();\n        return xfer;\n    }\n}\n\ninline uint32_t task_code::read(apache::thrift::protocol::TProtocol *iprot)\n{\n    std::string task_code_string;\n    uint32_t xfer = 0;\n    apache::thrift::protocol::TBinaryProtocol *binary_proto =\n        dynamic_cast<apache::thrift::protocol::TBinaryProtocol *>(iprot);\n    if (binary_proto != nullptr) {\n        // the protocol is binary 
protocol\n        xfer += iprot->readString(task_code_string);\n    } else {\n        // the protocol is json protocol\n        std::string fname;\n        ::apache::thrift::protocol::TType ftype;\n        int16_t fid;\n\n        xfer += iprot->readStructBegin(fname);\n\n        using ::apache::thrift::protocol::TProtocolException;\n\n        while (true) {\n            xfer += iprot->readFieldBegin(fname, ftype, fid);\n            if (ftype == ::apache::thrift::protocol::T_STOP) {\n                break;\n            }\n            switch (fid) {\n            case 1:\n                if (ftype == ::apache::thrift::protocol::T_STRING) {\n                    xfer += iprot->readString(task_code_string);\n                } else {\n                    xfer += iprot->skip(ftype);\n                }\n                break;\n            default:\n                xfer += iprot->skip(ftype);\n                break;\n            }\n            xfer += iprot->readFieldEnd();\n        }\n\n        xfer += iprot->readStructEnd();\n    }\n    _internal_code = try_get(task_code_string, TASK_CODE_INVALID);\n    return xfer;\n}\n\ninline uint32_t task_code::write(apache::thrift::protocol::TProtocol *oprot) const\n{\n    const char *name = to_string();\n    apache::thrift::protocol::TBinaryProtocol *binary_proto =\n        dynamic_cast<apache::thrift::protocol::TBinaryProtocol *>(oprot);\n    if (binary_proto != nullptr) {\n        // the protocol is binary protocol\n        return binary_proto->writeString(string_view(name));\n    } else {\n        // the protocol is json protocol\n        uint32_t xfer = 0;\n        xfer += oprot->writeStructBegin(\"task_code\");\n\n        xfer += oprot->writeFieldBegin(\"code\", ::apache::thrift::protocol::T_STRING, 1);\n        xfer += oprot->writeString(std::string(name));\n        xfer += oprot->writeFieldEnd();\n\n        xfer += oprot->writeFieldStop();\n        xfer += oprot->writeStructEnd();\n        return xfer;\n    }\n}\n\ninline 
uint32_t blob::read(apache::thrift::protocol::TProtocol *iprot)\n{\n    // for optimization, it is dangerous if the oprot is not a binary proto\n    apache::thrift::protocol::TBinaryProtocol *binary_proto =\n        static_cast<apache::thrift::protocol::TBinaryProtocol *>(iprot);\n    blob_string str(*this);\n    return binary_proto->readString<blob_string>(str);\n}\n\ninline uint32_t blob::write(apache::thrift::protocol::TProtocol *oprot) const\n{\n    apache::thrift::protocol::TBinaryProtocol *binary_proto =\n        static_cast<apache::thrift::protocol::TBinaryProtocol *>(oprot);\n    return binary_proto->writeString<blob_string>(blob_string(const_cast<blob &>(*this)));\n}\n\ninline uint32_t error_code::read(apache::thrift::protocol::TProtocol *iprot)\n{\n    std::string ec_string;\n    apache::thrift::protocol::TBinaryProtocol *binary_proto =\n        dynamic_cast<apache::thrift::protocol::TBinaryProtocol *>(iprot);\n    uint32_t xfer = 0;\n    if (binary_proto != nullptr) {\n        // the protocol is binary protocol\n        xfer += iprot->readString(ec_string);\n    } else {\n        // the protocol is json protocol\n        std::string fname;\n        ::apache::thrift::protocol::TType ftype;\n        int16_t fid;\n\n        xfer += iprot->readStructBegin(fname);\n\n        using ::apache::thrift::protocol::TProtocolException;\n\n        while (true) {\n            xfer += iprot->readFieldBegin(fname, ftype, fid);\n            if (ftype == ::apache::thrift::protocol::T_STOP) {\n                break;\n            }\n            switch (fid) {\n            case 1:\n                if (ftype == ::apache::thrift::protocol::T_STRING) {\n                    xfer += iprot->readString(ec_string);\n                } else {\n                    xfer += iprot->skip(ftype);\n                }\n                break;\n            default:\n                xfer += iprot->skip(ftype);\n                break;\n            }\n            xfer += iprot->readFieldEnd();\n     
   }\n\n        xfer += iprot->readStructEnd();\n    }\n    *this = error_code::try_get(ec_string, ERR_UNKNOWN);\n    return xfer;\n}\n\ninline uint32_t error_code::write(apache::thrift::protocol::TProtocol *oprot) const\n{\n    const char *name = to_string();\n    apache::thrift::protocol::TBinaryProtocol *binary_proto =\n        dynamic_cast<apache::thrift::protocol::TBinaryProtocol *>(oprot);\n    if (binary_proto != nullptr) {\n        // the protocol is binary protocol\n        return binary_proto->writeString(string_view(name));\n    } else {\n        // the protocol is json protocol\n        uint32_t xfer = 0;\n        xfer += oprot->writeStructBegin(\"error_code\");\n\n        xfer += oprot->writeFieldBegin(\"code\", ::apache::thrift::protocol::T_STRING, 1);\n        xfer += oprot->writeString(std::string(name));\n        xfer += oprot->writeFieldEnd();\n\n        xfer += oprot->writeFieldStop();\n        xfer += oprot->writeStructEnd();\n        return xfer;\n    }\n}\n\ninline const char *to_string(const rpc_address &addr) { return addr.to_string(); }\ninline const char *to_string(const blob &blob) { return \"\"; }\ninline const char *to_string(const task_code &code) { return code.to_string(); }\ninline const char *to_string(const error_code &ec) { return ec.to_string(); }\ninline const char *to_string(const gpid &id)\n{\n    static char str[64];\n    snprintf(str, 64, \"%d.%d\", id.get_app_id(), id.get_partition_index());\n    return str;\n}\n\ntemplate <typename T>\nclass serialization_forwarder\n{\nprivate:\n    template <typename C>\n    static constexpr auto check_method(C *) -> typename std::is_same<\n        decltype(std::declval<C>().write(std::declval<::apache::thrift::protocol::TProtocol *>())),\n        uint32_t>::type;\n\n    template <typename>\n    static constexpr std::false_type check_method(...);\n\n    typedef decltype(check_method<T>(nullptr)) has_read_write_method;\n\n    static uint32_t 
marshall_internal(::apache::thrift::protocol::TProtocol *oproto,\n                                      const T &value,\n                                      std::false_type)\n    {\n        return write_base(oproto, value);\n    }\n\n    static uint32_t\n    marshall_internal(::apache::thrift::protocol::TProtocol *oproto, const T &value, std::true_type)\n    {\n        return value.write(oproto);\n    }\n\n    static uint32_t\n    unmarshall_internal(::apache::thrift::protocol::TProtocol *iproto, T &value, std::false_type)\n    {\n        return read_base(iproto, value);\n    }\n\n    static uint32_t\n    unmarshall_internal(::apache::thrift::protocol::TProtocol *iproto, T &value, std::true_type)\n    {\n        return value.read(iproto);\n    }\n\npublic:\n    static uint32_t marshall(::apache::thrift::protocol::TProtocol *oproto, const T &value)\n    {\n        return marshall_internal(oproto, value, has_read_write_method());\n    }\n\n    static uint32_t unmarshall(::apache::thrift::protocol::TProtocol *iproto, T &value)\n    {\n        return unmarshall_internal(iproto, value, has_read_write_method());\n    }\n};\n\ntemplate <typename TName>\ninline uint32_t marshall_base(::apache::thrift::protocol::TProtocol *oproto, const TName &val)\n{\n    return serialization_forwarder<TName>::marshall(oproto, val);\n}\n\ntemplate <typename TName>\ninline uint32_t unmarshall_base(::apache::thrift::protocol::TProtocol *iproto, /*out*/ TName &val)\n{\n    // well, we assume read/write are in coupled\n    return serialization_forwarder<TName>::unmarshall(iproto, val);\n}\n\n#define GET_THRIFT_TYPE_MACRO(cpp_type, thrift_type)                                               \\\n    inline ::apache::thrift::protocol::TType get_thrift_type(const cpp_type &)                     \\\n    {                                                                                              \\\n        return ::apache::thrift::protocol::thrift_type;                                            
\\\n    }\n\nGET_THRIFT_TYPE_MACRO(bool, T_BOOL)\nGET_THRIFT_TYPE_MACRO(int8_t, T_BYTE)\nGET_THRIFT_TYPE_MACRO(uint8_t, T_BYTE)\nGET_THRIFT_TYPE_MACRO(int16_t, T_I16)\nGET_THRIFT_TYPE_MACRO(uint16_t, T_I16)\nGET_THRIFT_TYPE_MACRO(int32_t, T_I32)\nGET_THRIFT_TYPE_MACRO(uint32_t, T_I32)\nGET_THRIFT_TYPE_MACRO(int64_t, T_I64)\nGET_THRIFT_TYPE_MACRO(uint64_t, T_U64)\nGET_THRIFT_TYPE_MACRO(double, T_DOUBLE)\nGET_THRIFT_TYPE_MACRO(std::string, T_STRING)\n\ntemplate <typename T>\ninline ::apache::thrift::protocol::TType get_thrift_type(const std::vector<T> &)\n{\n    return ::apache::thrift::protocol::T_LIST;\n}\n\ntemplate <typename T>\ninline ::apache::thrift::protocol::TType get_thrift_type(const T &)\n{\n    return ::apache::thrift::protocol::T_STRUCT;\n}\n\ntemplate <typename T>\ninline void marshall_thrift_internal(const T &val, ::apache::thrift::protocol::TProtocol *proto)\n{\n    /*\n     * we treat every element as a whole struct\n     */\n    proto->writeStructBegin(\"thrift_rpc_result\");\n    proto->writeFieldBegin(\"success\", get_thrift_type(val), 0);\n    marshall_base<T>(proto, val);\n    proto->writeFieldEnd();\n    proto->writeFieldStop();\n    proto->writeStructEnd();\n}\n\ntemplate <typename T>\ninline void unmarshall_thrift_internal(T &val, ::apache::thrift::protocol::TProtocol *proto)\n{\n    std::string fname;\n    ::apache::thrift::protocol::TType ftype;\n    int16_t fid;\n    proto->readStructBegin(fname);\n\n    // read the struct\n    proto->readFieldBegin(fname, ftype, fid);\n    unmarshall_base<T>(proto, val);\n    proto->readFieldEnd();\n\n    // read the stop\n    proto->readFieldBegin(fname, ftype, fid);\n\n    proto->readStructEnd();\n}\n\ntemplate <typename T>\ninline void marshall_thrift_binary(binary_writer &writer, const T &val)\n{\n    ::dsn::binary_writer_transport trans(writer);\n    boost::shared_ptr<::dsn::binary_writer_transport> transport(\n        &trans, [](::dsn::binary_writer_transport *) {});\n    
::apache::thrift::protocol::TBinaryProtocol proto(transport);\n    marshall_thrift_internal(val, &proto);\n    proto.getTransport()->flush();\n}\n\ntemplate <typename T>\ninline void marshall_thrift_json(binary_writer &writer, const T &val)\n{\n    ::dsn::binary_writer_transport trans(writer);\n    boost::shared_ptr<::dsn::binary_writer_transport> transport(\n        &trans, [](::dsn::binary_writer_transport *) {});\n    ::apache::thrift::protocol::TJSONProtocol proto(transport);\n    marshall_thrift_internal(val, &proto);\n    proto.getTransport()->flush();\n}\n\ntemplate <typename T>\ninline void unmarshall_thrift_binary(binary_reader &reader, T &val)\n{\n    ::dsn::binary_reader_transport trans(reader);\n    boost::shared_ptr<::dsn::binary_reader_transport> transport(\n        &trans, [](::dsn::binary_reader_transport *) {});\n    ::apache::thrift::protocol::TBinaryProtocol proto(transport);\n    unmarshall_thrift_internal(val, &proto);\n}\n\ntemplate <typename T>\ninline void unmarshall_thrift_json(binary_reader &reader, T &val)\n{\n    ::dsn::binary_reader_transport trans(reader);\n    boost::shared_ptr<::dsn::binary_reader_transport> transport(\n        &trans, [](::dsn::binary_reader_transport *) {});\n    ::apache::thrift::protocol::TJSONProtocol proto(transport);\n    unmarshall_thrift_internal(val, &proto);\n}\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/cpp/serverlet.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#pragma once\n\n#include <dsn/cpp/service_app.h>\n#include <dsn/cpp/rpc_holder.h>\n#include <dsn/cpp/serialization.h>\n\nnamespace dsn {\n/*!\n@addtogroup rpc-server\n@{\n*/\n\n//\n// for TRequest/TResponse, we assume that the following routines are defined:\n//    marshall(binary_writer& writer, const T& val);\n//    unmarshall(binary_reader& reader, /*out*/ T& val);\n// either in the namespace of ::dsn or T\n// developers may write these helper functions by their own, or use tools\n// such as protocol-buffer, thrift, or bond to generate these functions automatically\n// for their TRequest and TResponse\n//\n\ntemplate <typename TResponse>\nclass rpc_replier\n{\npublic:\n    rpc_replier(dsn::message_ex *response) { _response = 
response; }\n    rpc_replier(rpc_replier &&r)\n    {\n        _response = r._response;\n        r._response = nullptr;\n    }\n    rpc_replier &operator=(rpc_replier &&r)\n    {\n        release();\n        _response = r._response;\n        r._response = nullptr;\n        return *this;\n    }\n\n    ~rpc_replier() { release(); }\n\n    rpc_replier(const rpc_replier &r) = delete;\n    rpc_replier(rpc_replier &r) = delete;\n    rpc_replier &operator=(const rpc_replier &r) = delete;\n    rpc_replier &operator=(rpc_replier &r) = delete;\n\n    void operator()(const TResponse &resp)\n    {\n        if (_response != nullptr) {\n            ::dsn::marshall(_response, resp);\n            dsn_rpc_reply(_response);\n            _response = nullptr;\n        }\n    }\n\n    bool is_empty() const { return _response == nullptr; }\n\n    // response message, may be nullptr\n    dsn::message_ex *response_message() const { return _response; }\n\n    // the address where send response to\n    rpc_address to_address() const\n    {\n        return _response != nullptr ? 
_response->to_address : rpc_address::s_invalid_address;\n    }\n\nprivate:\n    void release()\n    {\n        if (_response != nullptr) {\n            _response->add_ref();\n            _response->release_ref();\n            _response = nullptr;\n        }\n    }\n    dsn::message_ex *_response;\n};\n\ntemplate <typename T> // where T : serverlet<T>\nclass serverlet\n{\npublic:\n    explicit serverlet(const char *nm);\n    virtual ~serverlet();\n\nprotected:\n    template <typename TRequest>\n    bool register_rpc_handler(task_code rpc_code,\n                              const char *extra_name,\n                              void (T::*handler)(const TRequest &));\n\n    template <typename TRequest, typename TResponse>\n    bool register_rpc_handler(task_code rpc_code,\n                              const char *extra_name,\n                              void (T::*handler)(const TRequest &, TResponse &));\n\n    template <typename TRpcHolder>\n    bool register_rpc_handler_with_rpc_holder(dsn::task_code rpc_code,\n                                              const char *extra_name,\n                                              void (T::*handler)(TRpcHolder));\n\n    template <typename TRequest, typename TResponse>\n    bool register_async_rpc_handler(task_code rpc_code,\n                                    const char *extra_name,\n                                    void (T::*handler)(const TRequest &, rpc_replier<TResponse> &));\n\n    bool register_rpc_handler(task_code rpc_code,\n                              const char *extra_name,\n                              void (T::*handler)(dsn::message_ex *));\n\n    bool unregister_rpc_handler(task_code rpc_code);\n\n    template <typename TResponse>\n    void reply(dsn::message_ex *request, const TResponse &resp);\n\npublic:\n    const std::string &name() const { return _name; }\n\nprivate:\n    std::string _name;\n};\n\n// ------------- inline implementation ----------------\ntemplate <typename T>\ninline 
serverlet<T>::serverlet(const char *nm) : _name(nm)\n{\n}\n\ntemplate <typename T>\ninline serverlet<T>::~serverlet()\n{\n}\n\ntemplate <typename T>\ntemplate <typename TRequest>\ninline bool serverlet<T>::register_rpc_handler(task_code rpc_code,\n                                               const char *extra_name,\n                                               void (T::*handler)(const TRequest &))\n{\n    rpc_request_handler cb = [this, handler](dsn::message_ex *request) {\n        TRequest req;\n        ::dsn::unmarshall(request, req);\n        (((T *)this)->*(handler))(req);\n    };\n\n    return dsn_rpc_register_handler(rpc_code, extra_name, cb);\n}\n\ntemplate <typename T>\ntemplate <typename TRequest, typename TResponse>\ninline bool serverlet<T>::register_rpc_handler(task_code rpc_code,\n                                               const char *extra_name,\n                                               void (T::*handler)(const TRequest &, TResponse &))\n{\n    rpc_request_handler cb = [this, handler](dsn::message_ex *request) {\n        TRequest req;\n        ::dsn::unmarshall(request, req);\n\n        TResponse resp;\n        (((T *)this)->*(handler))(req, resp);\n        rpc_replier<TResponse> replier(request->create_response());\n        replier(resp);\n    };\n    return dsn_rpc_register_handler(rpc_code, extra_name, cb);\n}\n\ntemplate <typename T>\ntemplate <typename TRpcHolder>\ninline bool serverlet<T>::register_rpc_handler_with_rpc_holder(dsn::task_code rpc_code,\n                                                               const char *extra_name,\n                                                               void (T::*handler)(TRpcHolder))\n{\n    rpc_request_handler cb = [this, handler](dsn::message_ex *request) {\n        (((T *)this)->*(handler))(TRpcHolder::auto_reply(request));\n    };\n\n    return dsn_rpc_register_handler(rpc_code, extra_name, cb);\n}\n\ntemplate <typename T>\ntemplate <typename TRequest, typename TResponse>\ninline 
bool serverlet<T>::register_async_rpc_handler(task_code rpc_code,\n                                                     const char *extra_name,\n                                                     void (T::*handler)(const TRequest &,\n                                                                        rpc_replier<TResponse> &))\n{\n    rpc_request_handler cb = [this, handler](dsn::message_ex *request) {\n        TRequest req;\n        ::dsn::unmarshall(request, req);\n        rpc_replier<TResponse> replier(request->create_response());\n        (((T *)this)->*(handler))(req, replier);\n    };\n    return dsn_rpc_register_handler(rpc_code, extra_name, cb);\n}\n\ntemplate <typename T>\ninline bool serverlet<T>::register_rpc_handler(task_code rpc_code,\n                                               const char *extra_name,\n                                               void (T::*handler)(dsn::message_ex *))\n{\n    rpc_request_handler cb = [this, handler](dsn::message_ex *request) {\n        (((T *)this)->*(handler))(request);\n    };\n\n    return dsn_rpc_register_handler(rpc_code, extra_name, cb);\n}\n\ntemplate <typename T>\ninline bool serverlet<T>::unregister_rpc_handler(task_code rpc_code)\n{\n    return dsn_rpc_unregiser_handler(rpc_code);\n}\n\ntemplate <typename T>\ntemplate <typename TResponse>\ninline void serverlet<T>::reply(dsn::message_ex *request, const TResponse &resp)\n{\n    auto msg = request->create_response();\n    ::dsn::marshall(msg, resp);\n    dsn_rpc_reply(msg);\n}\n/*@}*/\n} // end namespace\n"
  },
  {
    "path": "include/dsn/cpp/service_app.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     application model atop zion in c++\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/service_api_c.h>\n#include <dsn/tool-api/auto_codes.h>\n#include <dsn/tool-api/rpc_address.h>\n#include <dsn/tool-api/gpid.h>\n#include <dsn/utility/factory_store.h>\n#include <vector>\n#include <string>\n\nnamespace dsn {\n/*!\n@addtogroup app-model\n@{\n*/\n\nstruct service_app_info\n{\n    int entity_id;\n    int index;\n    std::string role_name;\n    std::string full_name;\n    std::string type;\n    std::string data_dir;\n};\n\nclass service_app\n{\npublic:\n    template <typename T>\n    static 
service_app *create(const service_app_info *info)\n    {\n        return new T(info);\n    }\n    template <typename T>\n    static void register_factory(const char *name)\n    {\n        utils::factory_store<service_app>::register_factory(name, create<T>, PROVIDER_TYPE_MAIN);\n    }\n    static service_app *new_service_app(const std::string &type, const service_app_info *info);\n\n    static const service_app_info &current_service_app_info();\n    static void get_all_service_apps(std::vector<service_app *> *apps);\n\npublic:\n    service_app(const service_app_info *info);\n    virtual ~service_app() {}\n    // The back two members of args are version and git commit code, which are passed to\n    // version_http_service.\n    virtual error_code start(const std::vector<std::string> &args) { return ERR_OK; }\n    virtual error_code stop(bool cleanup = false) { return ERR_OK; }\n    virtual void on_intercepted_request(gpid pid, bool is_write, dsn::message_ex *msg)\n    {\n        dassert(false, \"not supported\");\n    }\n\n    bool is_started() const { return _started; }\n    rpc_address primary_address() const { return _address; }\n    void set_address(const rpc_address &addr) { _address = addr; }\n    void set_started(bool start_flag) { _started = start_flag; }\n    const service_app_info &info() const;\n\nprotected:\n    const service_app_info *const _info;\n    rpc_address _address;\n    bool _started;\n};\n\n/*@}*/\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/block_service.h",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#pragma once\n\n#include <dsn/tool-api/task_code.h>\n#include <dsn/tool-api/task_tracker.h>\n#include <dsn/dist/replication.h>\n#include <functional>\n\nnamespace dsn {\n\nnamespace dist {\nnamespace block_service {\n\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_BLOCK_SERVICE)\n\nclass block_file;\ntypedef dsn::ref_ptr<block_file> block_file_ptr;\n\n/**\n * @brief The ls_request struct, use to list all the files and directories under the dir_name\n * dir_name: a valid absolute path string, which \"/\" as splitter.We don't support relative path\n */\nstruct ls_request\n{\n    std::string dir_name;\n};\n\n/**\n * @brief The ls_entry struct\n *  entry_name: an entry name which doesn't contain the directory preceding\n */\nstruct ls_entry\n{\n    std::string entry_name;\n    bool is_directory;\n};\n\n/**\n * @brief The ls_response struct\n *  err: ERR_OK means the ls request is responsed succeed. 
Then\n *          user can view all entries by entries.\n *       ERR_OBJECT_NOT_FOUND: can't find the dir by ls_request.dir_name\n *       ERR_INVALID_PARAMETERS: the ls_request.dir_name is not a dir\n *       ERR_TIMEOUT: request timeout\n *       ERR_FS_INTERNAL: an internal error occurred in the service implementation\n *          which we can't handle\n */\nstruct ls_response\n{\n    dsn::error_code err;\n    // use shared_ptr to avoid extra memory copy\n    std::shared_ptr<std::vector<ls_entry>> entries;\n    ls_response() : entries(std::make_shared<std::vector<ls_entry>>()) {}\n};\ntypedef std::function<void(const ls_response &)> ls_callback;\ntypedef future_task<ls_response> ls_future;\ntypedef dsn::ref_ptr<ls_future> ls_future_ptr;\n\n/**\n * @brief The create_file_request struct, used to create a block_file_ptr\n *  file_name: a valid absolute path string, which \"/\" as splitter.\n *             We don't support relative path.\n *  ignore_metadata: With the flag set, implementation is not necessary to pre-fetch the\n *                   metadata(size, md5sum..) when create the handle. This is useful in\n *                   cases that users don't care the current content of the file, and they\n *                   simply need the handle and do operations. 
An implementation can do\n *                   some optimization according to this option.\n */\nstruct create_file_request\n{\n    std::string file_name;\n    bool ignore_metadata;\n};\n\n/**\n * @brief The create_file_response struct\n *  err: ERR_OK: the file handle is successfully created.\n *       ERR_TIMEOUT: request timeout\n *       ERR_FS_INTERNAL: an internal error occurred in the service implementation\n *          which we can't handle\n *  file_handle: the file_handle will not be null if err is ERR_OK.\n *               user can read/write the file by the handle, and get the metadata(size, md5..).\n *               please ref {@link #block_file::get_size}, {@link #block_file::get_md5sum} for\n *               the return value of metadata\n */\nstruct create_file_response\n{\n    dsn::error_code err;\n    block_file_ptr file_handle;\n};\ntypedef std::function<void(const create_file_response &)> create_file_callback;\ntypedef future_task<create_file_response> create_file_future;\ntypedef dsn::ref_ptr<create_file_future> create_file_future_ptr;\n\n/**\n * @brief The remove_path_request struct\n *  path: a valid absolute path string, which point to file or directory, which \"/\" as splitter\n *  recursive: if path point to a non-empty directory, and if recursive = true, then all the\n *             files or dirs under the path will be removed; if recursive = false, then the\n *             non-empty directory will not be removed.\n *             if path point to an empty directory or file, just remove the path.\n */\nstruct remove_path_request\n{\n    std::string path;\n    bool recursive;\n};\n\n/**\n * @brief The remove_path_response struct\n *  err:  ERR_OK: request succeed, and the remove path succeed\n *        ERR_OBJECT_NOT_FOUND: request succeed, but the path do not exist\n *        ERR_TIMEOUT: request timeout\n *        ERR_DIR_NOT_EMPTY: the directory is non-empty, can't be removed\n *        ERR_FS_INTERNAL:  remove directory failed, need check 
it again\n */\nstruct remove_path_response\n{\n    dsn::error_code err;\n};\ntypedef std::function<void(const remove_path_response &)> remove_path_callback;\ntypedef future_task<remove_path_response> remove_path_future;\ntypedef dsn::ref_ptr<remove_path_future> remove_path_future_ptr;\n\n/**\n * @brief The read_request struct\n *  remote_pos: where of the file to start read\n *  remote_length: the amount of bytes to read.\n *                 if set -1, means read to the end of the file\n */\nstruct read_request\n{\n    uint64_t remote_pos;\n    int64_t remote_length;\n};\n\n/**\n * @brief The read_response struct\n *  err: ERR_OK: read succeed\n *       ERR_OBJECT_NOT_FOUND: try to read a non-existent file.\n *          this happens when try to read a file handle which\n *          doesn't have a corresponding remote file\n *       ERR_TIMEOUT: request timeout\n *       ERR_FS_INTERNAL: an internal error occurred in the service implementation\n *          which we can't handle\n *  buffer: the read data. The implementation can choose to return partially read data when\n *          error occurred, or discard them and only return an empty buffer. But implementation\n *          should never return ERR_OK if partial data got, otherwise the user can't tell\n *          whether partial data is transferred or reach the end of file with remote_length == -1.\n *          If ERR_OK returned but only partial data got, means reach the end of file.\n */\nstruct read_response\n{\n    dsn::error_code err;\n    dsn::blob buffer;\n};\ntypedef std::function<void(const read_response &)> read_callback;\ntypedef future_task<read_response> read_future;\ntypedef dsn::ref_ptr<read_future> read_future_ptr;\n\n/**\n * @brief The write_request struct\n *  buffer: the new content of the file. 
Returns ERR_OK if and only if all the data in buffer\n *          has been written into the file.\n *  Notice: we don't have the insert/append semantic for file, only truncate.\n */\nstruct write_request\n{\n    dsn::blob buffer;\n};\n\n/**\n * @brief The write_response struct\n *  err: ERR_OK: write succeed\n *       ERR_TIMEOUT: request timeout\n *       ERR_FS_INTERNAL: an internal error occurred in the service implementation\n *          which we can't handle\n *  written_size: amount of bytes have been written.\n *\n * Notice: user can call get_size/get_md5sum to get the metadata of the file\n */\nstruct write_response\n{\n    dsn::error_code err;\n    uint64_t written_size;\n};\ntypedef std::function<void(const write_response &)> write_callback;\ntypedef future_task<write_response> write_future;\ntypedef dsn::ref_ptr<write_future> write_future_ptr;\n\n/**\n * @brief The upload_request struct\n *  input_local_name: a local filesystem path, you can use a relative or absolute path.\n */\nstruct upload_request\n{\n    std::string input_local_name;\n};\n\n/**\n * @brief The upload_response struct\n *  similar to write_response with more errors in err:\n *     ERR_FILE_OPERATION_FAILED: open the local file for read failed.\n *\n * Notice: user can call get_size/get_md5sum to get the metadata of the file\n */\nstruct upload_response\n{\n    dsn::error_code err;\n    uint64_t uploaded_size;\n};\ntypedef std::function<void(const upload_response &)> upload_callback;\ntypedef future_task<upload_response> upload_future;\ntypedef dsn::ref_ptr<upload_future> upload_future_ptr;\n\n/**\n * @brief The download_request struct\n *  output_local_name: a local filesystem path, you can use a relative or absolute path.\n */\nstruct download_request\n{\n    std::string output_local_name;\n    uint64_t remote_pos;\n    int64_t remote_length;\n};\n/**\n * @brief The download_response struct\n * similar to read_response. 
With more errors in err:\n *    ERR_FILE_OPERATION_FAILED: open output_local_name for write failed.\n *    if try to download a non-exist file and with an invalid output_local_name,\n *    it's up to implementation to return which error.\n */\nstruct download_response\n{\n    dsn::error_code err;\n    uint64_t downloaded_size;\n    std::string file_md5;\n};\ntypedef std::function<void(const download_response &)> download_callback;\ntypedef future_task<download_response> download_future;\ntypedef dsn::ref_ptr<download_future> download_future_ptr;\n\nclass block_filesystem\n{\npublic:\n    template <typename T>\n    static block_filesystem *create()\n    {\n        return new T();\n    }\n\n    typedef block_filesystem *(*factory)();\n    block_filesystem() {}\n\n    /**\n     * @brief initialize\n     * @param args, the implemented related parameter in initializing\n     *        should be represented as strings and passed by args\n     * @return ERR_OK if initialized succeed. If failed, return with the failed error.\n     */\n    virtual error_code initialize(const std::vector<std::string> &args) = 0;\n\n    /**\n     * @brief list_dir\n     * @param req, ref {@link #ls_request}\n     * @param code, a task_code, describe how the callback executed\n     * @param callback, called when get the list result\n     * @param tracker\n     * @return a task which represent the async operation\n     */\n    virtual dsn::task_ptr list_dir(const ls_request &req,\n                                   dsn::task_code code,\n                                   const ls_callback &callback,\n                                   dsn::task_tracker *tracker = nullptr) = 0;\n\n    /**\n     * @brief create_file\n     * @param req, ref {@link #create_file_request}\n     * @param code, a task_code, describe how the callback executed\n     * @param callback, called when get the list result\n     * @param tracker\n     * @return a task which represent the async operation\n     */\n    virtual 
dsn::task_ptr create_file(const create_file_request &req,\n                                      dsn::task_code code,\n                                      const create_file_callback &cb,\n                                      dsn::task_tracker *tracker = nullptr) = 0;\n\n    /**\n     * @brief remove_path\n     * @param req, ref {@link #remove_path_request}\n     * @param code, a task_code, describe how the callback executed\n     * @param callback, called when get the list result\n     * @param tracker\n     * @return a task which represent the async operation\n     */\n    virtual dsn::task_ptr remove_path(const remove_path_request &req,\n                                      dsn::task_code code,\n                                      const remove_path_callback &cb,\n                                      dsn::task_tracker *tracker = nullptr) = 0;\n\n    virtual bool is_root_path_set() const { return false; }\n\n    virtual ~block_filesystem() {}\n};\n\nclass block_file : public dsn::ref_counter\n{\npublic:\n    block_file(const std::string &name) : _name(name) {}\n    virtual ~block_file() {}\n    const std::string &file_name() const { return _name; }\n\n    /**\n     * @brief get_size\n     *    this api should never block, implementation should\n     *    fetch the size in {@link block_filesystem::create_file}.\n     *    if the block_file is created by \"ignore_metadata\" {@link create_file_request},\n     *    should return 0\n     * @return the file_size. If the file doesn't exist, should return 0\n     */\n    virtual uint64_t get_size() = 0;\n\n    /**\n     * @brief get_md5sum\n     *    this api should never block, implementation should\n     *    fetch the md5sum in {@link block_filesystem::create_file}.\n     *    if the block_file is created by \"ignore_metadata\" {@link create_file_request},\n     *    should return \"\"\n     * @return the md5 value. 
If the file doesn't exist, should return \"\".\n     *    NOTICE: if an existing file is empty(size == 0), the returning value is not \"\".\n     *    user can use this feature to check if a file exist.\n     */\n    virtual const std::string &get_md5sum() = 0;\n\n    /**\n     * @brief write\n     * @param req, ref {@link #write_request}\n     * @param code, a task_code, describe how the callback executed\n     * @param callback, called when get the list result\n     * @param tracker\n     * @return a task which represent the async operation\n     */\n    virtual dsn::task_ptr write(const write_request &req,\n                                dsn::task_code code,\n                                const write_callback &cb,\n                                dsn::task_tracker *tracker = nullptr) = 0;\n\n    /**\n     * @brief read\n     * @param req, ref {@link #read_request}\n     * @param code, a task_code, describe how the callback executed\n     * @param callback, called when get the list result\n     * @param tracker\n     * @return a task which represent the async operation\n     */\n    virtual dsn::task_ptr read(const read_request &req,\n                               dsn::task_code code,\n                               const read_callback &cb,\n                               dsn::task_tracker *tracker = nullptr) = 0;\n\n    /**\n     * @brief upload\n     * @param req, ref {@link #upload_request}\n     * @param code, a task_code, describe how the callback executed\n     * @param callback, called when get the list result\n     * @param tracker\n     * @return a task which represent the async operation\n     */\n    virtual dsn::task_ptr upload(const upload_request &req,\n                                 dsn::task_code code,\n                                 const upload_callback &cb,\n                                 dsn::task_tracker *tracker = nullptr) = 0;\n\n    /**\n     * @brief download\n     * @param req, ref {@link #download_request}\n     * @param code, 
a task_code, describes how the callback is executed\n     * @param cb, called when the download result is available\n     * @param tracker\n     * @return a task which represents the async operation\n     */\n    virtual dsn::task_ptr download(const download_request &req,\n                                   dsn::task_code code,\n                                   const download_callback &cb,\n                                   dsn::task_tracker *tracker = nullptr) = 0;\n\nprotected:\n    std::string _name;\n};\n}\n}\n}\n"
  },
  {
    "path": "include/dsn/dist/common.h",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#pragma once\n\nnamespace dsn {\n/// Returns the cluster name (i.e, \"onebox\") if it's configured under\n/// \"replication\" section:\n///    [replication]\n///      cluster_name = \"onebox\"\nextern const char *get_current_cluster_name();\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/distributed_lock_service.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n* Description:\n*     interface of the reliable distributed lock service\n*\n* Revision history:\n*     2015-10-28, Weijie Sun, first version\n*     2015-11-5, @imzhenyu (Zhenyu Guo), remove create and destroy API as they are\n*                unnecessary, adjust the interface, so that\n*                (1) return task_ptr for callers to cancel or wait;\n*                (2) add factory for provider registration;\n*                (3) add cb_code parameter, then users can specify where the callback\n*                    should be executed\n*     xxxx-xx-xx, author, fix bug about xxx\n*/\n\n#pragma once\n\n#include <dsn/service_api_cpp.h>\n#include <dsn/utility/error_code.h>\n#include <dsn/tool-api/future_types.h>\n#include 
<string>\n#include <functional>\n#include <utility>\n\nnamespace dsn {\nnamespace dist {\n\ntypedef std::function<void(error_code ec, const std::string &owner_id, uint64_t version)>\n    lock_callback;\ntypedef future_task<error_code, std::string, uint64_t> lock_future;\ntypedef dsn::ref_ptr<lock_future> lock_future_ptr;\n\nclass distributed_lock_service\n{\npublic:\n    template <typename T>\n    static distributed_lock_service *create()\n    {\n        return new T();\n    }\n\n    typedef distributed_lock_service *(*factory)();\n\npublic:\n    struct lock_options\n    {\n        bool create_if_not_exist;\n        bool create_enable_cache;\n    };\n\n    virtual ~distributed_lock_service() {}\n    /*\n     * initialization routine\n     */\n    virtual error_code initialize(const std::vector<std::string> &args) = 0;\n\n    /*\n     * finalize routine\n     */\n    virtual error_code finalize() = 0;\n\n    /*\n     * lock\n     * lock_cb_code: the task code specifies where to execute the callback\n     * lock_cb: the callback is executed when there are error or lock granted\n     * lease_expire_code: the task code specifies where to execute the callback\n     * lease_expire_callback: the callback is executed when lease is expired\n     *                        and unlock is not invoked\n     * create_if_not_exist:\n     *   if distributed lock for lock_id doesn't exist, try to create one\n     *\n     * return:\n     *   the first task handle for lock granted or error\n     *   the second task is valid only when lock granted, and it is for lease expire\n     *\n     * possible ec:\n     *   ERR_INVALID_PARAMETERS, lock_id invalid, or cb==nullptr\n     *   ERR_TIMEOUT, creating lock timeout if create_if_not_exist==true\n     *   ERR_OBJECT_NOT_FOUND, lock doesn't exist and create_if_not_exist == false\n     *\n     *   ERR_OK, the caller gets the lock. 
when the lock is hold by others, the callback\n     *   is hold until it gets the lock.\n     *   ERR_RECURSIVE_LOCK, call \"lock\" again if it was called before in the process\n     *     context with the same parameter pair.\n     */\n    virtual std::pair<task_ptr, task_ptr> lock(const std::string &lock_id,\n                                               const std::string &myself_id,\n                                               task_code lock_cb_code,\n                                               const lock_callback &lock_cb,\n                                               task_code lease_expire_code,\n                                               const lock_callback &lease_expire_callback,\n                                               const lock_options &opt) = 0;\n\n    /*\n    * cancel the lock operation that is on pending\n    * cb_code: the task code specifies where to execute the callback\n    * lock_id should be valid, and cb should not be empty\n    *\n    * possible ec:\n    *   ERR_INVALID_PARAMETERS\n    *   ERR_OK, the pending lock is cancelled successfully\n    *   ERR_OBJECT_NOT_FOUND, the caller is not found in pending list, check\n    *   returned owner to see whether it already succeedes\n    *\n    */\n    virtual task_ptr cancel_pending_lock(const std::string &lock_id,\n                                         const std::string &myself_id,\n                                         task_code cb_code,\n                                         const lock_callback &cb) = 0;\n\n    /*\n     * unlock\n     * cb_code: the task code specifies where to execute the callback\n     * lock_id should be valid, and cb should not be empty\n     *\n     * possible ec:\n     *   ERR_INVALID_PARAMETERS\n     *   ERR_OK, release the lock successfully; if destroy==true, it also implies\n     *     that the lock is destroy successfully.\n     *   ERR_HOLD_BY_OTHERS, the lock is hold by others\n     *   ERR_TIMEOUT, operation timeout. 
If destroy==false, this implies the unlock-op\n     *     timed out; if destroy==true, it may be unlock-op or destroy-op who times out.\n     *     For the latter, user can use query_lock to check the status of the lock\n     */\n    virtual task_ptr unlock(const std::string &lock_id,\n                            const std::string &myself_id,\n                            bool destroy,\n                            task_code cb_code,\n                            const err_callback &cb) = 0;\n\n    /*\n     * cb_code: the task code specifies where to execute the callback\n     * cb shouldn't be empty\n     * possible ec:\n     *   ERR_OK: the lock is held by someone, user can get the owner by\n     *     owner_id\n     *   ERR_OBJECT_NOT_FOUND: the lock doesn't exist\n     *   ERR_NO_OWNER, no one owns the lock\n     *   ERR_TIMEOUT, operation timeout\n     */\n    virtual task_ptr\n    query_lock(const std::string &lock_id, task_code cb_code, const lock_callback &cb) = 0;\n    /*\n     * error_code: err_invalid_parameters -> if the lock is created without cache enabled\n     *             err_object_not_found -> no lock created with lock_id\n     *             err_ok -> query the cache successfully\n     */\n    virtual error_code query_cache(const std::string &lock_id,\n                                   /*out*/ std::string &owner,\n                                   /*out*/ uint64_t &version) = 0;\n};\n}\n}\n"
  },
  {
    "path": "include/dsn/dist/failure_detector/failure_detector.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     interface for a perfect failure detector\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     Dec., 2015, @shengofsun (Weijie Sun), make zlock preoteced,\n *                 give the subClasses flexibility\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n/*\n * Notes on the failure detector:\n *\n * 1. 
Due to the fact that we can only check the liveness inside check-all-records call,\n *    which happens every \"check_interval_seconds\" seconds, worker may disconnect from master\n *    in the period earlier than the lease_seconds to ensure the perfect FD.\n *    In the worst case, workers may disconnect themselves\n *    after \"lease\"-\"check_interval_seconds\" seconds;\n *\n *    Similarily, master may claim a worker dead more slowly even the workers are dead\n *    for longer than grace_seconds. In the worst case, it will be\n *    \"grace\"+\"check_interval_seconds\" seconds.\n *\n * 2. In practice, your should set check_interval_seconds a small value for a fine-grained FD.\n *    For client, you may set it as 2 second as it usually connect to a small number of masters.\n *    For master, you may set it as 5 or 10 seconds.\n *\n * 3. We should always use dedicated thread pools for THREAD_POOL_FD,\n *    and set thread priority to being highest so as to minimize the performance\n *    interference with other workloads.\n *\n * 4. 
The lease_periods must be less than the grace_periods, as required by prefect FD.\n *\n */\n#pragma once\n\n#include <dsn/dist/failure_detector/fd.client.h>\n#include <dsn/dist/failure_detector/fd.server.h>\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n#include <dsn/tool-api/zlocks.h>\n\nnamespace dsn {\nnamespace fd {\n\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_FD)\nDEFINE_TASK_CODE(LPC_BEACON_CHECK, TASK_PRIORITY_HIGH, THREAD_POOL_FD)\nDEFINE_TASK_CODE(LPC_BEACON_SEND, TASK_PRIORITY_HIGH, THREAD_POOL_FD)\n\nclass failure_detector_callback\n{\npublic:\n    virtual ~failure_detector_callback() {}\n\n    // worker side\n    virtual void on_master_disconnected(const std::vector<::dsn::rpc_address> &nodes) = 0;\n    virtual void on_master_connected(::dsn::rpc_address node) = 0;\n\n    // master side\n    virtual void on_worker_disconnected(const std::vector<::dsn::rpc_address> &nodes) = 0;\n    virtual void on_worker_connected(::dsn::rpc_address node) = 0;\n};\n\nclass failure_detector : public failure_detector_service,\n                         public failure_detector_client,\n                         public failure_detector_callback\n{\npublic:\n    failure_detector();\n    virtual ~failure_detector();\n\n    virtual void on_ping(const beacon_msg &beacon, ::dsn::rpc_replier<beacon_ack> &reply);\n\n    virtual void end_ping(::dsn::error_code err, const beacon_ack &ack, void *context);\n\n    virtual void register_ctrl_commands();\n    virtual void unregister_ctrl_commands();\n\npublic:\n    error_code start(uint32_t check_interval_seconds,\n                     uint32_t beacon_interval_seconds,\n                     uint32_t lease_seconds,\n                     uint32_t grace_seconds,\n                     bool use_allow_list = false);\n\n    void stop();\n\n    uint32_t get_lease_ms() const { return _lease_milliseconds; }\n    uint32_t get_grace_ms() const { return _grace_milliseconds; }\n\n    void register_master(::dsn::rpc_address target);\n\n    bool 
switch_master(::dsn::rpc_address from, ::dsn::rpc_address to, uint32_t delay_milliseconds);\n\n    bool unregister_master(::dsn::rpc_address node);\n\n    virtual bool is_master_connected(::dsn::rpc_address node) const;\n\n    // ATTENTION: be very careful to set is_connected to false as\n    // workers are always considered *connected* initially which is ok even when workers think\n    // master is disconnected\n    // Considering workers *disconnected* initially is *dangerous* coz it may violate the invariance\n    // when workers think they are online\n    void register_worker(::dsn::rpc_address node, bool is_connected = true);\n\n    bool unregister_worker(::dsn::rpc_address node);\n\n    void clear_workers();\n\n    virtual bool is_worker_connected(::dsn::rpc_address node) const;\n\n    void add_allow_list(::dsn::rpc_address node);\n\n    bool remove_from_allow_list(::dsn::rpc_address node);\n\n    void set_allow_list(const std::vector<std::string> &replica_addrs);\n\n    std::string get_allow_list(const std::vector<std::string> &args) const;\n\n    int worker_count() const { return static_cast<int>(_workers.size()); }\n\n    int master_count() const { return static_cast<int>(_masters.size()); }\n\nprotected:\n    void on_ping_internal(const beacon_msg &beacon, /*out*/ beacon_ack &ack);\n\n    // return false when the ack is not applicable\n    bool end_ping_internal(::dsn::error_code err, const beacon_ack &ack);\n\n    bool is_time_greater_than(uint64_t ts, uint64_t base);\n\n    void report(::dsn::rpc_address node, bool is_master, bool is_connected);\n\nprivate:\n    void check_all_records();\n\nprivate:\n    class master_record\n    {\n    public:\n        ::dsn::rpc_address node;\n        uint64_t last_send_time_for_beacon_with_ack;\n        bool is_alive;\n        bool rejected;\n        task_ptr send_beacon_timer;\n\n        // masters are always considered *disconnected* initially which is ok even when master\n        // thinks workers are connected\n   
     master_record(::dsn::rpc_address n, uint64_t last_send_time_for_beacon_with_ack_)\n        {\n            node = n;\n            last_send_time_for_beacon_with_ack = last_send_time_for_beacon_with_ack_;\n            is_alive = false;\n            rejected = false;\n        }\n    };\n\n    class worker_record\n    {\n    public:\n        ::dsn::rpc_address node;\n        uint64_t last_beacon_recv_time;\n        bool is_alive;\n\n        // workers are always considered *connected* initially which is ok even when workers think\n        // master is disconnected\n        worker_record(::dsn::rpc_address node, uint64_t last_beacon_recv_time)\n        {\n            this->node = node;\n            this->last_beacon_recv_time = last_beacon_recv_time;\n            is_alive = true;\n        }\n    };\n\nprivate:\n    typedef std::unordered_map<::dsn::rpc_address, master_record> master_map;\n    typedef std::unordered_map<::dsn::rpc_address, worker_record> worker_map;\n\n    // allow list are set on machine name (port can vary)\n    typedef std::unordered_set<::dsn::rpc_address> allow_list;\n\n    master_map _masters;\n    worker_map _workers;\n\n    uint32_t _check_interval_milliseconds;\n    uint32_t _beacon_interval_milliseconds;\n    uint32_t _beacon_timeout_milliseconds;\n    uint32_t _lease_milliseconds;\n    uint32_t _grace_milliseconds;\n    bool _is_started;\n    ::dsn::task_ptr _check_task;\n\n    bool _use_allow_list;\n    allow_list _allow_list;\n\n    perf_counter_wrapper _recent_beacon_fail_count;\n\n    dsn_handle_t _get_allow_list = nullptr;\n\nprotected:\n    mutable zlock _lock;\n    dsn::task_tracker _tracker;\n\n    // subClass can rewrite these method.\n    virtual void send_beacon(::dsn::rpc_address node, uint64_t time);\n};\n}\n} // end namespace\n"
  },
  {
    "path": "include/dsn/dist/failure_detector/fd.client.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n#include \"fd.code.definition.h\"\n#include <iostream>\n#include <dsn/utility/optional.h>\n#include <dsn/tool-api/async_calls.h>\n\nnamespace dsn {\nnamespace fd {\nclass failure_detector_client\n{\npublic:\n    failure_detector_client(::dsn::rpc_address server) { _server = server; }\n    failure_detector_client() {}\n    virtual ~failure_detector_client() {}\n\n    // ---------- call RPC_FD_FAILURE_DETECTOR_PING ------------\n    // - synchronous\n    std::pair<::dsn::error_code, beacon_ack>\n    ping_sync(const beacon_msg &beacon,\n              
std::chrono::milliseconds timeout = std::chrono::milliseconds(0),\n              int thread_hash = 0,\n              uint64_t partition_hash = 0,\n              dsn::optional<::dsn::rpc_address> server_addr = dsn::none)\n    {\n        return dsn::rpc::wait_and_unwrap<beacon_ack>(\n            ::dsn::rpc::call(server_addr.unwrap_or(_server),\n                             RPC_FD_FAILURE_DETECTOR_PING,\n                             beacon,\n                             nullptr,\n                             empty_rpc_handler,\n                             timeout,\n                             thread_hash,\n                             partition_hash));\n    }\n\n    // - asynchronous with on-stack beacon_msg and beacon_ack\n    template <typename TCallback>\n    ::dsn::task_ptr ping(const beacon_msg &beacon,\n                         TCallback &&callback,\n                         std::chrono::milliseconds timeout = std::chrono::milliseconds(0),\n                         int thread_hash = 0,\n                         uint64_t partition_hash = 0,\n                         int reply_thread_hash = 0,\n                         dsn::optional<::dsn::rpc_address> server_addr = dsn::none)\n    {\n        return ::dsn::rpc::call(server_addr.unwrap_or(_server),\n                                RPC_FD_FAILURE_DETECTOR_PING,\n                                beacon,\n                                this,\n                                std::forward<TCallback>(callback),\n                                timeout,\n                                thread_hash,\n                                partition_hash,\n                                reply_thread_hash);\n    }\n\nprivate:\n    ::dsn::rpc_address _server;\n};\n}\n}\n"
  },
  {
    "path": "include/dsn/dist/failure_detector/fd.code.definition.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool-api/task_code.h>\n#include <dsn/service_api_cpp.h>\n#include \"fd_types.h\"\n\nnamespace dsn {\nnamespace fd {\n\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_DEFAULT)\n\n// define RPC task code for service 'failure_detector'\nDEFINE_TASK_CODE_RPC(RPC_FD_FAILURE_DETECTOR_PING, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\n// test timer task code\nDEFINE_TASK_CODE(LPC_FD_TEST_TIMER, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\n\ninline bool is_failure_detector_message(dsn::task_code code)\n{\n    return code == RPC_FD_FAILURE_DETECTOR_PING 
|| code == RPC_FD_FAILURE_DETECTOR_PING_ACK;\n}\n} // namespace fd\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/failure_detector/fd.server.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n#include \"fd.code.definition.h\"\n#include <iostream>\n\nnamespace dsn {\nnamespace fd {\nclass failure_detector_service : public ::dsn::serverlet<failure_detector_service>\n{\npublic:\n    failure_detector_service() : ::dsn::serverlet<failure_detector_service>(\"failure_detector\") {}\n    virtual ~failure_detector_service() {}\n\nprotected:\n    // all service handlers to be implemented further\n    // RPC_FD_FAILURE_DETECTOR_PING\n    virtual void on_ping(const beacon_msg &beacon, ::dsn::rpc_replier<beacon_ack> &reply)\n    {\n        std::cout 
<< \"... exec RPC_FD_FAILURE_DETECTOR_PING ... (not implemented) \" << std::endl;\n        beacon_ack resp;\n        reply(resp);\n    }\n\npublic:\n    void open_service()\n    {\n        this->register_async_rpc_handler(\n            RPC_FD_FAILURE_DETECTOR_PING, \"ping\", &failure_detector_service::on_ping);\n    }\n\n    void close_service() { this->unregister_rpc_handler(RPC_FD_FAILURE_DETECTOR_PING); }\n};\n}\n}"
  },
  {
    "path": "include/dsn/dist/failure_detector.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/dist/failure_detector/failure_detector.h>\n"
  },
  {
    "path": "include/dsn/dist/failure_detector_multimaster.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool-api/group_address.h>\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/dist/failure_detector.h>\n#include <functional>\n\nnamespace dsn {\nnamespace dist {\n\nclass slave_failure_detector_with_multimaster : public dsn::fd::failure_detector\n{\npublic:\n    slave_failure_detector_with_multimaster(std::vector<::dsn::rpc_address> &meta_servers,\n                                            std::function<void()> &&master_disconnected_callback,\n                                            std::function<void()> 
&&master_connected_callback);\n    virtual ~slave_failure_detector_with_multimaster() {}\n\n    void end_ping(::dsn::error_code err, const fd::beacon_ack &ack, void *context) override;\n\n    // client side\n    void on_master_disconnected(const std::vector<::dsn::rpc_address> &nodes) override;\n    void on_master_connected(::dsn::rpc_address node) override;\n\n    // server side\n    void on_worker_disconnected(const std::vector<::dsn::rpc_address> &nodes) override\n    {\n        dassert(false, \"invalid execution flow\");\n    }\n    void on_worker_connected(::dsn::rpc_address node) override\n    {\n        dassert(false, \"invalid execution flow\");\n    }\n\n    ::dsn::rpc_address current_server_contact() const;\n    ::dsn::rpc_address get_servers() const { return _meta_servers; }\n\n    void set_leader_for_test(dsn::rpc_address meta);\n\nprivate:\n    dsn::rpc_address _meta_servers;\n    std::function<void()> _master_disconnected_callback;\n    std::function<void()> _master_connected_callback;\n};\n\n//------------------ inline implementation --------------------------------\ninline ::dsn::rpc_address slave_failure_detector_with_multimaster::current_server_contact() const\n{\n    zauto_lock l(failure_detector::_lock);\n    return _meta_servers.group_address()->leader();\n}\n}\n} // end namespace\n"
  },
  {
    "path": "include/dsn/dist/fmt_logging.h",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#pragma once\n\n#include <fmt/ostream.h>\n\n// The macros below no longer use the default snprintf method for log message formatting,\n// instead we use fmt::format.\n// TODO(wutao1): prevent construction of std::string for each log.\n\n#define dlog_f(level, ...)                                                                         \\\n    do {                                                                                           \\\n        if (level >= dsn_log_start_level)                                                          \\\n            dsn_log(                                                                               \\\n                __FILENAME__, __FUNCTION__, __LINE__, level, fmt::format(__VA_ARGS__).c_str());    \\\n    } while (false)\n#define dinfo_f(...) dlog_f(LOG_LEVEL_INFORMATION, __VA_ARGS__)\n#define ddebug_f(...) dlog_f(LOG_LEVEL_DEBUG, __VA_ARGS__)\n#define dwarn_f(...) dlog_f(LOG_LEVEL_WARNING, __VA_ARGS__)\n#define derror_f(...) dlog_f(LOG_LEVEL_ERROR, __VA_ARGS__)\n#define dfatal_f(...) dlog_f(LOG_LEVEL_FATAL, __VA_ARGS__)\n#define dassert_f(x, ...)                                                
                          \\\n    do {                                                                                           \\\n        if (dsn_unlikely(!(x))) {                                                                  \\\n            dlog_f(LOG_LEVEL_FATAL, \"assertion expression: \" #x);                                  \\\n            dlog_f(LOG_LEVEL_FATAL, __VA_ARGS__);                                                  \\\n            dsn_coredump();                                                                        \\\n        }                                                                                          \\\n    } while (false)\n\n// Macros for writing log message prefixed by gpid and address.\n#define dinfo_replica(...) dinfo_f(\"[{}] {}\", replica_name(), fmt::format(__VA_ARGS__))\n#define ddebug_replica(...) ddebug_f(\"[{}] {}\", replica_name(), fmt::format(__VA_ARGS__))\n#define dwarn_replica(...) dwarn_f(\"[{}] {}\", replica_name(), fmt::format(__VA_ARGS__))\n#define derror_replica(...) derror_f(\"[{}] {}\", replica_name(), fmt::format(__VA_ARGS__))\n#define dfatal_replica(...) dfatal_f(\"[{}] {}\", replica_name(), fmt::format(__VA_ARGS__))\n#define dassert_replica(x, ...) dassert_f(x, \"[{}] {}\", replica_name(), fmt::format(__VA_ARGS__))\n\n// Macros to check expected condition. 
It will abort the application\n// and log a fatal message when the condition is not met.\n#define dcheck_eq(var1, var2) dassert_f(var1 == var2, \"{} vs {}\", var1, var2)\n#define dcheck_ge(var1, var2) dassert_f(var1 >= var2, \"{} vs {}\", var1, var2)\n#define dcheck_le(var1, var2) dassert_f(var1 <= var2, \"{} vs {}\", var1, var2)\n#define dcheck_gt(var1, var2) dassert_f(var1 > var2, \"{} vs {}\", var1, var2)\n#define dcheck_lt(var1, var2) dassert_f(var1 < var2, \"{} vs {}\", var1, var2)\n\n#define dcheck_eq_replica(var1, var2) dassert_replica(var1 == var2, \"{} vs {}\", var1, var2)\n#define dcheck_ge_replica(var1, var2) dassert_replica(var1 >= var2, \"{} vs {}\", var1, var2)\n#define dcheck_le_replica(var1, var2) dassert_replica(var1 <= var2, \"{} vs {}\", var1, var2)\n#define dcheck_gt_replica(var1, var2) dassert_replica(var1 > var2, \"{} vs {}\", var1, var2)\n#define dcheck_lt_replica(var1, var2) dassert_replica(var1 < var2, \"{} vs {}\", var1, var2)\n\n// Return the given status if condition is not true.\n#define ERR_LOG_AND_RETURN_NOT_TRUE(s, err, ...)                                                   \\\n    do {                                                                                           \\\n        if (dsn_unlikely(!(s))) {                                                                  \\\n            derror_f(\"{}: {}\", err, fmt::format(__VA_ARGS__));                                     \\\n            return err;                                                                            \\\n        }                                                                                          \\\n    } while (0)\n\n// Return the given status if it is not ERR_OK.\n#define ERR_LOG_AND_RETURN_NOT_OK(s, ...)                                                          
\\\n    do {                                                                                           \\\n        error_code _err = (s);                                                                     \\\n        ERR_LOG_AND_RETURN_NOT_TRUE(_err == ERR_OK, _err, __VA_ARGS__);                            \\\n    } while (0)\n"
  },
  {
    "path": "include/dsn/dist/meta_state_service.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     interface of the reliable meta state service\n *     it is usually for storing meta state of dist systems, such as membership\n *\n * Revision history:\n *     2015-10-28, Weijie Sun, first version\n *     2015-11-05, @imzhenyu (Zhenyu Guo), adjust the interface, so that\n *                (1) return task_ptr/tracker for callers to cancel or wait;\n *                (2) add factory for provider registration;\n *                (3) add cb_code parameter, then users can specify where the callback\n *                    should be executed\n *     2015-11-06, @imzhenyu (Zhenyu Guo), add watch/unwatch API\n *     2015-12-28, @shengofsun (Weijie SUn), add transaction api\n *     xxxx-xx-xx, author, fix bug about 
xxx\n */\n\n#pragma once\n\n#include <dsn/service_api_cpp.h>\n#include <dsn/utility/error_code.h>\n#include <dsn/tool-api/future_types.h>\n#include <string>\n#include <functional>\n\nnamespace dsn {\nnamespace dist {\ntypedef std::function<void(error_code ec, const blob &val)> err_value_callback;\ntypedef future_task<error_code, blob> err_value_future;\ntypedef dsn::ref_ptr<err_value_future> err_value_future_ptr;\n\ntypedef std::function<void(error_code ec, const std::vector<std::string> &ret_strv)>\n    err_stringv_callback;\ntypedef future_task<error_code, std::vector<std::string>> err_stringv_future;\ntypedef dsn::ref_ptr<err_stringv_future> err_stringv_future_ptr;\n\nclass meta_state_service\n{\npublic:\n    template <typename T>\n    static meta_state_service *create()\n    {\n        return new T();\n    }\n\n    typedef meta_state_service *(*factory)();\n\npublic:\n    /* providers should implement this to support transaction */\n    class transaction_entries\n    {\n    public:\n        virtual ~transaction_entries() {}\n        virtual error_code create_node(const std::string &node, const blob &value = blob()) = 0;\n        virtual error_code delete_node(const std::string &node) = 0;\n        virtual error_code set_data(const std::string &node, const blob &value = blob()) = 0;\n\n        virtual error_code get_result(unsigned int entry_index) = 0;\n    };\n\npublic:\n    virtual ~meta_state_service() {}\n    /*\n     * initialization work\n     */\n    virtual error_code initialize(const std::vector<std::string> &args) = 0;\n\n    /*\n     * finalize work\n     */\n    virtual error_code finalize() = 0;\n\n    /*\n     * create a transaction_entries structure\n     * capacity: the maximum entries the structure can hold\n     */\n    virtual std::shared_ptr<transaction_entries> new_transaction_entries(unsigned int capacity) = 0;\n\n    /*\n     * submit transaction, it should be all succeeded or all failed\n     * cb_code: the task code specifies where to 
execute the callback\n     * cb_transaction: callback, ec to indicate success or failure reason\n     * tracker: to track (wait/cancel) whether the callback is executed\n     */\n    virtual task_ptr submit_transaction(const std::shared_ptr<transaction_entries> &entries,\n                                        task_code cb_code,\n                                        const err_callback &cb_transaction,\n                                        dsn::task_tracker *tracker = nullptr) = 0;\n\n    /*\n     * create a dir node\n     * node: the dir name with full path\n     * cb_code: the task code specifies where to execute the callback\n     * cb_create: create callback, ec to indicate success or failure reason\n     * value: the data value to store in the node\n     * tracker: to track (wait/cancel) whether the callback is executed\n     */\n    virtual task_ptr create_node(const std::string &node,\n                                 task_code cb_code,\n                                 const err_callback &cb_create,\n                                 const blob &value = blob(),\n                                 dsn::task_tracker *tracker = nullptr) = 0;\n    /*\n     * delete a dir, the directory may be empty or not\n     * node: the dir name with full path\n     * recursively_delete: true for recursively delete non-empty node,\n     *                     false for failure\n     * cb_code: the task code specifies where to execute the callback\n     * cb_delete: delete callback, ec to indicate success or failure reason\n     */\n    virtual task_ptr delete_node(const std::string &node,\n                                 bool recursively_delete,\n                                 task_code cb_code,\n                                 const err_callback &cb_delete,\n                                 dsn::task_tracker *tracker = nullptr) = 0;\n    /*\n     * check if the node dir exists\n     * node: the dir name with full path\n     * cb_code: the task code specifies where to 
execute the callback\n     * cb_exist: callback to indicate the check result\n     */\n    virtual task_ptr node_exist(const std::string &node,\n                                task_code cb_code,\n                                const err_callback &cb_exist,\n                                dsn::task_tracker *tracker = nullptr) = 0;\n    /*\n     * get the data in node\n     * node: dir name with full path\n     * cb_code: the task code specifies where to execute the callback\n     * cb_get_data: callback. On success, ec indicates success and\n     *              node data is returned in val;\n     *              otherwise the user gets the failure reason in ec\n     */\n    virtual task_ptr get_data(const std::string &node,\n                              task_code cb_code,\n                              const err_value_callback &cb_get_data,\n                              dsn::task_tracker *tracker = nullptr) = 0;\n    /*\n     * set the data of the node\n     * node: dir name with full path\n     * value: the value\n     * cb_code: the task code specifies where to execute the callback\n     * cb_set_data: the callback to indicate the set result\n     */\n    virtual task_ptr set_data(const std::string &node,\n                              const blob &value,\n                              task_code cb_code,\n                              const err_callback &cb_set_data,\n                              dsn::task_tracker *tracker = nullptr) = 0;\n    /*\n     * get all children of a node\n     * node: dir name with full path\n     * cb_code: the task code specifies where to execute the callback\n     * cb_get_children: on success, ret_strv stores the node names of the children\n     */\n    virtual task_ptr get_children(const std::string &node,\n                                  task_code cb_code,\n                                  const err_stringv_callback &cb_get_children,\n                                  dsn::task_tracker *tracker = nullptr) = 0;\n};\n}\n}\n"
  },
  {
    "path": "include/dsn/dist/nfs_node.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <string>\n#include <memory>\n#include <dsn/utility/utils.h>\n#include <dsn/utility/binary_reader.h>\n#include <dsn/utility/binary_writer.h>\n#include <dsn/tool-api/aio_task.h>\n\nnamespace dsn {\n\nstruct remote_copy_request\n{\n    dsn::rpc_address source;\n    std::string source_disk_tag;\n    std::string source_dir;\n    std::vector<std::string> files;\n    std::string dest_disk_tag;\n    std::string dest_dir;\n    bool overwrite;\n    bool high_priority;\n};\n\nclass nfs_node\n{\npublic:\n    static std::unique_ptr<nfs_node> create();\n\npublic:\n    aio_task_ptr copy_remote_directory(const rpc_address &remote,\n                                       const std::string &source_disk_tag,\n                   
                    const std::string &source_dir,\n                                       const std::string &dest_disk_tag,\n                                       const std::string &dest_dir,\n                                       bool overwrite,\n                                       bool high_priority,\n                                       task_code callback_code,\n                                       task_tracker *tracker,\n                                       aio_handler &&callback,\n                                       int hash = 0);\n    aio_task_ptr copy_remote_files(const rpc_address &remote,\n                                   const std::string &source_disk_tag,\n                                   const std::string &source_dir,\n                                   const std::vector<std::string> &files, // empty for all\n                                   const std::string &dest_disk_tag,\n                                   const std::string &dest_dir,\n                                   bool overwrite,\n                                   bool high_priority,\n                                   task_code callback_code,\n                                   task_tracker *tracker,\n                                   aio_handler &&callback,\n                                   int hash = 0);\n\n    aio_task_ptr copy_remote_files(std::shared_ptr<remote_copy_request> &request,\n                                   task_code callback_code,\n                                   task_tracker *tracker,\n                                   aio_handler &&callback,\n                                   int hash = 0);\n\n    nfs_node() {}\n    virtual ~nfs_node() {}\n    virtual error_code start() = 0;\n    virtual error_code stop() = 0;\n\nprotected:\n    virtual void call(std::shared_ptr<remote_copy_request> rci, aio_task *callback) = 0;\n};\n}\n"
  },
  {
    "path": "include/dsn/dist/remote_command.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/utility/errors.h>\n#include <dsn/tool-api/task.h>\n\nnamespace dsn {\nnamespace dist {\nnamespace cmd {\n\n/// Calls a remote command to the remote server.\ntask_ptr async_call_remote(rpc_address remote,\n                           const std::string &cmd,\n                           const std::vector<std::string> &arguments,\n                           std::function<void(error_code, const std::string &)> callback,\n                           std::chrono::milliseconds timeout = std::chrono::milliseconds(0));\n\n/// Registers the server-side RPC handler of remote commands.\nbool register_remote_command_rpc();\n\n} // namespace cmd\n} // namespace dist\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/replication/duplication_common.h",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#pragma once\n\n#include <dsn/cpp/rpc_holder.h>\n#include <dsn/utility/errors.h>\n#include <dsn/dist/replication/replication_types.h>\n#include <dsn/utility/flags.h>\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DECLARE_uint32(duplicate_log_batch_bytes);\n\ntypedef rpc_holder<duplication_modify_request, duplication_modify_response> duplication_modify_rpc;\ntypedef rpc_holder<duplication_add_request, duplication_add_response> duplication_add_rpc;\ntypedef rpc_holder<duplication_query_request, duplication_query_response> duplication_query_rpc;\ntypedef rpc_holder<duplication_sync_request, duplication_sync_response> duplication_sync_rpc;\n\ntypedef int32_t dupid_t;\n\nextern const char *duplication_status_to_string(duplication_status::type status);\n\nextern const char *duplication_fail_mode_to_string(duplication_fail_mode::type);\n\ninline bool is_duplication_status_invalid(duplication_status::type status)\n{\n    return status == duplication_status::DS_INIT || status == duplication_status::DS_REMOVED;\n}\n\n/// Returns the cluster id of url specified in the duplication-group section\n/// of your configuration, for 
example:\n///\n/// ```\n///   [duplication-group]\n///       wuhan-mi-srv-ad = 3\n///       tianjin-mi-srv-ad = 4\n/// ```\n///\n/// The returned cluster id of get_duplication_cluster_id(\"wuhan-mi-srv-ad\") is 3.\nextern error_with<uint8_t> get_duplication_cluster_id(const std::string &cluster_name);\n\n/// Returns a json string.\nextern std::string duplication_entry_to_string(const duplication_entry &dup);\n\n/// Returns a json string.\nextern std::string duplication_query_response_to_string(const duplication_query_response &);\n\n/// Returns a mapping from cluster_name to cluster_id.\nextern const std::map<std::string, uint8_t> &get_duplication_group();\n\nextern const std::set<uint8_t> &get_distinct_cluster_id_set();\n\ninline bool is_cluster_id_configured(uint8_t cid)\n{\n    return get_distinct_cluster_id_set().find(cid) != get_distinct_cluster_id_set().end();\n}\n\nstruct duplication_constants\n{\n    const static std::string kDuplicationCheckpointRootDir;\n    const static std::string kClustersSectionName;\n    // These will fill into app env and mark one app as a \"follower app\" and record master info\n    const static std::string kDuplicationEnvMasterClusterKey;\n    const static std::string kDuplicationEnvMasterMetasKey;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/replication/meta_service_app.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#pragma once\n\n#include <dsn/cpp/service_app.h>\n\nnamespace dsn {\nclass version_http_service;\nnamespace replication {\n\nclass meta_service;\n\nnamespace test {\nclass test_checker;\n}\n} // namespace replication\n} // namespace dsn\n\nnamespace dsn {\n\nnamespace service {\n\nclass meta_service_app : public service_app\n{\npublic:\n    static void register_components();\n    static void register_all();\n\n    meta_service_app(const service_app_info *info);\n    virtual ~meta_service_app();\n\n    virtual ::dsn::error_code start(const std::vector<std::string> &args) override;\n\n    virtual ::dsn::error_code stop(bool cleanup = false) override;\n\nprivate:\n    friend class ::dsn::replication::test::test_checker;\n    
std::unique_ptr<dsn::replication::meta_service> _service;\n};\n} // namespace service\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/replication/mutation_duplicator.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/utility/errors.h>\n#include <dsn/dist/replication/replication_types.h>\n#include <dsn/dist/replication/replica_base.h>\n#include <dsn/cpp/pipeline.h>\n\nnamespace dsn {\nnamespace replication {\n\n/// \\brief Each of the mutation is a tuple made up of\n/// <timestamp, task_code, dsn::blob>.\n/// dsn::blob is the content of the mutation.\ntypedef std::tuple<uint64_t, task_code, blob> mutation_tuple;\n\n/// mutations are sorted by timestamp in mutation_tuple_set.\nstruct mutation_tuple_cmp\n{\n    inline bool operator()(const mutation_tuple &lhs, const mutation_tuple &rhs) const\n    {\n        // different mutations is probable to be batched together\n        // and sharing the same timestamp, so here we also compare\n        // the data pointer.\n        if (std::get<0>(lhs) == std::get<0>(rhs)) {\n            return std::get<2>(lhs).data() < std::get<2>(rhs).data();\n        }\n        return std::get<0>(lhs) < std::get<0>(rhs);\n    }\n};\ntypedef std::set<mutation_tuple, mutation_tuple_cmp> mutation_tuple_set;\n\n/// \\brief This is an interface for handling the mutation logs intended to\n/// be duplicated to 
remote cluster.\n/// \\see dsn::replication::replica_duplicator\nclass mutation_duplicator : public replica_base\n{\npublic:\n    typedef std::function<void(size_t /*total_shipped_size*/)> callback;\n\n    /// Duplicate the provided mutations to the remote cluster.\n    /// The implementation must be non-blocking.\n    ///\n    /// \\param cb: Call it when all the given mutations were sent successfully\n    virtual void duplicate(mutation_tuple_set mutations, callback cb) = 0;\n\n    // Singleton creator of mutation_duplicator.\n    static std::function<std::unique_ptr<mutation_duplicator>(\n        replica_base *, string_view /*remote cluster*/, string_view /*app name*/)>\n        creator;\n\n    explicit mutation_duplicator(replica_base *r) : replica_base(r) {}\n\n    virtual ~mutation_duplicator() = default;\n\n    void set_task_environment(pipeline::environment *env) { _env = *env; }\n\nprotected:\n    friend class replica_duplicator_test;\n\n    pipeline::environment _env;\n};\n\ninline std::unique_ptr<mutation_duplicator>\nnew_mutation_duplicator(replica_base *r, string_view remote_cluster_address, string_view app)\n{\n    return mutation_duplicator::creator(r, remote_cluster_address, app);\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/replication/mutation_log_tool.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <string>\n#include <iostream>\n#include <functional>\n#include <dsn/tool-api/rpc_message.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass mutation_log_tool\n{\npublic:\n    bool\n    dump(const std::string &log_dir,\n         std::ostream &output,\n         std::function<void(\n             int64_t decree, int64_t timestamp, dsn::message_ex **requests, int count)> callback);\n};\n}\n}\n"
  },
  {
    "path": "include/dsn/dist/replication/partition_resolver.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/autoref_ptr.h>\n#include <dsn/utility/error_code.h>\n#include <dsn/tool-api/gpid.h>\n#include <dsn/tool-api/rpc_address.h>\n#include <dsn/tool-api/rpc_message.h>\n#include <dsn/tool-api/async_calls.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass partition_resolver : public ref_counter\n{\npublic:\n    static dsn::ref_ptr<partition_resolver>\n    get_resolver(const char *cluster_name,\n                 const std::vector<dsn::rpc_address> &meta_list,\n                 const char *app_name);\n\n    template <typename TReq, typename TCallback>\n    dsn::rpc_response_task_ptr call_op(dsn::task_code code,\n                                       TReq &&request,\n                                  
     dsn::task_tracker *tracker,\n                                       TCallback &&callback,\n                                       std::chrono::milliseconds timeout,\n                                       uint64_t partition_hash,\n                                       int reply_hash = 0)\n    {\n        dsn::message_ex *msg = dsn::message_ex::create_request(\n            code, static_cast<int>(timeout.count()), 0, partition_hash);\n        marshall(msg, std::forward<TReq>(request));\n        dsn::rpc_response_task_ptr response_task = rpc::create_rpc_response_task(\n            msg, tracker, std::forward<TCallback>(callback), reply_hash);\n        call_task(response_task);\n        return response_task;\n    }\n\n    // choosing a proper replica server from meta server or local route cache\n    // and send the read/write request.\n    // if got reply or error, call the callback.\n    // parameters like request data, timeout, callback handler are all wrapped\n    // into \"task\", you may want to refer to dsn::rpc_response_task for details.\n    void call_task(const dsn::rpc_response_task_ptr &task);\n\n    std::string get_app_name() const { return _app_name; }\n\n    dsn::rpc_address get_meta_server() const { return _meta_server; }\n\nprotected:\n    partition_resolver(rpc_address meta_server, const char *app_name)\n        : _app_name(app_name), _meta_server(meta_server)\n    {\n    }\n\n    virtual ~partition_resolver() {}\n\n    struct resolve_result\n    {\n        ///< ERR_OK\n        ///< ERR_SERVICE_NOT_FOUND if resolver or app is missing\n        ///< ERR_IO_PENDING if resolve is in progress, callers\n        ///< should call resolve_async in this case\n        error_code err;\n        ///< IPv4 of the target to send request to\n        rpc_address address;\n        ///< global partition identity\n        dsn::gpid pid;\n    };\n\n    /**\n     * resolve partition_hash into IP or group addresses to know what to connect next\n     *\n     * \\param 
partition_hash the partition hash\n     * \\param callback       callback invoked on completion or timeout\n     * \\param timeout_ms     timeout to execute the callback\n     *\n     * \\return see \\ref resolve_result for details\n     */\n    virtual void resolve(uint64_t partition_hash,\n                         std::function<void(resolve_result &&)> &&callback,\n                         int timeout_ms) = 0;\n\n    /*!\n     failure handler when access failed for certain partition\n\n     \\param partition_index zero-based index of the partition.\n     \\param err             error code\n\n     this is usually to trigger new round of address resolve\n     */\n    virtual void on_access_failure(int partition_index, error_code err) = 0;\n\n    /**\n     * get zero-based partition index\n     *\n     * \\param partition_count number of partitions.\n     * \\param partition_hash  the partition hash.\n     *\n     * \\return zero-based partition index.\n     */\n\n    virtual int get_partition_index(int partition_count, uint64_t partition_hash) = 0;\n\n    std::string _cluster_name;\n    std::string _app_name;\n    rpc_address _meta_server;\n};\n\ntypedef ref_ptr<partition_resolver> partition_resolver_ptr;\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/replication/replica_base.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/tool-api/gpid.h>\n#include <dsn/utility/string_view.h>\n\nnamespace dsn {\nnamespace replication {\n\n/// Base class for types that are one-instance-per-replica.\nstruct replica_base\n{\n    replica_base(gpid id, string_view name, string_view app_name)\n        : _gpid(id), _name(name), _app_name(app_name)\n    {\n    }\n\n    explicit replica_base(replica_base *rhs)\n        : replica_base(rhs->get_gpid(), rhs->replica_name(), rhs->_app_name)\n    {\n    }\n\n    gpid get_gpid() const { return _gpid; }\n\n    const char *replica_name() const { return _name.c_str(); }\n\n    const char *app_name() const { return _app_name.c_str(); }\n\nprivate:\n    const gpid _gpid;\n    const std::string _name;\n    
const std::string _app_name;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/replication/replica_envs.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <cstdint>\n#include <string>\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_envs\n{\npublic:\n    static const std::string DENY_CLIENT_REQUEST;\n    static const std::string WRITE_QPS_THROTTLING;\n    static const std::string WRITE_SIZE_THROTTLING;\n    static const uint64_t MIN_SLOW_QUERY_THRESHOLD_MS;\n    static const std::string SLOW_QUERY_THRESHOLD;\n    static const std::string TABLE_LEVEL_DEFAULT_TTL;\n    static const std::string ROCKSDB_USAGE_SCENARIO;\n    static const std::string ROCKSDB_CHECKPOINT_RESERVE_MIN_COUNT;\n    static const std::string ROCKSDB_CHECKPOINT_RESERVE_TIME_SECONDS;\n    static const std::string ROCKSDB_ITERATION_THRESHOLD_TIME_MS;\n    static const std::string 
ROCKSDB_BLOCK_CACHE_ENABLED;\n    static const std::string MANUAL_COMPACT_DISABLED;\n    static const std::string MANUAL_COMPACT_MAX_CONCURRENT_RUNNING_COUNT;\n    static const std::string MANUAL_COMPACT_ONCE_TRIGGER_TIME;\n    static const std::string MANUAL_COMPACT_ONCE_TARGET_LEVEL;\n    static const std::string MANUAL_COMPACT_ONCE_BOTTOMMOST_LEVEL_COMPACTION;\n    static const std::string MANUAL_COMPACT_PERIODIC_TRIGGER_TIME;\n    static const std::string MANUAL_COMPACT_PERIODIC_TARGET_LEVEL;\n    static const std::string MANUAL_COMPACT_PERIODIC_BOTTOMMOST_LEVEL_COMPACTION;\n    static const std::string BUSINESS_INFO;\n    static const std::string REPLICA_ACCESS_CONTROLLER_ALLOWED_USERS;\n    static const std::string READ_QPS_THROTTLING;\n    static const std::string READ_SIZE_THROTTLING;\n    static const std::string BACKUP_REQUEST_QPS_THROTTLING;\n    static const std::string SPLIT_VALIDATE_PARTITION_HASH;\n    static const std::string USER_SPECIFIED_COMPACTION;\n    static const std::string ROCKSDB_ALLOW_INGEST_BEHIND;\n    static const std::string UPDATE_MAX_REPLICA_COUNT;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/replication/replica_test_utils.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/// This file contains utilities for upper level applications (pegasus) which\n/// needs the hidden abstraction of rDSN in order to make unit test.\n\n#include <dsn/cpp/serialization_helper/dsn.layer2_types.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass replica;\nclass replica_stub;\n\nextern replica *create_test_replica(replica_stub *stub,\n                                    gpid gpid,\n                                    const app_info &app,\n                                    const char *dir,\n                                    bool restore_if_necessary,\n                                    bool is_duplication_follower);\n\nextern replica_stub *create_test_replica_stub();\n\nextern void destroy_replica(replica 
*r);\n\nextern void destroy_replica_stub(replica_stub *rs);\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/replication/replication.codes.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/tool-api/auto_codes.h>\n\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_META_SERVER)\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_META_STATE)\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_REPLICATION)\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_LOCAL_APP)\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_REPLICATION_LONG)\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_COMPACT)\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_INGESTION)\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_SLOG)\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_PLOG)\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_SCAN)\n\n#define DEFINE_STORAGE_WRITE_RPC_CODE(x, allow_batch, is_idempotent)                               \\\n    DEFINE_STORAGE_RPC_CODE(                                                                       \\\n     
   x, TASK_PRIORITY_LOW, THREAD_POOL_REPLICATION, true, allow_batch, is_idempotent)\n#define DEFINE_STORAGE_READ_RPC_CODE(x)                                                            \\\n    DEFINE_STORAGE_RPC_CODE(x, TASK_PRIORITY_COMMON, THREAD_POOL_LOCAL_APP, false, true, true)\n#define DEFINE_STORAGE_SCAN_RPC_CODE(x)                                                            \\\n    DEFINE_STORAGE_RPC_CODE(x, TASK_PRIORITY_COMMON, THREAD_POOL_SCAN, false, true, true)\n\n#define MAKE_EVENT_CODE(x, pri) DEFINE_TASK_CODE(x, pri, CURRENT_THREAD_POOL)\n#define MAKE_EVENT_CODE_AIO(x, pri) DEFINE_TASK_CODE_AIO(x, pri, CURRENT_THREAD_POOL)\n#define MAKE_EVENT_CODE_RPC(x, pri) DEFINE_TASK_CODE_RPC(x, pri, CURRENT_THREAD_POOL)\n\n// THREAD_POOL_DEFAULT\n#define CURRENT_THREAD_POOL dsn::THREAD_POOL_DEFAULT\nMAKE_EVENT_CODE(LPC_REPLICA_SERVER_DELAY_START, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_MUTATION_LOG_PENDING_TIMER, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_AIO(LPC_ASYNC_READ_COMPLETE, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_AIO(LPC_ASYNC_WRITE_COMPLETE, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_REPLICA_STATE_CHANGE_NOTIFICATION, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_REPORT, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_TEST, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_TEST2, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_TEST3, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_TEST4, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_TEST, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_TEST_AGENT_WRITE, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_TEST_AGENT_READ, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_AIO(LPC_AIO_TEST, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_AIO(LPC_AIO_IMMEDIATE_CALLBACK, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_AIO(LPC_WRITE_REPLICATION_LOG_COMMON, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_QUERY_CONFIGURATION_ALL, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE(LPC_MEM_RELEASE, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_CREATE_CHILD, 
TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_QUERY_DISK_INFO, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_REPLICA_DISK_MIGRATE, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_DETECT_HOTKEY, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_ADD_NEW_DISK, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_ANALYZE_HOTKEY, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_BACKGROUND_BULK_LOAD, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_BULK_LOAD_INGESTION, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE(LPC_LATENCY_TRACE, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_DUPLICATE_CHECKPOINT, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_AIO(LPC_DUPLICATE_CHECKPOINT_COMPLETED, TASK_PRIORITY_COMMON)\n#undef CURRENT_THREAD_POOL\n\n// THREAD_POOL_META_SERVER\n#define CURRENT_THREAD_POOL THREAD_POOL_META_SERVER\nMAKE_EVENT_CODE_RPC(RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_CONFIG_SYNC, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_UPDATE_PARTITION_CONFIGURATION, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_CREATE_APP, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_DROP_APP, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_RECALL_APP, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_LIST_APPS, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_LIST_NODES, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_CLUSTER_INFO, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_CONTROL_META, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_START_RECOVERY, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE_RPC(RPC_CM_START_RESTORE, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE(LPC_RESTORE_BACKGROUND, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE_RPC(RPC_CM_PROPOSE_BALANCER, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_ADD_BACKUP_POLICY, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_QUERY_BACKUP_POLICY, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_MODIFY_BACKUP_POLICY, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_META_CALLBACK, 
TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_QUERY_PN_DECREE, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_REPORT_RESTORE_STATUS, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_QUERY_RESTORE_STATUS, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_ADD_DUPLICATION, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_MODIFY_DUPLICATION, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_QUERY_DUPLICATION, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_DUPLICATION_SYNC, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_UPDATE_APP_ENV, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_DDD_DIAGNOSE, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_START_PARTITION_SPLIT, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_CONTROL_PARTITION_SPLIT, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_QUERY_PARTITION_SPLIT, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_REGISTER_CHILD_REPLICA, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_NOTIFY_STOP_SPLIT, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_QUERY_CHILD_STATE, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_START_BULK_LOAD, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_CONTROL_BULK_LOAD, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_QUERY_BULK_LOAD_STATUS, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_CLEAR_BULK_LOAD, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_START_BACKUP_APP, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_QUERY_BACKUP_STATUS, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_START_MANUAL_COMPACT, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_QUERY_MANUAL_COMPACT_STATUS, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_GET_MAX_REPLICA_COUNT, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CM_SET_MAX_REPLICA_COUNT, TASK_PRIORITY_COMMON)\n#undef CURRENT_THREAD_POOL\n\n#define CURRENT_THREAD_POOL THREAD_POOL_META_STATE\nMAKE_EVENT_CODE(LPC_META_STATE_HIGH, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE(LPC_META_STATE_NORMAL, TASK_PRIORITY_COMMON)\n#undef 
CURRENT_THREAD_POOL\n\n// THREAD_POOL_REPLICATION\n#define CURRENT_THREAD_POOL THREAD_POOL_REPLICATION\nMAKE_EVENT_CODE(LPC_REPLICATION_INIT_LOAD, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(RPC_REPLICATION_WRITE_EMPTY, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_PER_REPLICA_CHECKPOINT_TIMER, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_PER_REPLICA_COLLECT_INFO_TIMER, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_write_THROTTLING_DELAY, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_GROUP_CHECK, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_CM_DISCONNECTED_SCATTER, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE(LPC_QUERY_NODE_CONFIGURATION_SCATTER, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE(LPC_QUERY_NODE_CONFIGURATION_SCATTER2, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE(LPC_DELAY_UPDATE_CONFIG, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE(LPC_DELAY_LEARN, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE(LPC_LEARN_REMOTE_DELTA_FILES_COMPLETED, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE(LPC_CHECKPOINT_REPLICA_COMPLETED, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE(LPC_SIM_UPDATE_PARTITION_CONFIGURATION_REPLY, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_AIO(LPC_WRITE_REPLICATION_LOG, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE(LPC_REPLICATION_ERROR, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE_AIO(LPC_LERARN_REMOTE_DISK_STATE, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE_RPC(RPC_CONFIG_PROPOSAL, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE_RPC(RPC_QUERY_PN_DECREE, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE_RPC(RPC_QUERY_REPLICA_INFO, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE_RPC(RPC_QUERY_LAST_CHECKPOINT_INFO, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_PREPARE, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE(LPC_DELAY_PREPARE, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE_RPC(RPC_GROUP_CHECK, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE_RPC(RPC_QUERY_APP_INFO, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_LEARN, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE_RPC(RPC_LEARN_COMPLETION_NOTIFY, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE_RPC(RPC_LEARN_ADD_LEARNER, TASK_PRIORITY_HIGH)\nMAKE_EVENT_CODE_RPC(RPC_REMOVE_REPLICA, 
TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_AIO(LPC_REPLICA_COPY_LAST_CHECKPOINT_DONE, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_COLD_BACKUP, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_CLEAR_COLD_BACKUP, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_REPLICATION_COLD_BACKUP, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_EXEC_COMMAND_ON_REPLICA, TASK_PRIORITY_LOW)\nMAKE_EVENT_CODE(LPC_PARTITION_SPLIT, TASK_PRIORITY_LOW)\nMAKE_EVENT_CODE(LPC_PARTITION_SPLIT_ERROR, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_SPLIT_NOTIFY_CATCH_UP, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_SPLIT_UPDATE_CHILD_PARTITION_COUNT, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_BULK_LOAD, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_RPC(RPC_GROUP_BULK_LOAD, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_REPLICATION_LOW, TASK_PRIORITY_LOW)\nMAKE_EVENT_CODE(LPC_REPLICATION_COMMON, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_REPLICATION_HIGH, TASK_PRIORITY_HIGH)\n#undef CURRENT_THREAD_POOL\n\n// THREAD_POOL_LOCAL_APP\n#define CURRENT_THREAD_POOL THREAD_POOL_LOCAL_APP\nMAKE_EVENT_CODE(LPC_WRITE, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_read_THROTTLING_DELAY, TASK_PRIORITY_COMMON)\n#undef CURRENT_THREAD_POOL\n\n// THREAD_POOL_REPLICATION_LONG\n#define CURRENT_THREAD_POOL THREAD_POOL_REPLICATION_LONG\nMAKE_EVENT_CODE(LPC_LEARN_REMOTE_DELTA_FILES, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE_AIO(LPC_REPLICATION_COPY_REMOTE_FILES, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_GARBAGE_COLLECT_LOGS_AND_REPLICAS, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_OPEN_REPLICA, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_CLOSE_REPLICA, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_CHECKPOINT_REPLICA, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_CATCHUP_WITH_PRIVATE_LOGS, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_DISK_STAT, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_BACKGROUND_COLD_BACKUP, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_PARTITION_SPLIT_ASYNC_LEARN, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_REPLICATION_LONG_LOW, 
TASK_PRIORITY_LOW)\nMAKE_EVENT_CODE(LPC_REPLICATION_LONG_COMMON, TASK_PRIORITY_COMMON)\nMAKE_EVENT_CODE(LPC_REPLICATION_LONG_HIGH, TASK_PRIORITY_HIGH)\n#undef CURRENT_THREAD_POOL\n\n#define CURRENT_THREAD_POOL THREAD_POOL_SLOG\nMAKE_EVENT_CODE_AIO(LPC_WRITE_REPLICATION_LOG_SHARED, TASK_PRIORITY_HIGH)\n#undef CURRENT_THREAD_POOL\n\n#define CURRENT_THREAD_POOL THREAD_POOL_PLOG\nMAKE_EVENT_CODE_AIO(LPC_WRITE_REPLICATION_LOG_PRIVATE, TASK_PRIORITY_HIGH)\n#undef CURRENT_THREAD_POOL\n\n// bulk load ingestion request\nnamespace dsn {\nnamespace apps {\nDEFINE_STORAGE_WRITE_RPC_CODE(RPC_RRDB_RRDB_BULK_LOAD, false, true)\n} // namespace apps\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/replication/replication_app_base.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/cpp/serverlet.h>\n#include <dsn/cpp/json_helper.h>\n#include <dsn/dist/replication/replication_types.h>\n#include <dsn/dist/replication/replication_other_types.h>\n#include <dsn/dist/replication/replication.codes.h>\n#include <dsn/dist/replication/replica_base.h>\n#include <atomic>\n\nnamespace dsn {\nnamespace replication {\n\nclass mutation;\nclass replica;\n\nclass replica_init_info\n{\npublic:\n    int32_t magic;\n    int32_t crc;\n    ballot init_ballot;\n    decree init_durable_decree;\n    int64_t init_offset_in_shared_log;\n    int64_t init_offset_in_private_log;\n    DEFINE_JSON_SERIALIZATION(init_ballot,\n                              init_durable_decree,\n                              
init_offset_in_shared_log,\n                              init_offset_in_private_log)\n\n    static const std::string kInitInfo;\n\npublic:\n    replica_init_info() { memset((void *)this, 0, sizeof(*this)); }\n    error_code load(const std::string &dir);\n    error_code store(const std::string &dir);\n    std::string to_string();\n\nprivate:\n    error_code load_json(const std::string &file);\n    error_code store_json(const std::string &file);\n};\n\nclass replica_app_info\n{\nprivate:\n    app_info *_app;\n\npublic:\n    replica_app_info(app_info *app) { _app = app; }\n    error_code load(const std::string &file);\n    error_code store(const std::string &file);\n};\n\n/// The store engine interface of Pegasus.\n/// Inherited by pegasus::pegasus_server_impl\n/// Inherited by apps::rrdb_service\nclass replication_app_base : public replica_base\n{\npublic:\n    enum chkpt_apply_mode\n    {\n        copy,\n        learn\n    };\n\n    template <typename T>\n    static replication_app_base *create(replica *r)\n    {\n        return new T(r);\n    }\n    typedef replication_app_base *factory(replica *r);\n    static void register_storage_engine(const std::string &name, factory f);\n    static replication_app_base *new_storage_instance(const std::string &name, replica *r);\n\n    virtual ~replication_app_base() {}\n\n    bool is_primary() const;\n\n    // Whether this replica is duplicating as master.\n    virtual bool is_duplication_master() const;\n    // Whether this replica is duplicating as follower.\n    virtual bool is_duplication_follower() const;\n\n    const ballot &get_ballot() const;\n\n    //\n    // Open the app.\n    //\n    error_code open();\n\n    //\n    // Close the app.\n    // If `clear_state' is true, means clear the app state after close it.\n    //\n    // Must be thread safe.\n    //\n    error_code close(bool clear_state);\n\n    error_code apply_checkpoint(chkpt_apply_mode mode, const learn_state &state);\n    error_code apply_mutation(const 
mutation *mu);\n\n    // methods need to implement on storage engine side\n    virtual error_code start(int argc, char **argv) = 0;\n    virtual error_code stop(bool clear_state) = 0;\n    //\n    // synchronously checkpoint, and update last_durable_decree internally.\n    // which stops replication writes to the app concurrently.\n    //\n    // Postconditions:\n    // * last_committed_decree() == last_durable_decree()\n    //\n    virtual error_code sync_checkpoint() = 0;\n    //\n    // asynchronously checkpoint, which will not stall the normal write ops.\n    // replication layer will check last_durable_decree() later.\n    //\n    // Must be thread safe.\n    //\n    // It is not always necessary for the apps to implement this method,\n    // but if it is implemented, the checkpoint logic in replication will be much simpler.\n    //\n    virtual error_code async_checkpoint(bool flush_memtable) = 0;\n    //\n    // prepare an app-specific learning request (on learner, to be sent to learnee\n    // and used by method get_checkpoint), so that the learning process is more efficient\n    //\n    virtual error_code prepare_get_checkpoint(/*out*/ blob &learn_req) = 0;\n    //\n    // Learn [start, infinite) from remote replicas (learner)\n    //\n    // Must be thread safe.\n    //\n    // The learned checkpoint can be a complete checkpoint (0, infinite), or a delta checkpoint\n    // [start, infinite), depending on the capability of the underlying implementation.\n    //\n    // Note the files in learn_state are copied from dir /replica@remote/data to dir\n    // /replica@local/learn,\n    // so when apply the learned file state, make sure using learn_dir() instead of data_dir() to\n    // get the\n    // full path of the files.\n    //\n    virtual error_code get_checkpoint(int64_t learn_start,\n                                      const blob &learn_request,\n                                      /*out*/ learn_state &state) = 0;\n    //\n    // [DSN_CHKPT_LEARN]\n   
 // after learn the state from learner, apply the learned state to the local app\n    //\n    // Or,\n    //\n    // [DSN_CHKPT_COPY]\n    // when an app only implements synchronous checkpoint, the primary replica\n    // needs to copy checkpoint from secondaries instead of\n    // doing checkpointing by itself, in order to not stall the normal\n    // write operations.\n    //\n    // Postconditions:\n    // * if mode == DSN_CHKPT_COPY, after apply_checkpoint() succeed:\n    //   last_durable_decree() == state.to_decree_included\n    // * if mode == DSN_CHKPT_LEARN, after apply_checkpoint() succeed:\n    //   last_committed_decree() == last_durable_decree() == state.to_decree_included\n    //\n    virtual error_code storage_apply_checkpoint(chkpt_apply_mode mode,\n                                                const learn_state &state) = 0;\n    //\n    // copy the latest checkpoint to checkpoint_dir, and the decree of the checkpoint\n    // copied will be assigned to last_decree if last_decree is not null\n    //\n    // must be thread safe\n    //\n    virtual error_code copy_checkpoint_to_dir(const char *checkpoint_dir,\n                                              /*output*/ int64_t *last_decree,\n                                              bool flush_memtable = false) = 0;\n\n    //\n    // Query methods.\n    //\n    virtual replication::decree last_durable_decree() const = 0;\n    virtual replication::decree last_flushed_decree() const { return last_durable_decree(); }\n    // return the error generated by storage engine\n    virtual int on_request(message_ex *request) = 0;\n\n    //\n    // Parameters:\n    //  - timestamp: an incremental timestamp generated for this batch of requests.\n    //\n    // The base class gives a naive implementation that just calls on_request\n    // repeatedly. 
Storage engine may override this function to get better performance.\n    //\n    virtual int on_batched_write_requests(int64_t decree,\n                                          uint64_t timstamp,\n                                          message_ex **requests,\n                                          int request_length);\n\n    // query compact state.\n    virtual std::string query_compact_state() const = 0;\n\n    // update app envs.\n    virtual void update_app_envs(const std::map<std::string, std::string> &envs) = 0;\n\n    // query app envs.\n    virtual void query_app_envs(/*out*/ std::map<std::string, std::string> &envs) = 0;\n\n    // `partition_version` is used to guarantee data consistency during partition split.\n    // In normal cases, partition_version = partition_count-1, when this replica rejects read\n    // and write request, partition_version = -1.\n    //\n    // Thread-safe.\n    virtual void set_partition_version(int32_t partition_version){};\n\n    // dump the write request some info to string, it may need overload\n    virtual std::string dump_write_request(message_ex *request) { return \"write request\"; };\n\n    virtual void set_ingestion_status(ingestion_status::type status) {}\n\n    virtual ingestion_status::type get_ingestion_status() { return ingestion_status::IS_INVALID; }\n\n    virtual void on_detect_hotkey(const detect_hotkey_request &req,\n                                  /*out*/ detect_hotkey_response &resp)\n    {\n        resp.err = ERR_OBJECT_NOT_FOUND;\n        resp.__set_err_hint(\"on_detect_hotkey implementation not found\");\n    }\n\n    // query pegasus data version\n    virtual uint32_t query_data_version() const = 0;\n\n    virtual manual_compaction_status::type query_compact_status() const = 0;\n\npublic:\n    //\n    // utility functions to be used by app\n    //\n    const std::string &data_dir() const { return _dir_data; }\n    const std::string &learn_dir() const { return _dir_learn; }\n    const std::string 
&backup_dir() const { return _dir_backup; }\n    const std::string &bulk_load_dir() const { return _dir_bulk_load; }\n    const std::string &duplication_dir() const { return _dir_duplication; }\n    const app_info *get_app_info() const;\n    replication::decree last_committed_decree() const { return _last_committed_decree.load(); }\n\nprivate:\n    // routines for replica internal usage\n    friend class replica;\n    friend class replica_stub;\n    friend class mock_replica;\n    friend class replica_disk_migrator;\n\n    error_code open_internal(replica *r);\n    error_code open_new_internal(replica *r, int64_t shared_log_start, int64_t private_log_start);\n\n    const replica_init_info &init_info() const { return _info; }\n    error_code update_init_info(replica *r,\n                                int64_t shared_log_offset,\n                                int64_t private_log_offset,\n                                int64_t durable_decree);\n    error_code update_init_info_ballot_and_decree(replica *r);\n\nprotected:\n    std::string _dir_data;        // ${replica_dir}/data\n    std::string _dir_learn;       // ${replica_dir}/learn\n    std::string _dir_backup;      // ${replica_dir}/backup\n    std::string _dir_bulk_load;   // ${replica_dir}/bulk_load\n    std::string _dir_duplication; // ${replica_dir}/duplication\n    replica *_replica;\n    std::atomic<int64_t> _last_committed_decree;\n    replica_init_info _info;\n\n    explicit replication_app_base(replication::replica *replica);\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/replication/replication_ddl_client.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     replication ddl client\n *\n * Revision history:\n *     2015-12-30, xiaotz, first version\n */\n\n#pragma once\n\n#include <dsn/dist/replication.h>\n\n#include <cctype>\n#include <string>\n#include <map>\n#include <vector>\n\n#include <dsn/tool-api/task_tracker.h>\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/utility/errors.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass replication_ddl_client\n{\npublic:\n    replication_ddl_client(const std::vector<dsn::rpc_address> &meta_servers);\n    ~replication_ddl_client();\n\n    dsn::error_code create_app(const std::string &app_name,\n                               const std::string &app_type,\n                               int 
partition_count,\n                               int replica_count,\n                               const std::map<std::string, std::string> &envs,\n                               bool is_stateless);\n\n    // reserve_seconds == 0 means use default value in configuration 'hold_seconds_for_dropped_app'\n    dsn::error_code drop_app(const std::string &app_name, int reserve_seconds);\n\n    dsn::error_code recall_app(int32_t app_id, const std::string &new_app_name);\n\n    dsn::error_code list_apps(const dsn::app_status::type status,\n                              bool show_all,\n                              bool detailed,\n                              bool json,\n                              const std::string &file_name);\n\n    dsn::error_code list_apps(const dsn::app_status::type status,\n                              std::vector<::dsn::app_info> &apps);\n\n    dsn::error_code list_nodes(const dsn::replication::node_status::type status,\n                               bool detailed,\n                               const std::string &file_name,\n                               bool resolve_ip = false);\n\n    dsn::error_code\n    list_nodes(const dsn::replication::node_status::type status,\n               std::map<dsn::rpc_address, dsn::replication::node_status::type> &nodes);\n\n    dsn::error_code cluster_name(int64_t timeout_ms, std::string &cluster_name);\n\n    dsn::error_code cluster_info(const std::string &file_name, bool resolve_ip, bool json);\n\n    dsn::error_code list_app(const std::string &app_name,\n                             bool detailed,\n                             bool json,\n                             const std::string &file_name,\n                             bool resolve_ip = false);\n\n    dsn::error_code list_app(const std::string &app_name,\n                             int32_t &app_id,\n                             int32_t &partition_count,\n                             std::vector<partition_configuration> &partitions);\n\n    
dsn::replication::configuration_meta_control_response\n    control_meta_function_level(meta_function_level::type level);\n\n    dsn::error_code send_balancer_proposal(const configuration_balancer_request &request);\n\n    dsn::error_code\n    wait_app_ready(const std::string &app_name, int partition_count, int max_replica_count);\n\n    dsn::error_code do_recovery(const std::vector<dsn::rpc_address> &replica_nodes,\n                                int wait_seconds,\n                                bool skip_bad_nodes,\n                                bool skip_lost_partitions,\n                                const std::string &outfile);\n\n    error_with<duplication_add_response>\n    add_dup(std::string app_name, std::string remote_address, bool is_duplicating_checkpoint);\n\n    error_with<duplication_modify_response>\n    change_dup_status(std::string app_name, int dupid, duplication_status::type status);\n    error_with<duplication_modify_response>\n    update_dup_fail_mode(std::string app_name, int dupid, duplication_fail_mode::type fmode);\n\n    error_with<duplication_query_response> query_dup(std::string app_name);\n\n    dsn::error_code do_restore(const std::string &backup_provider_name,\n                               const std::string &cluster_name,\n                               const std::string &policy_name,\n                               int64_t timestamp /*backup_id*/,\n                               const std::string &old_app_name,\n                               int32_t old_app_id,\n                               const std::string &new_app_name,\n                               bool skip_bad_partition,\n                               const std::string &restore_path = \"\");\n\n    dsn::error_code query_restore(int32_t restore_app_id, bool detailed);\n\n    dsn::error_code add_backup_policy(const std::string &policy_name,\n                                      const std::string &backup_provider_type,\n                                      const 
std::vector<int32_t> &app_ids,\n                                      int64_t backup_interval_seconds,\n                                      int32_t backup_history_cnt,\n                                      const std::string &start_time);\n\n    error_with<start_backup_app_response> backup_app(int32_t app_id,\n                                                     const std::string &backup_provider_type,\n                                                     const std::string &backup_path = \"\");\n\n    error_with<query_backup_status_response> query_backup(int32_t app_id, int64_t backup_id);\n\n    dsn::error_code ls_backup_policy();\n\n    dsn::error_code disable_backup_policy(const std::string &policy_name);\n\n    dsn::error_code enable_backup_policy(const std::string &policy_name);\n\n    dsn::error_code query_backup_policy(const std::vector<std::string> &policy_names,\n                                        int backup_info_cnt);\n\n    dsn::error_code update_backup_policy(const std::string &policy_name,\n                                         const std::vector<int32_t> &add_appids,\n                                         const std::vector<int32_t> &removal_appids,\n                                         int64_t new_backup_interval_sec,\n                                         int32_t backup_history_count_to_keep = 0,\n                                         const std::string &start_time = std::string());\n\n    dsn::error_code get_app_envs(const std::string &app_name,\n                                 std::map<std::string, std::string> &envs);\n    error_with<configuration_update_app_env_response>\n    set_app_envs(const std::string &app_name,\n                 const std::vector<std::string> &keys,\n                 const std::vector<std::string> &values);\n    dsn::error_code del_app_envs(const std::string &app_name, const std::vector<std::string> &keys);\n    // precondition:\n    //  -- if clear_all = true, just ignore prefix\n    //  -- if 
clear_all = false, then prefix must not be empty\n    dsn::error_code\n    clear_app_envs(const std::string &app_name, bool clear_all, const std::string &prefix);\n\n    dsn::error_code ddd_diagnose(gpid pid, std::vector<ddd_partition_info> &ddd_partitions);\n\n    void query_disk_info(\n        const std::vector<dsn::rpc_address> &targets,\n        const std::string &app_name,\n        /*out*/ std::map<dsn::rpc_address, error_with<query_disk_info_response>> &resps);\n\n    error_with<start_bulk_load_response> start_bulk_load(const std::string &app_name,\n                                                         const std::string &cluster_name,\n                                                         const std::string &file_provider_type,\n                                                         const std::string &remote_root_path,\n                                                         bool ingest_behind = false);\n\n    error_with<control_bulk_load_response>\n    control_bulk_load(const std::string &app_name, const bulk_load_control_type::type control_type);\n\n    error_with<query_bulk_load_response> query_bulk_load(const std::string &app_name);\n\n    error_with<clear_bulk_load_state_response> clear_bulk_load(const std::string &app_name);\n\n    error_code detect_hotkey(const dsn::rpc_address &target,\n                             detect_hotkey_request &req,\n                             detect_hotkey_response &resp);\n\n    // partition split\n    error_with<start_partition_split_response> start_partition_split(const std::string &app_name,\n                                                                     int partition_count);\n    error_with<control_split_response> pause_partition_split(const std::string &app_name,\n                                                             const int32_t parent_pidx);\n    error_with<control_split_response> restart_partition_split(const std::string &app_name,\n                                                            
   const int32_t parent_pidx);\n    error_with<control_split_response> cancel_partition_split(const std::string &app_name,\n                                                              const int32_t old_partition_count);\n    error_with<control_split_response>\n    control_partition_split(const std::string &app_name,\n                            split_control_type::type control_type,\n                            const int32_t parent_pidx,\n                            const int32_t old_partition_count);\n\n    error_with<query_split_response> query_partition_split(const std::string &app_name);\n\n    error_with<add_new_disk_response> add_new_disk(const rpc_address &target_node,\n                                                   const std::string &disk_str);\n\n    error_with<start_app_manual_compact_response>\n    start_app_manual_compact(const std::string &app_name,\n                             bool bottommost = false,\n                             const int32_t level = -1,\n                             const int32_t max_count = 0);\n\n    error_with<query_app_manual_compact_response>\n    query_app_manual_compact(const std::string &app_name);\n\n    error_with<configuration_get_max_replica_count_response>\n    get_max_replica_count(const std::string &app_name);\n\n    error_with<configuration_set_max_replica_count_response>\n    set_max_replica_count(const std::string &app_name, int32_t max_replica_count);\n\nprivate:\n    bool static valid_app_char(int c);\n\n    void end_meta_request(const rpc_response_task_ptr &callback,\n                          int retry_times,\n                          error_code err,\n                          dsn::message_ex *request,\n                          dsn::message_ex *resp);\n\n    template <typename TRequest>\n    rpc_response_task_ptr request_meta(dsn::task_code code,\n                                       std::shared_ptr<TRequest> &req,\n                                       int timeout_milliseconds = 0,\n               
                        int reply_thread_hash = 0)\n    {\n        dsn::message_ex *msg = dsn::message_ex::create_request(code, timeout_milliseconds);\n        ::dsn::marshall(msg, *req);\n\n        rpc_response_task_ptr task = ::dsn::rpc::create_rpc_response_task(\n            msg, nullptr, empty_rpc_handler, reply_thread_hash);\n        rpc::call(_meta_server,\n                  msg,\n                  &_tracker,\n                  [this, task](\n                      error_code err, dsn::message_ex *request, dsn::message_ex *response) mutable {\n                      end_meta_request(std::move(task), 0, err, request, response);\n                  });\n        return task;\n    }\n\n    /// Send request to meta server synchronously.\n    template <typename TRpcHolder, typename TResponse = typename TRpcHolder::response_type>\n    error_with<TResponse> call_rpc_sync(TRpcHolder rpc, int reply_thread_hash = 0)\n    {\n        // Retry at maximum `MAX_RETRY` times when error occurred.\n        static constexpr int MAX_RETRY = 2;\n        error_code err = ERR_UNKNOWN;\n        for (int retry = 0; retry < MAX_RETRY; retry++) {\n            task_ptr task = rpc.call(_meta_server,\n                                     &_tracker,\n                                     [&err](error_code code) { err = code; },\n                                     reply_thread_hash);\n            task->wait();\n            if (err == ERR_OK) {\n                break;\n            }\n        }\n        if (err != ERR_OK) {\n            return error_s::make(err, \"unable to send rpc to server\");\n        }\n        return error_with<TResponse>(std::move(rpc.response()));\n    }\n\n    /// Send request to multi replica server synchronously.\n    template <typename TRpcHolder, typename TResponse = typename TRpcHolder::response_type>\n    void call_rpcs_sync(std::map<dsn::rpc_address, TRpcHolder> &rpcs,\n                        std::map<dsn::rpc_address, error_with<TResponse>> &resps,\n            
            int reply_thread_hash = 0,\n                        bool enable_retry = true)\n    {\n        dsn::task_tracker tracker;\n        error_code err = ERR_UNKNOWN;\n        for (auto &rpc : rpcs) {\n            rpc.second.call(\n                rpc.first, &tracker, [&err, &resps, &rpcs, &rpc](error_code code) mutable {\n                    err = code;\n                    if (err == dsn::ERR_OK) {\n                        resps.emplace(rpc.first, std::move(rpc.second.response()));\n                        rpcs.erase(rpc.first);\n                    } else {\n                        resps.emplace(\n                            rpc.first,\n                            std::move(error_s::make(err, \"unable to send rpc to server\")));\n                    }\n                });\n        }\n        tracker.wait_outstanding_tasks();\n\n        if (enable_retry && rpcs.size() > 0) {\n            std::map<dsn::rpc_address, dsn::error_with<TResponse>> retry_resps;\n            call_rpcs_sync(rpcs, retry_resps, reply_thread_hash, false);\n            for (auto &resp : retry_resps) {\n                resps.emplace(resp.first, std::move(resp.second));\n            }\n        }\n    }\n\nprivate:\n    dsn::rpc_address _meta_server;\n    dsn::task_tracker _tracker;\n\n    typedef rpc_holder<detect_hotkey_request, detect_hotkey_response> detect_hotkey_rpc;\n    typedef rpc_holder<query_disk_info_request, query_disk_info_response> query_disk_info_rpc;\n    typedef rpc_holder<add_new_disk_request, add_new_disk_response> add_new_disk_rpc;\n};\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/replication/replication_enums.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/enum_helper.h>\n\nnamespace dsn {\nENUM_BEGIN2(app_status::type, app_status, app_status::AS_INVALID)\nENUM_REG(app_status::AS_AVAILABLE)\nENUM_REG(app_status::AS_CREATING)\nENUM_REG(app_status::AS_CREATE_FAILED)\nENUM_REG(app_status::AS_DROPPING)\nENUM_REG(app_status::AS_DROP_FAILED)\nENUM_REG(app_status::AS_DROPPED)\nENUM_REG(app_status::AS_RECALLING)\nENUM_END2(app_status::type, app_status)\n\nENUM_BEGIN2(replication::partition_status::type,\n            partition_status,\n            
replication::partition_status::PS_INVALID)\nENUM_REG(replication::partition_status::PS_INACTIVE)\nENUM_REG(replication::partition_status::PS_ERROR)\nENUM_REG(replication::partition_status::PS_PRIMARY)\nENUM_REG(replication::partition_status::PS_SECONDARY)\nENUM_REG(replication::partition_status::PS_POTENTIAL_SECONDARY)\nENUM_REG(replication::partition_status::PS_PARTITION_SPLIT)\nENUM_END2(replication::partition_status::type, partition_status)\n\nENUM_BEGIN2(replication::read_semantic::type,\n            read_semantic,\n            replication::read_semantic::ReadInvalid)\nENUM_REG(replication::read_semantic::ReadLastUpdate)\nENUM_REG(replication::read_semantic::ReadOutdated)\nENUM_REG(replication::read_semantic::ReadSnapshot)\nENUM_END2(replication::read_semantic::type, read_semantic)\n\nENUM_BEGIN2(replication::learn_type::type, learn_type, replication::learn_type::LT_INVALID)\nENUM_REG(replication::learn_type::LT_CACHE)\nENUM_REG(replication::learn_type::LT_APP)\nENUM_REG(replication::learn_type::LT_LOG)\nENUM_END2(replication::learn_type::type, learn_type)\n\nENUM_BEGIN2(replication::learner_status::type,\n            learner_status,\n            replication::learner_status::LearningInvalid)\nENUM_REG(replication::learner_status::LearningWithoutPrepare)\nENUM_REG(replication::learner_status::LearningWithPrepareTransient)\nENUM_REG(replication::learner_status::LearningWithPrepare)\nENUM_REG(replication::learner_status::LearningSucceeded)\nENUM_REG(replication::learner_status::LearningFailed)\nENUM_END2(replication::learner_status::type, learner_status)\n\nENUM_BEGIN2(replication::config_type::type, config_type, 
replication::config_type::CT_INVALID)\nENUM_REG(replication::config_type::CT_ASSIGN_PRIMARY)\nENUM_REG(replication::config_type::CT_UPGRADE_TO_PRIMARY)\nENUM_REG(replication::config_type::CT_ADD_SECONDARY)\nENUM_REG(replication::config_type::CT_UPGRADE_TO_SECONDARY)\nENUM_REG(replication::config_type::CT_DOWNGRADE_TO_SECONDARY)\nENUM_REG(replication::config_type::CT_DOWNGRADE_TO_INACTIVE)\nENUM_REG(replication::config_type::CT_REMOVE)\nENUM_REG(replication::config_type::CT_ADD_SECONDARY_FOR_LB)\nENUM_REG(replication::config_type::CT_PRIMARY_FORCE_UPDATE_BALLOT)\nENUM_REG(replication::config_type::CT_DROP_PARTITION)\nENUM_REG(replication::config_type::CT_REGISTER_CHILD)\nENUM_END2(replication::config_type::type, config_type)\n\nENUM_BEGIN2(replication::node_status::type, node_status, replication::node_status::NS_INVALID)\nENUM_REG(replication::node_status::NS_ALIVE)\nENUM_REG(replication::node_status::NS_UNALIVE)\nENUM_END2(replication::node_status::type, node_status)\n\nENUM_BEGIN2(replication::bulk_load_status::type,\n            bulk_load_status,\n            replication::bulk_load_status::BLS_INVALID)\nENUM_REG(replication::bulk_load_status::BLS_INVALID)\nENUM_REG(replication::bulk_load_status::BLS_DOWNLOADING)\nENUM_REG(replication::bulk_load_status::BLS_DOWNLOADED)\nENUM_REG(replication::bulk_load_status::BLS_INGESTING)\nENUM_REG(replication::bulk_load_status::BLS_SUCCEED)\nENUM_REG(replication::bulk_load_status::BLS_FAILED)\nENUM_REG(replication::bulk_load_status::BLS_PAUSING)\nENUM_REG(replication::bulk_load_status::BLS_PAUSED)\nENUM_REG(replication::bulk_load_status::BLS_CANCELED)\nENUM_END2(replication::bulk_load_status::type, bulk_load_status)\n\nENUM_BEGIN2(replication::ingestion_status::type,\n            ingestion_status,\n            
replication::ingestion_status::IS_INVALID)\nENUM_REG(replication::ingestion_status::IS_INVALID)\nENUM_REG(replication::ingestion_status::IS_RUNNING)\nENUM_REG(replication::ingestion_status::IS_SUCCEED)\nENUM_REG(replication::ingestion_status::IS_FAILED)\nENUM_END2(replication::ingestion_status::type, ingestion_status)\n\nENUM_BEGIN2(replication::hotkey_type::type, hotkey_type, replication::hotkey_type::READ)\nENUM_REG(replication::hotkey_type::READ)\nENUM_REG(replication::hotkey_type::WRITE)\nENUM_END2(replication::hotkey_type::type, hotkey_type)\n\nENUM_BEGIN2(replication::detect_action::type, detect_action, replication::detect_action::START)\nENUM_REG(replication::detect_action::START)\nENUM_REG(replication::detect_action::STOP)\nENUM_REG(replication::detect_action::QUERY)\nENUM_END2(replication::detect_action::type, detect_action)\n\nENUM_BEGIN2(replication::split_status::type, split_status, replication::split_status::NOT_SPLIT)\nENUM_REG(replication::split_status::NOT_SPLIT)\nENUM_REG(replication::split_status::SPLITTING)\nENUM_REG(replication::split_status::PAUSING)\nENUM_REG(replication::split_status::PAUSED)\nENUM_REG(replication::split_status::CANCELING)\nENUM_END2(replication::split_status::type, split_status)\n\nENUM_BEGIN2(replication::disk_migration_status::type,\n            disk_migration_status,\n            replication::disk_migration_status::IDLE)\nENUM_REG(replication::disk_migration_status::IDLE)\nENUM_REG(replication::disk_migration_status::MOVING)\nENUM_REG(replication::disk_migration_status::MOVED)\nENUM_REG(replication::disk_migration_status::CLOSED)\nENUM_END2(replication::disk_migration_status::type, disk_migration_status)\n\nENUM_BEGIN2(replication::disk_status::type, disk_status, replication::disk_status::NORMAL)\nENUM_REG(replication::disk_status::NORMAL)\nENUM_REG(replication::disk_status::SPACE_INSUFFICIENT)\nENUM_END2(replication::disk_status::type, disk_status)\n\nENUM_BEGIN2(replication::manual_compaction_status::type,\n            
manual_compaction_status,\n            replication::manual_compaction_status::IDLE)\nENUM_REG(replication::manual_compaction_status::IDLE)\nENUM_REG(replication::manual_compaction_status::QUEUING)\nENUM_REG(replication::manual_compaction_status::RUNNING)\nENUM_REG(replication::manual_compaction_status::FINISHED)\nENUM_END2(replication::manual_compaction_status::type, manual_compaction_status)\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/replication/replication_other_types.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n#ifndef replication_OTHER_TYPES_H\n#define replication_OTHER_TYPES_H\n\n#include <sstream>\n#include <dsn/dist/replication/replication_types.h>\n#include <dsn/dist/replication/replication_enums.h>\n\nnamespace dsn {\nnamespace replication {\n\ntypedef int32_t app_id;\ntypedef int64_t ballot;\ntypedef int64_t decree;\n\n#define invalid_ballot ((::dsn::replication::ballot)-1LL)\n#define invalid_decree ((::dsn::replication::decree)-1LL)\n#define invalid_offset (-1LL)\n#define invalid_signature 0\n\ninline bool is_primary(const partition_configuration &pc, const 
rpc_address &node)\n{\n    return !node.is_invalid() && pc.primary == node;\n}\ninline bool is_secondary(const partition_configuration &pc, const rpc_address &node)\n{\n    return !node.is_invalid() &&\n           std::find(pc.secondaries.begin(), pc.secondaries.end(), node) != pc.secondaries.end();\n}\ninline bool is_member(const partition_configuration &pc, const rpc_address &node)\n{\n    return is_primary(pc, node) || is_secondary(pc, node);\n}\ninline bool is_partition_config_equal(const partition_configuration &pc1,\n                                      const partition_configuration &pc2)\n{\n    // secondaries do not need to be in the same order\n    for (const rpc_address &addr : pc1.secondaries)\n        if (!is_secondary(pc2, addr))\n            return false;\n    // last_drops is not considered in the equality check\n    return pc1.ballot == pc2.ballot && pc1.pid == pc2.pid &&\n           pc1.max_replica_count == pc2.max_replica_count && pc1.primary == pc2.primary &&\n           pc1.secondaries.size() == pc2.secondaries.size() &&\n           pc1.last_committed_decree == pc2.last_committed_decree;\n}\n\nclass replica_helper\n{\npublic:\n    static bool remove_node(::dsn::rpc_address node,\n                            /*inout*/ std::vector<::dsn::rpc_address> &nodeList);\n    static bool get_replica_config(const partition_configuration &partition_config,\n                                   ::dsn::rpc_address node,\n                                   /*out*/ replica_configuration &replica_config);\n    // true if meta_list's value of config is valid, otherwise return false\n    static bool load_meta_servers(/*out*/ std::vector<dsn::rpc_address> &servers,\n                                  const char *section = \"meta_server\",\n                                  const char *key = \"server_list\");\n};\n} // namespace replication\n} // namespace dsn\n\n#endif\n"
  },
  {
    "path": "include/dsn/dist/replication/replication_service_app.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/utility/autoref_ptr.h>\n#include <dsn/cpp/service_app.h>\n\nnamespace dsn {\n\nclass version_http_service;\n\nnamespace replication {\n\nnamespace test {\nclass test_checker;\n}\n\nclass replica_stub;\ntypedef dsn::ref_ptr<replica_stub> replica_stub_ptr;\n\nclass replication_service_app : public ::dsn::service_app\n{\npublic:\n    static void register_all();\n\n    replication_service_app(const dsn::service_app_info *info);\n\n    virtual ~replication_service_app(void);\n\n    virtual ::dsn::error_code start(const 
std::vector<std::string> &args) override;\n\n    virtual ::dsn::error_code stop(bool cleanup = false) override;\n\n    virtual void\n    on_intercepted_request(dsn::gpid gpid, bool is_write, dsn::message_ex *msg) override;\n\nprivate:\n    friend class ::dsn::replication::test::test_checker;\n    replica_stub_ptr _stub;\n};\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/replication/replication_types.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n// WARN: This is a legacy file. Please do not include it when you make any modification.\n\n#include \"meta_admin_types.h\"\n#include \"partition_split_types.h\"\n#include \"duplication_types.h\"\n#include \"bulk_load_types.h\"\n#include \"backup_types.h\"\n#include \"consensus_types.h\"\n#include \"replica_admin_types.h\"\n"
  },
  {
    "path": "include/dsn/dist/replication/storage_serverlet.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <vector>\n#include <unordered_map>\n#include <functional>\n\n#include <dsn/service_api_cpp.h>\n\nnamespace dsn {\nnamespace replication {\ntemplate <typename T>\nclass storage_serverlet\n{\nprotected:\n    typedef std::function<void(T *, dsn::message_ex *req)> rpc_handler;\n    static std::unordered_map<std::string, rpc_handler> s_handlers;\n    static std::vector<rpc_handler> s_vhandlers;\n\n    template <typename TReq, typename TResp>\n    static bool\n    register_async_rpc_handler(dsn::task_code rpc_code,\n                               const char *name,\n                               void (*handler)(T *svc, const TReq &req, rpc_replier<TResp> &resp))\n    {\n        rpc_handler h = [handler](T *p, 
dsn::message_ex *r) {\n            TReq req;\n            ::dsn::unmarshall(r, req);\n            rpc_replier<TResp> replier(r->create_response());\n            handler(p, req, replier);\n        };\n\n        return register_async_rpc_handler(rpc_code, name, h);\n    }\n\n    template <typename TRpcHolder>\n    static bool register_rpc_handler_with_rpc_holder(dsn::task_code rpc_code,\n                                                     const char *name,\n                                                     void (*handler)(T *svc, TRpcHolder))\n    {\n        rpc_handler h = [handler](T *p, dsn::message_ex *request) {\n            handler(p, TRpcHolder::auto_reply(request));\n        };\n\n        return register_async_rpc_handler(rpc_code, name, h);\n    }\n\n    template <typename TReq>\n    static bool register_async_rpc_handler(dsn::task_code rpc_code,\n                                           const char *name,\n                                           void (*handler)(T *svc, const TReq &req))\n    {\n        rpc_handler h = [handler](T *p, dsn::message_ex *r) {\n            TReq req;\n            ::dsn::unmarshall(r, req);\n            handler(p, req);\n        };\n\n        return register_async_rpc_handler(rpc_code, name, h);\n    }\n\n    static bool register_async_rpc_handler(dsn::task_code rpc_code, const char *name, rpc_handler h)\n    {\n        dassert(s_handlers.emplace(rpc_code.to_string(), h).second,\n                \"handler %s has already been registered\",\n                rpc_code.to_string());\n        dassert(s_handlers.emplace(name, h).second, \"handler %s has already been registered\", name);\n\n        s_vhandlers.resize(rpc_code + 1);\n        dassert(s_vhandlers[rpc_code] == nullptr,\n                \"handler %s(%d) has already been registered\",\n                rpc_code.to_string(),\n                rpc_code.code());\n        s_vhandlers[rpc_code] = h;\n        return true;\n    }\n\n    static const rpc_handler 
*find_handler(dsn::task_code rpc_code)\n    {\n        if (rpc_code < s_vhandlers.size() && s_vhandlers[rpc_code] != nullptr)\n            return &s_vhandlers[rpc_code];\n        auto iter = s_handlers.find(rpc_code.to_string());\n        if (iter != s_handlers.end())\n            return &(iter->second);\n        return nullptr;\n    }\n\n    int handle_request(dsn::message_ex *request)\n    {\n        dsn::task_code t = request->rpc_code();\n        const rpc_handler *ptr = find_handler(t);\n        if (ptr != nullptr) {\n            (*ptr)(static_cast<T *>(this), request);\n        } else {\n            dwarn(\"recv message with unhandled rpc name %s from %s, trace_id = %016\" PRIx64,\n                  t.to_string(),\n                  request->header->from_address.to_string(),\n                  request->header->trace_id);\n            dsn_rpc_reply(request->create_response(), ::dsn::ERR_HANDLER_NOT_FOUND);\n        }\n        return 0;\n    }\n};\n\ntemplate <typename T>\nstd::unordered_map<std::string, typename storage_serverlet<T>::rpc_handler>\n    storage_serverlet<T>::s_handlers;\n\ntemplate <typename T>\nstd::vector<typename storage_serverlet<T>::rpc_handler> storage_serverlet<T>::s_vhandlers;\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/dist/replication.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/service_api_cpp.h>\n#include <dsn/dist/replication/replication_other_types.h>\n#include <dsn/dist/replication/replication.codes.h>\n"
  },
  {
    "path": "include/dsn/http/http_server.h",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#pragma once\n\n#include <dsn/utility/errors.h>\n#include <dsn/utility/flags.h>\n#include <dsn/tool-api/task_code.h>\n\nnamespace dsn {\n\nDSN_DECLARE_bool(enable_http_server);\n\n/// The rpc code for all the HTTP RPCs.\nDEFINE_TASK_CODE_RPC(RPC_HTTP_SERVICE, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT);\n\nenum http_method\n{\n    HTTP_METHOD_GET = 1,\n    HTTP_METHOD_POST = 2,\n};\n\nclass message_ex;\nstruct http_request\n{\n    static error_with<http_request> parse(dsn::message_ex *m);\n\n    std::string path;\n    // <args_name, args_val>\n    std::unordered_map<std::string, std::string> query_args;\n    blob body;\n    blob full_url;\n    http_method method;\n};\n\nenum class http_status_code\n{\n    ok,                    // 200\n    temporary_redirect,    // 307\n    bad_request,           // 400\n    not_found,             // 404\n    internal_server_error, // 500\n};\n\nextern std::string http_status_code_to_string(http_status_code code);\n\nstruct http_response\n{\n    std::string body;\n    http_status_code status_code{http_status_code::ok};\n    std::string content_type = \"text/plain\";\n    std::string 
location;\n};\n\ntypedef std::function<void(const http_request &req, http_response &resp)> http_callback;\n\n// Defines the structure of an HTTP call.\nstruct http_call\n{\n    std::string path;\n    std::string help;\n    http_callback callback;\n\n    http_call &with_callback(http_callback cb)\n    {\n        callback = std::move(cb);\n        return *this;\n    }\n    http_call &with_help(std::string hp)\n    {\n        help = std::move(hp);\n        return *this;\n    }\n};\n\n// A suite of HTTP handlers coupled using the same prefix of the service.\n// If a handler is registered with path 'app/duplication', its real path is\n// \"/<root_path>/app/duplication\".\nclass http_service\n{\npublic:\n    virtual ~http_service() = default;\n\n    virtual std::string path() const = 0;\n\n    void register_handler(std::string path, http_callback cb, std::string help);\n};\n\n// Example:\n//\n// ```\n// register_http_call(\"/meta/app\")\n//     .with_callback(std::bind(&meta_http_service::get_app_handler,\n//                              this,\n//                              std::placeholders::_1,\n//                              std::placeholders::_2))\n//     .with_help(\"Gets the app information\")\n//     .add_argument(\"app_name\", HTTP_ARG_STRING);\n// ```\nextern http_call &register_http_call(std::string full_path);\n\n// Starts serving HTTP requests.\n// The internal HTTP server will reuse the rDSN server port.\nextern void start_http_server();\n\n// NOTE: the memory of `svc` will be transferred to the underlying registry.\n// TODO(wutao): pass `svc` as a std::unique_ptr.\nextern void register_http_service(http_service *svc);\n\ninline bool is_http_message(dsn::task_code code)\n{\n    return code == RPC_HTTP_SERVICE || code == RPC_HTTP_SERVICE_ACK;\n}\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/perf_counter/perf_counter.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/enum_helper.h>\n#include <dsn/utility/autoref_ptr.h>\n#include <dsn/utility/dlib.h>\n#include <memory>\n#include <sstream>\n#include <vector>\n\ntypedef enum dsn_perf_counter_type_t {\n    COUNTER_TYPE_NUMBER,\n    COUNTER_TYPE_VOLATILE_NUMBER, // special kind of NUMBER which will be reset on get\n    COUNTER_TYPE_RATE,\n    COUNTER_TYPE_NUMBER_PERCENTILES,\n    COUNTER_TYPE_COUNT,\n    COUNTER_TYPE_INVALID\n} dsn_perf_counter_type_t;\n\ntypedef enum dsn_perf_counter_percentile_type_t {\n    COUNTER_PERCENTILE_50,\n    COUNTER_PERCENTILE_90,\n    COUNTER_PERCENTILE_95,\n    COUNTER_PERCENTILE_99,\n    COUNTER_PERCENTILE_999,\n\n    COUNTER_PERCENTILE_COUNT,\n    COUNTER_PERCENTILE_INVALID\n} 
dsn_perf_counter_percentile_type_t;\n\nconst char *dsn_counter_type_to_string(dsn_perf_counter_type_t t);\ndsn_perf_counter_type_t dsn_counter_type_from_string(const char *str);\n\nconst char *dsn_percentile_type_to_string(dsn_perf_counter_percentile_type_t t);\ndsn_perf_counter_percentile_type_t dsn_percentile_type_from_string(const char *str);\n\nnamespace dsn {\n\nclass perf_counter : public ref_counter\n{\npublic:\n    perf_counter(const char *app,\n                 const char *section,\n                 const char *name,\n                 dsn_perf_counter_type_t type,\n                 const char *dsptr)\n        : _app(app), _section(section), _name(name), _dsptr(dsptr), _type(type)\n    {\n        build_full_name(app, section, name, _full_name);\n    }\n\n    virtual ~perf_counter() {}\n\n    virtual void increment() = 0;\n    virtual void decrement() = 0;\n    virtual void add(int64_t val) = 0;\n    virtual void set(int64_t val) = 0;\n    virtual double get_value() = 0;\n    virtual int64_t get_integer_value() = 0;\n    virtual double get_percentile(dsn_perf_counter_percentile_type_t type) = 0;\n\n    typedef std::vector<std::pair<int64_t *, int>> samples_t;\n\n    // return actual sample count, must <= required_sample_count\n    virtual int get_latest_samples(int required_sample_count, /*out*/ samples_t &samples) const\n    {\n        return 0;\n    }\n\n    // return the latest sample value\n    virtual int64_t get_latest_sample() const { return 0; }\n\n    const char *full_name() const { return _full_name.c_str(); }\n    const char *app() const { return _app.c_str(); }\n    const char *section() const { return _section.c_str(); }\n    const char *name() const { return _name.c_str(); }\n    const char *dsptr() const { return _dsptr.c_str(); }\n    dsn_perf_counter_type_t type() const { return _type; }\n\n    static void build_full_name(const char *app,\n                                const char *section,\n                                const char 
*name,\n                                /*out*/ std::string &counter_name)\n    {\n        std::stringstream ss;\n        ss << app << \"*\" << section << \"*\" << name;\n        counter_name = ss.str();\n    }\n\nprivate:\n    std::string _app;\n    std::string _section;\n    std::string _name;\n    std::string _dsptr;\n    dsn_perf_counter_type_t _type;\n\n    std::string _full_name;\n    friend class perf_counters;\n};\ntypedef ref_ptr<perf_counter> perf_counter_ptr;\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/perf_counter/perf_counter_utils.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <string>\n#include <dsn/cpp/json_helper.h>\n#include <dsn/perf_counter/perf_counter.h>\n\nnamespace dsn {\n\nstruct perf_counter_metric\n{\n    std::string name;\n    std::string type;\n    double value;\n    perf_counter_metric() : value(0) {}\n    perf_counter_metric(const char *n, dsn_perf_counter_type_t t, double v)\n        : name(n), type(dsn_counter_type_to_string(t)), value(v)\n    {\n    }\n    DEFINE_JSON_SERIALIZATION(name, type, value)\n};\n\n/// used for command of querying perf counter\nstruct perf_counter_info\n{\n    std::string result; // OK or ERROR\n    int64_t timestamp;  // in seconds\n    std::string timestamp_str;\n    std::vector<perf_counter_metric> counters;\n    perf_counter_info() : timestamp(0) {}\n    DEFINE_JSON_SERIALIZATION(result, timestamp, timestamp_str, counters)\n};\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/perf_counter/perf_counter_wrapper.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/perf_counter/perf_counter.h>\n#include <dsn/perf_counter/perf_counters.h>\n\nnamespace dsn {\n\n//\n// perf_counter_wrapper is a wrapper class for perf-counter operations, users should use this class\n// instead of the dsn::perf_counter where a performance counter is needed.\n//\n// for example:\n// class A{\n// public:\n//   A() {\n//      _p1.init_global_counter(...)\n//      _p2.init_app_counter(...)\n//   }\n// private:\n//   perf_counter_wrapper _p1;\n//   perf_counter_wrapper _p2;\n// };\n//\n// user should call init_global_counter/init_app_counter to initialize the counter.\n// all the initialized counters are stored in the singleton dsn::perf_counters,\n// users can collect all counters of the 
process and integrate them with a monitoring system.\n//\nclass perf_counter_wrapper\n{\npublic:\n    perf_counter_wrapper() { _counter = nullptr; }\n\n    perf_counter_wrapper(const perf_counter_wrapper &other) = delete;\n    perf_counter_wrapper(perf_counter_wrapper &other) = delete;\n    perf_counter_wrapper(perf_counter_wrapper &&other) = delete;\n    perf_counter_wrapper &operator=(const perf_counter_wrapper &other) = delete;\n    perf_counter_wrapper &operator=(perf_counter_wrapper &other) = delete;\n    perf_counter_wrapper &operator=(perf_counter_wrapper &&other) = delete;\n\n    ~perf_counter_wrapper() {}\n\n    // clear the real perf-counter object.\n    // call this function if you want to free the counter before the wrapper's destructor is called\n    void clear()\n    {\n        if (nullptr != _counter) {\n            dsn::perf_counters::instance().remove_counter(_counter->full_name());\n            _counter = nullptr;\n        }\n    }\n\n    // init app counter create counters for some specific service_app, so different\n    // service_app can create counter with the same name.\n    void init_app_counter(const char *section,\n                          const char *name,\n                          dsn_perf_counter_type_t type,\n                          const char *dsptr)\n    {\n        dsn::perf_counter_ptr c =\n            dsn::perf_counters::instance().get_app_counter(section, name, type, dsptr, true);\n        clear();\n        _counter = c.get();\n    }\n\n    // init global counter create counters globally.\n    void init_global_counter(const char *app,\n                             const char *section,\n                             const char *name,\n                             dsn_perf_counter_type_t type,\n                             const char *dsptr)\n    {\n        dsn::perf_counter_ptr c = dsn::perf_counters::instance().get_global_counter(\n            app, section, name, type, dsptr, true);\n        clear();\n        _counter = c.get();\n    
}\n\n    dsn::perf_counter *get() const { return _counter; }\n    dsn::perf_counter *operator->() const { return _counter; }\n\nprivate:\n    // use raw pointer to make the class object small, so it can be accessed quickly\n    dsn::perf_counter *_counter;\n};\n}\n"
  },
  {
    "path": "include/dsn/perf_counter/perf_counters.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/c/api_common.h>\n#include <dsn/utility/singleton.h>\n#include <dsn/utility/synchronize.h>\n#include <dsn/perf_counter/perf_counter.h>\n#include <map>\n#include <sstream>\n#include <queue>\n#include <functional>\n#include <unordered_map>\n\nnamespace dsn {\n\n/// Registry of all perf counters, users can get/create a specific perf counter\n/// via `get_app_counter` and `get_global_counter`.\n/// To push metrics to some monitoring systems (e.g Prometheus), users can\n/// collect all the perf counters via `take_snapshot`.\nclass perf_counters : public utils::singleton<perf_counters>\n{\npublic:\n    ///\n    /// get counter with (current_app_name, section, name), try to create a new one\n    /// if 
create_if_not_exist==true\n    ///\n    perf_counter_ptr get_app_counter(const char *section,\n                                     const char *name,\n                                     dsn_perf_counter_type_t flags,\n                                     const char *dsptr,\n                                     bool create_if_not_exist);\n\n    ///\n    /// get counter with (app, section, name), try to create a new one\n    /// if create_if_not_exist==true\n    ///\n    perf_counter_ptr get_global_counter(const char *app,\n                                        const char *section,\n                                        const char *name,\n                                        dsn_perf_counter_type_t flags,\n                                        const char *dsptr,\n                                        bool create_if_not_exist);\n\n    ///\n    /// please call remove_counter if a previous get_app_counter/get_global_counter is called\n    ///\n    bool remove_counter(const char *full_name);\n\n    perf_counter_ptr get_counter(const std::string &full_name);\n\n    struct counter_snapshot\n    {\n        double value{0.0};\n        std::string name;\n        dsn_perf_counter_type_t type;\n\n    private:\n        friend class perf_counters;\n        bool updated_recently{false};\n    };\n\n    ///\n    /// Some types of perf counters(rate, volatile_number) may change their values after you visit\n    /// them, so we'd better take a snapshot of all counters before the visiting in case that\n    /// we may get value of counters repeatedly.\n    ///\n    /// here we provide several functions to support these semantics:\n    ///     take_snapshot\n    ///     iterate_snapshot\n    ///     query_snapshot\n    ///\n    /// when you call take_snapshot, a snapshot of current counters and their values will be\n    /// stored in internal variables of perf_counters module,\n    /// then you can iterate all counters or query some specific counters.\n    /// if another 
take_snapshot is called, the old one will be overwritten.\n    ///\n    /// the snapshot will be protected by a read-write lock internally.\n    ///\n    /// when you read the snapshot, you should provide a callback called \"snapshot_iterator\".\n    /// this callback will be called once for each requested counter.\n    ///\n    /// TODO: totally eliminate this stupid snapshot feature with a better metrics library\n    /// (a metric library which doesn't have SIDE EFFECT when you visit metric!!!)\n    ///\n    typedef std::function<void(const counter_snapshot &)> snapshot_iterator;\n    void take_snapshot();\n    void iterate_snapshot(const snapshot_iterator &v) const;\n\n    // if found is not nullptr, then whether a counter was found will be stored in it\n    // that is to say:\n    //    if (found != nullptr && (*found)[i]==true) {\n    //        counters[i] is in the snapshot\n    //    }\n    void query_snapshot(const std::vector<std::string> &counters,\n                        const snapshot_iterator &v,\n                        std::vector<bool> *found) const;\n\n    // this function collects all counters to perf_counter_info which match\n    // any of the regular expressions in args and returns the json representation\n    // of perf_counter_info\n    std::string list_snapshot_by_regexp(const std::vector<std::string> &args) const;\n\n    // this function collects all counters to perf_counter_info which satisfy\n    // any of the filters generated by args and returns the json representation\n    // of perf_counter_info\n    std::string list_snapshot_by_literal(\n        const std::vector<std::string> &args,\n        std::function<bool(const std::string &arg, const counter_snapshot &cs)> filter) const;\n\nprivate:\n    perf_counters();\n    ~perf_counters();\n\n    // full_name = perf_counter::build_full_name(...);\n    perf_counter *new_counter(const char *app,\n                              const char *section,\n                              const char 
*name,\n                              dsn_perf_counter_type_t type,\n                              const char *dsptr);\n\n    void get_all_counters(std::vector<perf_counter_ptr> *all) const;\n\n    mutable utils::rw_lock_nr _lock;\n    // keep counter as a refptr to make the counter can be safely accessed\n    // by get_all_counters and remove_counter concurrently\n    //\n    // keep an user reference for each counter coz the counter may be shared by different modules\n    // called get_xxx_counter\n    struct counter_object\n    {\n        perf_counter_ptr counter;\n        int user_reference;\n    };\n    std::unordered_map<std::string, counter_object> _counters;\n\n    mutable utils::rw_lock_nr _snapshot_lock;\n    std::unordered_map<std::string, counter_snapshot> _snapshots;\n\n    // timestamp in seconds when take snapshot of current counters\n    int64_t _timestamp;\n\n    dsn_handle_t _perf_counters_cmd;\n    dsn_handle_t _perf_counters_by_substr_cmd;\n    dsn_handle_t _perf_counters_by_prefix_cmd;\n    dsn_handle_t _perf_counters_by_postfix_cmd;\n\n    friend class utils::singleton<perf_counters>;\n};\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/service_api_c.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     this file define the C Service API in rDSN\n *\n * ------------------------------------------------------------------------------\n *\n *  The service system call API for Zion\n * -------------------------------------------\n *  Summary:\n *  (1) rich API for common distributed system development\n *      - thread pools and tasking\n *      - thread synchronization\n *      - remote procedure calls\n *      - asynchnous file operations\n *      - envrionment inputs\n *      - rDSN app model, system and other utilities\n *  (2) portable\n *      - compilable on many platforms (currently linux, windows, FreeBSD, MacOS)\n *      - system calls are in C so that later language wrappers are possibles.\n *  (3) 
high performance\n *      - all low level components can be plugged with the tool API (in C++)\n *        besides the existing high performance providers;\n *      - developers can also configure thread pools, thread numbers, thread/task\n *        priorities, CPU core affinities, throttling policies etc. declaratively\n *        to build a best threading model for upper apps.\n *  (4) ease of integration\n *      - support many languages through language wrappers based on this c interface\n *      - easy support for existing protocols (thrift/protobuf etc.)\n *      - integrate with existing platform infra with low level providers (plug-in),\n *        such as loggers, performance counters, etc.\n *  (5) rich debug, development tools and runtime policies support\n *      - tool API with task granularity semantic for further tool and runtime policy development.\n *      - rich existing tools, tracer, profiler, simulator, model checker, replayer, global checker\n *  (7) PRINCIPLE: all non-determinism must go through these system calls so that powerful\n *      internal tools are possible - replay, model checking, replication, ...,\n *      AND, it is still OK to call other DETERMINISTIC APIs for applications.\n *\n * ------------------------------------------------------------------------------\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version in cpp\n *     July, 2015, @imzhenyu (Zhenyu Guo), refactor and refined in c\n *     Feb., 2016, @imzhenyu (Zhenyu Guo), decompose into several files for V1 release\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n// common data structures and macros\n#include <dsn/c/api_common.h>\n\n// rDSN uses event-driven programming model, and\n// this file defines the task(i.e., event) abstraction and related\n#include <dsn/c/api_task.h>\n\n// service API for app/framework development,\n// including threading/tasking, thread synchronization,\n// RPC, asynchronous file IO, 
environment, etc.\n#include <dsn/c/api_layer1.h>\n\n// application/framework model in rDSN\n#include <dsn/c/app_model.h>\n\n// some useful utility functions provided by rDSN,\n// such as logging, performance counter, checksum,\n// command line interface registration and invocation,\n// etc.\n#include <dsn/c/api_utilities.h>\n"
  },
  {
    "path": "include/dsn/service_api_cpp.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     cpp development library atop zion's c service api\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/service_api_c.h>\n#include <dsn/tool-api/auto_codes.h>\n#include <dsn/cpp/serialization.h>\n#include <dsn/cpp/rpc_stream.h>\n#include <dsn/cpp/serverlet.h>\n#include <dsn/cpp/service_app.h>\n#include <dsn/tool-api/rpc_address.h>\n"
  },
  {
    "path": "include/dsn/tool/nativerun.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     the nativerun tool which is a simple but runnable deployment runtime for zion\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool_api.h>\n\nnamespace dsn {\nnamespace tools {\n\nclass nativerun : public tool_app\n{\npublic:\n    nativerun(const char *name) : tool_app(name) {}\n\n    virtual void install(service_spec &s) override;\n\n    virtual void run() override;\n};\n}\n} // end namespace dsn::tools\n"
  },
  {
    "path": "include/dsn/tool/node_scoper.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     scoped helper that saves the current thread-local context (tls_dsn), switches it to\n *     the given service node, and restores the saved context on destruction\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool_api.h>\n\nnamespace dsn {\nnamespace tools {\n\nclass node_scoper\n{\npublic:\n    node_scoper(service_node *node)\n    {\n        _old = tls_dsn;\n        task::set_tls_dsn_context(node, nullptr);\n    }\n\n    ~node_scoper() { tls_dsn = _old; }\n\nprivate:\n    struct __tls_dsn__ _old;\n};\n\n// ---- inline implementation ------\n}\n} // end namespace dsn::tools\n"
  },
  {
    "path": "include/dsn/tool/providers.common.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     declares dsn::tools::register_common_providers()\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool_api.h>\n\nnamespace dsn {\nnamespace tools {\nextern void register_common_providers();\n}\n}\n"
  },
  {
    "path": "include/dsn/tool/simulator.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     the simulation tool (include simulation, replay, model-checking, ...)\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool_api.h>\n#include <dsn/cpp/service_app.h>\n\nnamespace dsn {\nnamespace tools {\n\nclass checker\n{\npublic:\n    typedef checker *(*factory)();\n    template <typename T>\n    static checker *create()\n    {\n        return new T();\n    }\n\npublic:\n    checker() {}\n    virtual ~checker() {}\n    virtual void initialize(const std::string &name, const std::vector<service_app *> &apps) = 0;\n    virtual void check() = 0;\n    const std::string &name() const { return 
_name; }\nprotected:\n    std::vector<service_app *> _apps;\n    std::string _name;\n};\n\nclass simulator : public tool_app\n{\npublic:\n    simulator(const char *name) : tool_app(name) {}\n    virtual void install(service_spec &s) override;\n    virtual void run() override;\n    static void register_checker(const std::string &name, checker::factory f);\n\nprivate:\n    static void on_system_exit(sys_exit_type st);\n};\n\n// ---- inline implementation ------\n}\n} // end namespace dsn::tools\n"
  },
  {
    "path": "include/dsn/tool-api/aio_task.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/tool-api/task.h>\n#include <vector>\n\nnamespace dsn {\n\nnamespace utils {\nclass latency_tracer;\n}\n\nenum aio_type\n{\n    AIO_Invalid,\n    AIO_Read,\n    AIO_Write\n};\n\ntypedef struct\n{\n    void *buffer;\n    int size;\n} dsn_file_buffer_t;\n\nclass disk_engine;\nclass aio_context : public ref_counter\n{\npublic:\n    // filled by apps\n    dsn_handle_t file;\n    void *buffer;\n    uint64_t buffer_size;\n    uint64_t file_offset;\n\n    // filled by frameworks\n    aio_type type;\n    disk_engine *engine;\n    void *file_object; // TODO(wutao1): make it disk_file*, and distinguish it from `file`\n\n    aio_context()\n        : file(nullptr),\n          buffer(nullptr),\n          
buffer_size(0),\n          file_offset(0),\n          type(AIO_Invalid),\n          engine(nullptr),\n          file_object(nullptr)\n    {\n    }\n};\ntypedef dsn::ref_ptr<aio_context> aio_context_ptr;\n\nclass aio_task : public task\n{\npublic:\n    aio_task(task_code code, const aio_handler &cb, int hash = 0, service_node *node = nullptr);\n    aio_task(task_code code, aio_handler &&cb, int hash = 0, service_node *node = nullptr);\n\n    // tell the compiler that we want both the enqueue from base task and ours\n    // to prevent the compiler complaining -Werror,-Woverloaded-virtual.\n    using task::enqueue;\n    void enqueue(error_code err, size_t transferred_size);\n\n    size_t get_transferred_size() const { return _transferred_size; }\n\n    // The ownership of `aio_context` is held by `aio_task`.\n    aio_context *get_aio_context() { return _aio_ctx.get(); }\n\n    // merge buffers in _unmerged_write_buffers to a single merged buffer.\n    // and store it in _merged_write_buffer_holder.\n    void collapse();\n\n    // invoked on aio completed\n    virtual void exec() override\n    {\n        if (nullptr != _cb) {\n            _cb(_error, _transferred_size);\n        }\n    }\n\n    std::vector<dsn_file_buffer_t> _unmerged_write_buffers;\n    blob _merged_write_buffer_holder;\n    std::shared_ptr<dsn::utils::latency_tracer> _tracer;\n\nprotected:\n    void clear_non_trivial_on_task_end() override { _cb = nullptr; }\n\nprivate:\n    aio_context_ptr _aio_ctx;\n    size_t _transferred_size;\n    aio_handler _cb;\n};\ntypedef dsn::ref_ptr<aio_task> aio_task_ptr;\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/tool-api/async_calls.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/service_api_c.h>\n#include <dsn/utility/function_traits.h>\n#include <dsn/tool-api/file_io.h>\n#include <dsn/tool-api/task_tracker.h>\n#include <dsn/cpp/serialization.h>\n\nnamespace dsn {\n\ninline void empty_rpc_handler(error_code, message_ex *, message_ex *) {}\n\n// callback(error_code, TResponse&& response)\ntemplate <typename TFunction, class Enable = void>\nstruct is_typed_rpc_callback\n{\n    constexpr static bool const value = false;\n};\ntemplate <typename TFunction>\nstruct is_typed_rpc_callback<TFunction,\n                             typename std::enable_if<function_traits<TFunction>::arity == 2>::type>\n{\n    // todo: check if response_t is marshallable\n    using inspect_t = 
function_traits<TFunction>;\n    constexpr static bool const value =\n        std::is_same<typename inspect_t::template arg_t<0>, error_code>::value &&\n        std::is_default_constructible<\n            typename std::decay<typename inspect_t::template arg_t<1>>::type>::value;\n    using response_t = typename std::decay<typename inspect_t::template arg_t<1>>::type;\n};\n\nnamespace tasking {\ninline task_ptr\ncreate_task(task_code code, task_tracker *tracker, task_handler &&callback, int hash = 0)\n{\n    task_ptr t(new raw_task(code, std::move(callback), hash, nullptr));\n    t->set_tracker(tracker);\n    t->spec().on_task_create.execute(task::get_current_task(), t);\n    return t;\n}\n\ninline task_ptr create_timer_task(task_code code,\n                                  task_tracker *tracker,\n                                  task_handler &&callback,\n                                  std::chrono::milliseconds interval,\n                                  int hash = 0)\n{\n    task_ptr t(new timer_task(code, std::move(callback), interval.count(), hash, nullptr));\n    t->set_tracker(tracker);\n    t->spec().on_task_create.execute(task::get_current_task(), t);\n    return t;\n}\n\ninline task_ptr enqueue(task_code code,\n                        task_tracker *tracker,\n                        task_handler &&callback,\n                        int hash = 0,\n                        std::chrono::milliseconds delay = std::chrono::milliseconds(0))\n{\n    auto tsk = create_task(code, tracker, std::move(callback), hash);\n    tsk->set_delay(static_cast<int>(delay.count()));\n    tsk->enqueue();\n    return tsk;\n}\n\ninline task_ptr enqueue_timer(task_code evt,\n                              task_tracker *tracker,\n                              task_handler &&callback,\n                              std::chrono::milliseconds timer_interval,\n                              int hash = 0,\n                              std::chrono::milliseconds delay = 
std::chrono::milliseconds(0))\n{\n    auto tsk = create_timer_task(evt, tracker, std::move(callback), timer_interval, hash);\n    tsk->set_delay(static_cast<int>(delay.count()));\n    tsk->enqueue();\n    return tsk;\n}\n} // namespace tasking\n\nnamespace rpc {\n\ninline rpc_response_task_ptr create_rpc_response_task(dsn::message_ex *req,\n                                                      task_tracker *tracker,\n                                                      rpc_response_handler &&callback,\n                                                      int reply_thread_hash = 0)\n{\n    rpc_response_task_ptr t(\n        new rpc_response_task((message_ex *)req, std::move(callback), reply_thread_hash, nullptr));\n    t->set_tracker(tracker);\n    t->spec().on_task_create.execute(task::get_current_task(), t);\n    return t;\n}\n\ntemplate <typename TCallback>\ntypename std::enable_if<is_typed_rpc_callback<TCallback>::value, rpc_response_task_ptr>::type\ncreate_rpc_response_task(dsn::message_ex *req,\n                         task_tracker *tracker,\n                         TCallback &&callback,\n                         int reply_thread_hash = 0)\n{\n    return create_rpc_response_task(\n        req,\n        tracker,\n        [cb_fwd = std::move(callback)](\n            error_code err, dsn::message_ex * req, dsn::message_ex * resp) mutable {\n            typename is_typed_rpc_callback<TCallback>::response_t response = {};\n            if (err == ERR_OK) {\n                unmarshall(resp, response);\n            }\n            cb_fwd(err, std::move(response));\n        },\n        reply_thread_hash);\n}\n\ntemplate <typename TCallback>\nrpc_response_task_ptr call(rpc_address server,\n                           dsn::message_ex *request,\n                           task_tracker *tracker,\n                           TCallback &&callback,\n                           int reply_thread_hash = 0)\n{\n    rpc_response_task_ptr t = create_rpc_response_task(\n        
request, tracker, std::forward<TCallback>(callback), reply_thread_hash);\n    dsn_rpc_call(server, t.get());\n    return t;\n}\n\n//\n// for TRequest/TResponse, we assume that the following routines are defined:\n//    marshall(binary_writer& writer, const T& val);\n//    unmarshall(binary_reader& reader, /*out*/ T& val);\n// either in the namespace of utils or T\n// developers may write these helper functions by their own, or use tools\n// such as protocol-buffer, thrift, or bond to generate these functions automatically\n// for their TRequest and TResponse\n//\ntemplate <typename TRequest, typename TCallback>\nrpc_response_task_ptr\ncall(rpc_address server,\n     task_code code,\n     TRequest &&req,\n     task_tracker *tracker,\n     TCallback &&callback,\n     std::chrono::milliseconds timeout = std::chrono::milliseconds(0),\n     int thread_hash = 0, ///< if thread_hash == 0 && partition_hash != 0, thread_hash is\n                          /// computed from partition_hash\n     uint64_t partition_hash = 0,\n     int reply_thread_hash = 0)\n{\n    dsn::message_ex *msg = dsn::message_ex::create_request(\n        code, static_cast<int>(timeout.count()), thread_hash, partition_hash);\n    marshall(msg, std::forward<TRequest>(req));\n    return call(server, msg, tracker, std::forward<TCallback>(callback), reply_thread_hash);\n}\n\n// no callback\ntemplate <typename TRequest>\nvoid call_one_way_typed(rpc_address server,\n                        task_code code,\n                        const TRequest &req,\n                        int thread_hash = 0, ///< if thread_hash == 0 && partition_hash != 0,\n                                             /// thread_hash is computed from partition_hash\n                        uint64_t partition_hash = 0)\n{\n    dsn::message_ex *msg = dsn::message_ex::create_request(code, 0, thread_hash, partition_hash);\n    marshall(msg, req);\n    dsn_rpc_call_one_way(server, msg);\n}\n\ntemplate <typename TResponse>\nstd::pair<error_code, 
TResponse> wait_and_unwrap(const rpc_response_task_ptr &tsk)\n{\n    tsk->wait();\n    std::pair<error_code, TResponse> result;\n    result.first = tsk->error();\n    if (tsk->error() == ERR_OK) {\n        unmarshall(tsk->get_response(), result.second);\n    }\n    return result;\n}\n\ntemplate <typename TResponse, typename TRequest>\nstd::pair<error_code, TResponse>\ncall_wait(rpc_address server,\n          task_code code,\n          TRequest &&req,\n          std::chrono::milliseconds timeout = std::chrono::milliseconds(0),\n          int thread_hash = 0,\n          uint64_t partition_hash = 0)\n{\n    return wait_and_unwrap<TResponse>(call(server,\n                                           code,\n                                           std::forward<TRequest>(req),\n                                           nullptr,\n                                           empty_rpc_handler,\n                                           timeout,\n                                           thread_hash,\n                                           partition_hash));\n}\n} // namespace rpc\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/tool-api/auto_codes.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     umbrella header that pulls in the error code, thread pool code, task code and gpid headers\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/utility/error_code.h>\n#include <dsn/tool-api/threadpool_code.h>\n#include <dsn/tool-api/task_code.h>\n#include <dsn/tool-api/gpid.h>\n"
  },
  {
    "path": "include/dsn/tool-api/command_manager.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/synchronize.h>\n#include <dsn/utility/singleton.h>\n#include <dsn/c/api_utilities.h>\n#include <map>\n#include <dsn/utility/autoref_ptr.h>\n\nnamespace dsn {\n\nclass command_manager : public ::dsn::utils::singleton<command_manager>\n{\npublic:\n    typedef std::function<std::string(const std::vector<std::string> &)> command_handler;\n\n    command_manager();\n\n    ~command_manager();\n\n    dsn_handle_t register_command(const std::vector<std::string> &commands,\n                                  const std::string &help_one_line,\n                                  const std::string &help_long,\n                                  command_handler handler);\n\n    void 
deregister_command(dsn_handle_t handle);\n\n    bool run_command(const std::string &cmd,\n                     const std::vector<std::string> &args,\n                     /*out*/ std::string &output);\n\nprivate:\n    struct command_instance : public ref_counter\n    {\n        std::vector<std::string> commands;\n        std::string help_short;\n        std::string help_long;\n        command_handler handler;\n    };\n\n    typedef ref_ptr<command_instance> command_instance_ptr;\n    utils::rw_lock_nr _lock;\n    std::map<std::string, command_instance_ptr> _handlers;\n};\n\n} // namespace dsn\n\n#define UNREGISTER_VALID_HANDLER(ptr)                                                              \\\n    do {                                                                                           \\\n        if (ptr != nullptr) {                                                                      \\\n            dsn::command_manager::instance().deregister_command(ptr);                              \\\n            ptr = nullptr;                                                                         \\\n        }                                                                                          \\\n    } while (0)\n\n// if args are empty, then return the old flag;\n// otherwise set the proper \"flag\" according to args\ninline std::string remote_command_set_bool_flag(bool &flag,\n                                                const char *flag_name,\n                                                const std::vector<std::string> &args)\n{\n    std::string ret_msg(\"OK\");\n    if (args.empty()) {\n        ret_msg = flag ? 
\"true\" : \"false\";\n    } else {\n        if (args[0] == \"true\") {\n            flag = true;\n            ddebug(\"set %s to true by remote command\", flag_name);\n        } else if (args[0] == \"false\") {\n            flag = false;\n            ddebug(\"set %s to false by remote command\", flag_name);\n        } else {\n            ret_msg = \"ERR: invalid arguments\";\n        }\n    }\n    return ret_msg;\n}\n"
  },
  {
    "path": "include/dsn/tool-api/env_provider.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     interface of the environment in Zion\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/utility/ports.h>\n#include <dsn/utility/utils.h>\n#include <dsn/utility/dlib.h>\n#include <random>\n\nnamespace dsn {\n/*!\n@addtogroup tool-api-providers\n@{\n*/\nclass env_provider\n{\npublic:\n    template <typename T>\n    static env_provider *create(env_provider *inner_provider)\n    {\n        return new T(inner_provider);\n    }\n\n    typedef env_provider *(*factory)(env_provider *);\n\n    env_provider(env_provider *inner_provider){};\n};\n/*@}*/\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/tool-api/file_io.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <fcntl.h>\n\n#include <dsn/tool-api/aio_task.h>\n\nnamespace dsn {\n\n// forward declaration\nclass disk_file;\n\nnamespace file {\n\n/// open file\n///\n/// \\param file_name filename of the file.\n/// \\param flag      flags such as O_RDONLY | O_BINARY used by ::open\n/// \\param pmode     permission mode used by ::open\n///\n/// \\return file handle\n///\nextern disk_file *open(const char *file_name, int flag, int pmode);\n\n/// close the file handle\nextern error_code close(disk_file *file);\n\n/// flush the buffer of the given file\nextern error_code flush(disk_file *file);\n\ninline aio_task_ptr\ncreate_aio_task(task_code code, task_tracker *tracker, aio_handler &&callback, int hash = 0)\n{\n    
aio_task_ptr t(new aio_task(code, std::move(callback), hash));\n    t->set_tracker((task_tracker *)tracker);\n    t->spec().on_task_create.execute(task::get_current_task(), t);\n    return t;\n}\n\nextern aio_task_ptr read(disk_file *file,\n                         char *buffer,\n                         int count,\n                         uint64_t offset,\n                         task_code callback_code,\n                         task_tracker *tracker,\n                         aio_handler &&callback,\n                         int hash = 0);\n\nextern aio_task_ptr write(disk_file *file,\n                          const char *buffer,\n                          int count,\n                          uint64_t offset,\n                          task_code callback_code,\n                          task_tracker *tracker,\n                          aio_handler &&callback,\n                          int hash = 0);\n\nextern aio_task_ptr write_vector(disk_file *file,\n                                 const dsn_file_buffer_t *buffers,\n                                 int buffer_count,\n                                 uint64_t offset,\n                                 task_code callback_code,\n                                 task_tracker *tracker,\n                                 aio_handler &&callback,\n                                 int hash = 0);\n\nextern aio_context_ptr prepare_aio_context(aio_task *tsk);\n\n} // namespace file\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/tool-api/future_types.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#pragma once\n\n#include <functional>\n#include <dsn/utility/error_code.h>\n#include <dsn/tool-api/task.h>\n\nnamespace dsn {\ntypedef std::function<void(dsn::error_code)> err_callback;\ntypedef future_task<dsn::error_code> error_code_future;\ntypedef dsn::ref_ptr<error_code_future> error_code_future_ptr;\n}\n"
  },
  {
    "path": "include/dsn/tool-api/global_config.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     rdsn configurations\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n/// Attention: There are some types which are defined in dsn_runtime being used in this file,\n/// so this file is coupled with dsn_runtime. 
If you want to add some variables/types here or\n/// include this file, please make sure whether you want to couple with dsn_runtime or not.\n\n#pragma once\n\n#include <dsn/tool-api/task_spec.h>\n#include <dsn/tool-api/threadpool_spec.h>\n#include <dsn/utility/dlib.h>\n#include <string>\n#include <map>\n\nnamespace dsn {\n\n//\n// channel and header format are specified per task-code\n// port is specified per RPC call\n//\nstruct network_client_config\n{\n    std::string factory_name;\n    int message_buffer_block_size;\n\n    DSN_API network_client_config();\n};\n\ntypedef std::map<rpc_channel, network_client_config> network_client_configs;\n\nstruct network_server_config\n{\n    // [ key\n    int port;\n    rpc_channel channel;\n    // ]\n\n    std::string factory_name;\n    int message_buffer_block_size;\n\n    DSN_API network_server_config();\n    DSN_API network_server_config(int p, rpc_channel c);\n    DSN_API network_server_config(const network_server_config &r);\n    DSN_API bool operator<(const network_server_config &r) const;\n};\n\n// <port,channel> => config\ntypedef std::map<network_server_config, network_server_config> network_server_configs;\n\n// Terms used in rDSN:\n//  - app_id\n//  - app_name/role_name\n//  - role_index\n//  - app_full_name\n//  - app_type\nstruct service_app_spec\n{\n    int id;    // global id for all roles, assigned by rDSN automatically\n    int index; // local index for the current role (1,2,3,...), also named as \"role_index\"\n    std::string data_dir;       // data dir for the app. 
it is auto-set as\n                                // ${service_spec.data_dir}/${service_app_spec.name}.\n    std::string config_section; // [apps.${role_name}]\n    std::string role_name;      // role name of [apps.${role_name}], also named as \"app_name\"\n\n    // combined by role_name and role_index, also named as \"app_full_name\"\n    // e.g., if role_name = meta and role_index = 1, then app_full_name = meta1\n    // specially, if role count is 1, then app_full_name equals to role_name\n    // it is usually used for printing log\n    std::string full_name;\n    std::string type; // registered type name, alse named as \"app_type\"\n    std::string arguments;\n    std::vector<int> ports;\n    std::list<dsn::threadpool_code> pools;\n    int delay_seconds;\n    bool run;\n    int count;     // index = 1,2,...,count\n    int ports_gap; // when count > 1\n\n    network_client_configs network_client_confs;\n    network_server_configs network_server_confs;\n\n    service_app_spec() {}\n    /*service_app_spec(const service_app_spec& r);*/\n    DSN_API bool init(const char *section,\n                      const char *role_name_,\n                      service_app_spec *default_value,\n                      network_client_configs *default_client_nets = nullptr,\n                      network_server_configs *default_server_nets = nullptr);\n};\n\nCONFIG_BEGIN(service_app_spec)\nCONFIG_FLD_STRING(type, \"\", \"app type name, as given when registering by dsn_register_app\")\nCONFIG_FLD_STRING(arguments, \"\", \"arguments for the app instances\")\nCONFIG_FLD_INT_LIST(ports, \"RPC server listening ports needed for this app\")\nCONFIG_FLD_ID_LIST(threadpool_code, pools, \"thread pools need to be started\")\nCONFIG_FLD(int, uint64, delay_seconds, 0, \"delay seconds for when the apps should be started\")\nCONFIG_FLD(int,\n           uint64,\n           count,\n           1,\n           \"count of app instances for this type (ports are automatically \"\n           \"calculated 
accordingly to avoid confliction)\")\nCONFIG_FLD(bool, bool, run, true, \"whether to run the app instances or not\")\nCONFIG_END\n\nstruct service_spec\n{\n    std::string tool;                // the main tool (only 1 is allowed for a time)\n    std::list<std::string> toollets; // toollets enabled compatible to the main tool\n    std::string data_dir;            // to store all data/log/coredump etc.\n\n    //\n    // we allow multiple apps in the same process in rDSN, and each app (service_app_spec)\n    // has its own rpc/thread/disk engines etc..\n    // when a rDSN call is made in a thread not belonging to any rDSN app,\n    // developers need to call dsn_mimic_app to designated which app this call and subsequent\n    // calls belong to.\n    // this is kinds of tedious sometimes, we therefore introduce enable_default_app_mimic\n    // option here, which automatically starts an internal app which does nothing but serves\n    // those external calls only. This will release the developers from writing dsn_mimic_app\n    // when they write certain codes, esp. 
client side code.\n    //\n    bool enable_default_app_mimic;\n\n    std::string timer_factory_name;\n    std::string env_factory_name;\n    std::string lock_factory_name;\n    std::string lock_nr_factory_name;\n    std::string rwlock_nr_factory_name;\n    std::string semaphore_factory_name;\n    std::string logging_factory_name;\n\n    network_client_configs network_default_client_cfs; // default network configured by tools\n    network_server_configs network_default_server_cfs; // default network configured by tools\n    std::vector<threadpool_spec> threadpool_specs;\n    std::vector<service_app_spec> app_specs;\n\n    // auto-set\n    std::string dir_log;\n\n    service_spec() {}\n    DSN_API bool init();\n    DSN_API bool init_app_specs();\n};\n\nCONFIG_BEGIN(service_spec)\nCONFIG_FLD_STRING(tool, \"\", \"use what tool to run this process, e.g., native or simulator\")\nCONFIG_FLD_STRING_LIST(toollets, \"use what toollets, e.g., tracer, profiler, fault_injector\")\nCONFIG_FLD_STRING(data_dir, \"./data\", \"where to put the all the data/log/coredump, etc..\")\nCONFIG_FLD(\n    bool,\n    bool,\n    enable_default_app_mimic,\n    false,\n    \"whether to start a default service app for serving the rDSN calls made in\\n\"\n    \"; non-rDSN threads, so that developers do not need to write dsn_mimic_app call before them\\n\"\n    \"; in this case, a [apps.mimic] section must be defined in config files\");\n\nCONFIG_FLD_STRING(timer_factory_name, \"\", \"timer service provider\")\nCONFIG_FLD_STRING(env_factory_name, \"\", \"environment provider\")\nCONFIG_FLD_STRING(lock_factory_name, \"\", \"recursive exclusive lock provider\")\nCONFIG_FLD_STRING(lock_nr_factory_name, \"\", \"non-recurisve exclusive lock provider\")\nCONFIG_FLD_STRING(rwlock_nr_factory_name, \"\", \"non-recurisve rwlock provider\")\nCONFIG_FLD_STRING(semaphore_factory_name, \"\", \"semaphore provider\")\nCONFIG_FLD_STRING(logging_factory_name, \"\", \"logging provider\")\nCONFIG_END\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/tool-api/gpid.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#pragma once\n\n#include <cstdint>\n#include <thrift/protocol/TProtocol.h>\n\nnamespace dsn {\n\n// Group-Partition-ID.\nclass gpid\n{\npublic:\n    constexpr gpid(int app_id, int pidx) : _value({.u = {app_id, pidx}}) {}\n\n    constexpr gpid() = default;\n\n    constexpr uint64_t value() const { return _value.value; }\n\n    bool operator<(const gpid &r) const\n    {\n        return _value.u.app_id < r._value.u.app_id ||\n               (_value.u.app_id == r._value.u.app_id &&\n                _value.u.partition_index < r._value.u.partition_index);\n    }\n\n    constexpr bool operator==(const gpid &r) const { return value() == r.value(); }\n\n    constexpr bool operator!=(const gpid &r) const { return value() != r.value(); }\n\n    
constexpr int32_t get_app_id() const { return _value.u.app_id; }\n\n    constexpr int32_t get_partition_index() const { return _value.u.partition_index; }\n\n    void set_app_id(int32_t v) { _value.u.app_id = v; }\n\n    void set_partition_index(int32_t v) { _value.u.partition_index = v; }\n\n    void set_value(uint64_t v) { _value.value = v; }\n\n    bool parse_from(const char *str);\n\n    const char *to_string() const;\n\n    // for serialization in thrift format\n    uint32_t read(::apache::thrift::protocol::TProtocol *iprot);\n    uint32_t write(::apache::thrift::protocol::TProtocol *oprot) const;\n\n    int thread_hash() const { return _value.u.app_id * 7919 + _value.u.partition_index; }\n\n    friend std::ostream &operator<<(std::ostream &os, gpid id)\n    {\n        return os << std::string(id.to_string());\n    }\n\nprivate:\n    union\n    {\n        struct\n        {\n            int32_t app_id;          ///< 1-based app id (0 for invalid)\n            int32_t partition_index; ///< zero-based partition index\n        } u;\n        uint64_t value;\n    } _value{.value = 0};\n};\n\n} // namespace dsn\n\nnamespace std {\ntemplate <>\nstruct hash<::dsn::gpid>\n{\n    size_t operator()(const ::dsn::gpid &pid) const\n    {\n        return static_cast<std::size_t>(pid.thread_hash());\n    }\n};\n} // namespace std\n"
  },
  {
    "path": "include/dsn/tool-api/group_address.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     group_address is a collection of rpc_addresses, usually used for replication\n *\n * Revision history:\n *     Sep., 2015, @imzhenyu, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <algorithm>\n#include <dsn/c/api_utilities.h>\n#include <dsn/c/api_layer1.h>\n#include <dsn/utility/synchronize.h>\n#include <dsn/utility/autoref_ptr.h>\n#include <dsn/utility/rand.h>\n#include <dsn/tool-api/rpc_address.h>\n\nnamespace dsn {\nclass rpc_group_address : public dsn::ref_counter\n{\npublic:\n    rpc_group_address(const char *name);\n    rpc_group_address(const rpc_group_address &other);\n    rpc_group_address &operator=(const rpc_group_address &other);\n    bool 
add(rpc_address addr);\n    void add_list(const std::vector<rpc_address> &list)\n    {\n        for (const rpc_address &r : list) {\n            add(r);\n        }\n    }\n    void set_leader(rpc_address addr);\n    bool remove(rpc_address addr);\n    bool contains(rpc_address addr);\n    int count();\n\n    const std::vector<rpc_address> &members() const { return _members; }\n    rpc_address random_member() const\n    {\n        alr_t l(_lock);\n        return _members.empty() ? rpc_address::s_invalid_address\n                                : _members[rand::next_u32(0, (uint32_t)_members.size() - 1)];\n    }\n    rpc_address next(rpc_address current) const;\n    rpc_address leader() const\n    {\n        alr_t l(_lock);\n        return _leader_index >= 0 ? _members[_leader_index] : rpc_address::s_invalid_address;\n    }\n    void leader_forward();\n    rpc_address possible_leader();\n    bool is_update_leader_automatically() const { return _update_leader_automatically; }\n    void set_update_leader_automatically(bool value) { _update_leader_automatically = value; }\n    const char *name() const { return _name.c_str(); }\n\nprivate:\n    typedef std::vector<rpc_address> members_t;\n    typedef ::dsn::utils::auto_read_lock alr_t;\n    typedef ::dsn::utils::auto_write_lock alw_t;\n\n    mutable ::dsn::utils::rw_lock_nr _lock;\n    members_t _members;\n    int _leader_index;\n    bool _update_leader_automatically;\n    std::string _name;\n};\n\n// ------------------ inline implementation --------------------\n\ninline rpc_group_address::rpc_group_address(const char *name)\n{\n    _name = name;\n    _leader_index = -1;\n    _update_leader_automatically = true;\n}\n\ninline rpc_group_address::rpc_group_address(const rpc_group_address &other)\n{\n    _name = other._name;\n    _leader_index = other._leader_index;\n    _update_leader_automatically = other._update_leader_automatically;\n    _members = other._members;\n}\n\ninline rpc_group_address 
&rpc_group_address::operator=(const rpc_group_address &other)\n{\n    _name = other._name;\n    _leader_index = other._leader_index;\n    _update_leader_automatically = other._update_leader_automatically;\n    _members = other._members;\n    return *this;\n}\n\ninline bool rpc_group_address::add(rpc_address addr)\n{\n    dassert(addr.type() == HOST_TYPE_IPV4, \"rpc group address member must be ipv4\");\n\n    alw_t l(_lock);\n    if (_members.end() == std::find(_members.begin(), _members.end(), addr)) {\n        _members.push_back(addr);\n        return true;\n    } else {\n        return false;\n    }\n}\n\ninline void rpc_group_address::leader_forward()\n{\n    alw_t l(_lock);\n    if (_members.empty())\n        return;\n    _leader_index = (_leader_index + 1) % _members.size();\n}\n\ninline void rpc_group_address::set_leader(rpc_address addr)\n{\n    alw_t l(_lock);\n    if (addr.is_invalid()) {\n        _leader_index = -1;\n    } else {\n        dassert(addr.type() == HOST_TYPE_IPV4, \"rpc group address member must be ipv4\");\n        for (int i = 0; i < (int)_members.size(); i++) {\n            if (_members[i] == addr) {\n                _leader_index = i;\n                return;\n            }\n        }\n\n        _members.push_back(addr);\n        _leader_index = (int)(_members.size() - 1);\n    }\n}\n\ninline rpc_address rpc_group_address::possible_leader()\n{\n    alr_t l(_lock);\n    if (_members.empty())\n        return rpc_address::s_invalid_address;\n    if (_leader_index == -1)\n        _leader_index = rand::next_u32(0, (uint32_t)_members.size() - 1);\n    return _members[_leader_index];\n}\n\ninline bool rpc_group_address::remove(rpc_address addr)\n{\n    alw_t l(_lock);\n    auto it = std::find(_members.begin(), _members.end(), addr);\n    bool r = (it != _members.end());\n    if (r) {\n        if (-1 != _leader_index && addr == _members[_leader_index])\n            _leader_index = -1;\n\n        _members.erase(it);\n    }\n    return 
r;\n}\n\ninline bool rpc_group_address::contains(rpc_address addr)\n{\n    alr_t l(_lock);\n    return _members.end() != std::find(_members.begin(), _members.end(), addr);\n}\n\ninline int rpc_group_address::count()\n{\n    alr_t l(_lock);\n    return _members.size();\n}\n\ninline rpc_address rpc_group_address::next(rpc_address current) const\n{\n    alr_t l(_lock);\n    if (_members.empty())\n        return rpc_address::s_invalid_address;\n    if (current.is_invalid())\n        return _members[rand::next_u32(0, (uint32_t)_members.size() - 1)];\n    else {\n        auto it = std::find(_members.begin(), _members.end(), current);\n        if (it == _members.end())\n            return _members[rand::next_u32(0, (uint32_t)_members.size() - 1)];\n        else {\n            it++;\n            return it == _members.end() ? _members[0] : *it;\n        }\n    }\n}\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/tool-api/logging_provider.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     base prototype for logging\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <stdarg.h>\n#include <dsn/utility/factory_store.h>\n\nnamespace dsn {\n/*!\n@addtogroup tool-api-providers\n@{\n*/\nclass logging_provider\n{\npublic:\n    template <typename T>\n    static logging_provider *create(const char *log_dir)\n    {\n        return new T(log_dir);\n    }\n\n    typedef logging_provider *(*factory)(const char *);\n\npublic:\n    logging_provider(const char *) {}\n\n    virtual ~logging_provider(void) {}\n\n    // singleton\n    static logging_provider *instance();\n\n    // not thread-safe\n    
static void set_logger(logging_provider *logger);\n\n    virtual void dsn_logv(const char *file,\n                          const char *function,\n                          const int line,\n                          dsn_log_level_t log_level,\n                          const char *fmt,\n                          va_list args) = 0;\n\n    virtual void dsn_log(const char *file,\n                         const char *function,\n                         const int line,\n                         dsn_log_level_t log_level,\n                         const char *str) = 0;\n\n    virtual void flush() = 0;\n\nprivate:\n    static std::unique_ptr<logging_provider> _logger;\n\n    static logging_provider *create_default_instance();\n};\n\nvoid set_log_prefixed_message_func(std::function<std::string()> func);\nextern std::function<std::string()> log_prefixed_message_func;\n\nnamespace tools {\nnamespace internal_use_only {\nDSN_API bool register_component_provider(const char *name,\n                                         logging_provider::factory f,\n                                         ::dsn::provider_type type);\n} // namespace internal_use_only\n} // namespace tools\n} // namespace dsn\n\nextern void dsn_log_init(const std::string &logging_factory_name,\n                         const std::string &dir_log,\n                         std::function<std::string()> dsn_log_prefixed_message_func);\n"
  },
  {
    "path": "include/dsn/tool-api/message_parser.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/ports.h>\n#include <dsn/utility/singleton.h>\n#include <dsn/tool-api/task_spec.h>\n#include <dsn/tool-api/rpc_message.h>\n#include <dsn/utility/autoref_ptr.h>\n#include <dsn/utility/utils.h>\n#include <dsn/utility/blob.h>\n#include <dsn/utility/dlib.h>\n#include <vector>\n\nnamespace dsn {\n\n// TODO(wutao1): call it read_buffer, and make it an utility\n// Not-Thread-Safe.\nclass message_reader\n{\npublic:\n    explicit message_reader(int buffer_block_size)\n        : _buffer_occupied(0), _buffer_block_size(buffer_block_size)\n    {\n    }\n\n    // called before read to extend read buffer\n    DSN_API char *read_buffer_ptr(unsigned int read_next);\n\n    // get remaining buffer capacity\n    
unsigned int read_buffer_capacity() const { return _buffer.length() - _buffer_occupied; }\n\n    // called after read to mark data occupied\n    void mark_read(unsigned int read_length) { _buffer_occupied += read_length; }\n\n    // discard read data\n    void truncate_read() { _buffer_occupied = 0; }\n\n    // mark the tailing `sz` of bytes are consumed and discardable.\n    void consume_buffer(size_t sz)\n    {\n        _buffer = _buffer.range(sz);\n        _buffer_occupied -= sz;\n    }\n\n    blob buffer() const { return _buffer.range(0, _buffer_occupied); }\n\npublic:\n    // TODO(wutao1): make them private members\n    blob _buffer;\n    unsigned int _buffer_occupied;\n    const unsigned int _buffer_block_size;\n};\n\nclass message_parser;\ntypedef ref_ptr<message_parser> message_parser_ptr;\n\nclass message_ex;\n\nclass message_parser : public ref_counter\n{\npublic:\n    template <typename T>\n    static message_parser *create()\n    {\n        return new T();\n    }\n\n    typedef message_parser *(*factory)();\n\npublic:\n    virtual ~message_parser() {}\n\n    // reset the parser\n    virtual void reset() {}\n\n    // after read, see if we can compose a message\n    // if read_next returns -1, indicated the the message is corrupted\n    virtual message_ex *get_message_on_receive(message_reader *reader, /*out*/ int &read_next) = 0;\n\n    // prepare buffer before send.\n    // this method should be called before get_buffer_count_on_send() and get_buffers_on_send()\n    // to do some prepare operation.\n    // may be invoked for mutiple times if the message is reused for resending.\n    virtual void prepare_on_send(message_ex *msg) {}\n\n    struct send_buf\n    {\n        void *buf;\n        size_t sz;\n    };\n\n    // get max buffer count needed by get_buffers_on_send().\n    // may be invoked for mutiple times if the message is reused for resending.\n    int get_buffer_count_on_send(message_ex *msg) const\n    {\n        return 
static_cast<int>(msg->buffers.size());\n    }\n\n    // get buffers from message to 'buffers'.\n    // return buffer count used, which must be no more than the return value of\n    // get_buffer_count_on_send().\n    // may be invoked multiple times if the message is reused for resending.\n    virtual int get_buffers_on_send(message_ex *msg, /*out*/ send_buf *buffers) = 0;\n\npublic:\n    DSN_API static network_header_format\n    get_header_type(const char *bytes); // buffer size >= sizeof(uint32_t)\n    DSN_API static std::string get_debug_string(const char *bytes);\n};\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/tool-api/network.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/tool-api/task.h>\n#include <dsn/utility/synchronize.h>\n#include <dsn/tool-api/message_parser.h>\n#include <dsn/tool-api/rpc_address.h>\n#include <dsn/utility/exp_delay.h>\n#include <dsn/utility/dlib.h>\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n#include <atomic>\n\nnamespace dsn {\n\nclass rpc_engine;\nclass service_node;\nclass task_worker_pool;\nclass task_queue;\n\n/*!\n@addtogroup tool-api-providers\n@{\n*/\n\n/*!\n  network bound to a specific rpc_channel and port (see start)\n !!! 
all threads must be started with task::set_tls_dsn_context(provider->node(), null);\n*/\nclass network\n{\npublic:\n    //\n    // network factory prototype\n    //\n    template <typename T>\n    static network *create(rpc_engine *srv, network *inner_provider)\n    {\n        return new T(srv, inner_provider);\n    }\n\n    typedef network *(*factory)(rpc_engine *, network *);\n\npublic:\n    //\n    // srv - the rpc engine, could contain many networks there\n    // inner_provider - when not null, this network is simply a wrapper for tooling purpose (e.g.,\n    // tracing)\n    //                  all downcalls should be redirected to the inner provider in the end\n    //\n    DSN_API network(rpc_engine *srv, network *inner_provider);\n    virtual ~network() {}\n\n    //\n    // when client_only is true, port is faked (equal to app id for tracing purpose)\n    //\n    virtual error_code start(rpc_channel channel, int port, bool client_only) = 0;\n\n    //\n    // the named address\n    //\n    virtual ::dsn::rpc_address address() = 0;\n\n    //\n    // this is where the upper rpc engine calls down for a RPC call\n    //   request - the message to be sent, all meta info (e.g., timeout, server address are\n    //             prepared ready in its header; use message_parser to extract\n    //             blobs from message for sending\n    //\n    virtual void send_message(message_ex *request) = 0;\n\n    //\n    // tools in rDSN may decide to drop this msg,\n    // in this case, the network should implement the appropriate\n    // failure model that makes this failure possible in reality\n    //\n    virtual void inject_drop_message(message_ex *msg, bool is_send) = 0;\n\n    //\n    // utilities\n    //\n    DSN_API service_node *node() const;\n\n    //\n    // called when network received a complete request message\n    //\n    DSN_API void on_recv_request(message_ex *msg, int delay_ms);\n\n    //\n    // called when network received a complete reply message or 
network failed,\n    // if network failed, the 'msg' will be nullptr\n    //\n    DSN_API void on_recv_reply(uint64_t id, message_ex *msg, int delay_ms);\n\n    //\n    // create a message parser for\n    //  (1) extracing blob from a RPC request message for low layer'\n    //  (2) parsing a incoming blob message to get the rpc_message\n    //\n    DSN_API message_parser *new_message_parser(network_header_format hdr_format);\n\n    rpc_engine *engine() const { return _engine; }\n    int max_buffer_block_count_per_send() const { return _max_buffer_block_count_per_send; }\n    network_header_format client_hdr_format() const { return _client_hdr_format; }\n    network_header_format unknown_msg_hdr_format() const { return _unknown_msg_header_format; }\n    int message_buffer_block_size() const { return _message_buffer_block_size; }\n\n    DSN_API static uint32_t get_local_ipv4();\n\nprotected:\n    rpc_engine *_engine;\n    network_header_format _client_hdr_format;\n    network_header_format _unknown_msg_header_format; // default is NET_HDR_INVALID\n    int _message_buffer_block_size;\n    int _max_buffer_block_count_per_send;\n    int _send_queue_threshold;\n\nprivate:\n    friend class rpc_engine;\n    DSN_API void reset_parser_attr(network_header_format client_hdr_format,\n                                   int message_buffer_block_size);\n};\n\n/*!\n  an incomplete network implementation for connection oriented network, e.g., TCP\n*/\nclass connection_oriented_network : public network\n{\npublic:\n    DSN_API connection_oriented_network(rpc_engine *srv, network *inner_provider);\n    virtual ~connection_oriented_network() {}\n\n    // server session management\n    DSN_API rpc_session_ptr get_server_session(::dsn::rpc_address ep);\n    DSN_API void on_server_session_accepted(rpc_session_ptr &s);\n    DSN_API void on_server_session_disconnected(rpc_session_ptr &s);\n\n    // Checks if IP of the incoming session has too much connections.\n    // Related config: 
[network] conn_threshold_per_ip. No limit if the value is 0.\n    DSN_API bool check_if_conn_threshold_exceeded(::dsn::rpc_address ep);\n\n    // client session management\n    DSN_API void on_client_session_connected(rpc_session_ptr &s);\n    DSN_API void on_client_session_disconnected(rpc_session_ptr &s);\n\n    // called upon RPC call, rpc client session is created on demand\n    DSN_API virtual void send_message(message_ex *request) override;\n\n    // called by rpc engine\n    DSN_API virtual void inject_drop_message(message_ex *msg, bool is_send) override;\n\n    // to be defined\n    virtual rpc_session_ptr create_client_session(::dsn::rpc_address server_addr) = 0;\n\nprotected:\n    typedef std::unordered_map<::dsn::rpc_address, rpc_session_ptr> client_sessions;\n    client_sessions _clients; // to_address => rpc_session\n    utils::rw_lock_nr _clients_lock;\n\n    typedef std::unordered_map<::dsn::rpc_address, rpc_session_ptr> server_sessions;\n    server_sessions _servers; // from_address => rpc_session\n    typedef std::unordered_map<uint32_t, uint32_t> ip_connection_count;\n    ip_connection_count _ip_conn_count; // from_ip => connection count\n    utils::rw_lock_nr _servers_lock;\n\n    uint32_t _cfg_conn_threshold_per_ip;\n    perf_counter_wrapper _client_session_count;\n};\n\n/*!\n  session managements (both client and server types)\n*/\n\nnamespace security {\nclass negotiation;\n}\n\nclass rpc_client_matcher;\nclass rpc_session : public ref_counter\n{\npublic:\n    /*!\n    @addtogroup tool-api-hooks\n    @{\n    */\n    static join_point<void, rpc_session *> on_rpc_session_connected;\n    static join_point<void, rpc_session *> on_rpc_session_disconnected;\n    static join_point<bool, message_ex *> on_rpc_recv_message;\n    static join_point<bool, message_ex *> on_rpc_send_message;\n    /*@}*/\npublic:\n    rpc_session(connection_oriented_network &net,\n                ::dsn::rpc_address remote_addr,\n                message_parser_ptr &parser,\n   
             bool is_client);\n    virtual ~rpc_session();\n\n    virtual void connect() = 0;\n    virtual void close() = 0;\n\n    // Whether this session is launched on client side.\n    bool is_client() const { return _is_client; }\n\n    dsn::rpc_address remote_address() const { return _remote_addr; }\n    connection_oriented_network &net() const { return _net; }\n    message_parser_ptr parser() const { return _parser; }\n\n    ///\n    /// rpc_session's interface for sending and receiving\n    ///\n    void send_message(message_ex *msg);\n    bool cancel(message_ex *request);\n    bool delay_recv(int delay_ms);\n    bool on_recv_message(message_ex *msg, int delay_ms);\n    /// ret value:\n    ///    true  - pend succeed\n    ///    false - pend failed\n    bool try_pend_message(message_ex *msg);\n    void clear_pending_messages();\n\n    /// interfaces for security authentication,\n    /// you can ignore them if you don't enable auth\n    void set_negotiation_succeed();\n    bool is_negotiation_succeed() const;\n\n    void set_client_username(const std::string &user_name);\n    const std::string &get_client_username() const;\n\npublic:\n    ///\n    /// for subclass to implement receiving message\n    ///\n    void start_read_next(int read_next = 256);\n    // should be called in do_read() before using _parser when it is nullptr.\n    // returns:\n    //   -1 : prepare failed, maybe because of invalid message header type\n    //    0 : prepare succeed, _parser is not nullptr now.\n    //   >0 : need read more data, returns read_next.\n    int prepare_parser();\n    virtual void do_read(int read_next) = 0;\n\n    ///\n    /// for subclass to implement sending message\n    ///\n    // return whether there are messages for sending;\n    // should always be called in lock\n    bool unlink_message_for_send();\n    virtual void send(uint64_t signature) = 0;\n    void on_send_completed(uint64_t signature = 0);\n    virtual void on_failure(bool is_write = 
false);\n\nprotected:\n    ///\n    /// fields related to sending messages\n    ///\n    enum session_state\n    {\n        SS_CONNECTING,\n        SS_CONNECTED,\n        SS_DISCONNECTED\n    };\n    mutable utils::ex_lock_nr _lock; // [\n    volatile session_state _connect_state;\n\n    bool negotiation_succeed = false;\n    // when the negotiation of a session isn't succeed,\n    // all messages are queued in _pending_messages.\n    // after connected, all of them are moved to \"_messages\"\n    std::vector<message_ex *> _pending_messages;\n\n    // messages are sent in batch, firstly all messages are linked together\n    // in a doubly-linked list \"_messages\".\n    // if no messages are on-the-flying, a batch of messages are fetch from the \"_messages\"\n    // and put them to _sending_msgs; meanwhile, buffers of these messages are put\n    // in _sending_buffers\n    dlink _messages;\n    int _message_count; // count of _messages\n\n    bool _is_sending_next;\n\n    std::vector<message_ex *> _sending_msgs;\n    std::vector<message_parser::send_buf> _sending_buffers;\n\n    uint64_t _message_sent;\n    // ]\n\nprotected:\n    ///\n    /// change status and check status\n    ///\n    // return true when it is permitted\n    bool set_connecting();\n    // return true when it is permitted\n    bool set_disconnected();\n    void set_connected();\n\n    void clear_send_queue(bool resend_msgs);\n    bool on_disconnected(bool is_write);\n\nprotected:\n    // constant info\n    connection_oriented_network &_net;\n    dsn::rpc_address _remote_addr;\n    int _max_buffer_block_count_per_send;\n    message_reader _reader;\n    message_parser_ptr _parser;\n\nprivate:\n    const bool _is_client;\n    rpc_client_matcher *_matcher;\n\n    std::atomic_int _delay_server_receive_ms;\n\n    // _client_username is only valid if it is a server rpc_session.\n    // it represents the name of the corresponding client\n    std::string _client_username;\n};\n\n// --------- inline 
implementation --------------\n// return true if delay applied.\ninline bool rpc_session::delay_recv(int delay_ms)\n{\n    bool exchanged = false;\n    int old_delay_ms = _delay_server_receive_ms.load();\n    while (!exchanged && delay_ms > old_delay_ms) {\n        exchanged = _delay_server_receive_ms.compare_exchange_weak(old_delay_ms, delay_ms);\n    }\n    return exchanged;\n}\n\n/*@}*/\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/tool-api/rpc_address.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#pragma once\n\n#include <string>\n\n#include <arpa/inet.h>\n#include <thrift/protocol/TProtocol.h>\n#include <dsn/utility/string_conv.h>\n\ntypedef enum dsn_host_type_t {\n    HOST_TYPE_INVALID = 0,\n    HOST_TYPE_IPV4 = 1,\n    HOST_TYPE_GROUP = 2,\n} dsn_host_type_t;\n\nnamespace dsn {\n\nclass rpc_group_address;\n\nclass rpc_address\n{\npublic:\n    static const rpc_address s_invalid_address;\n    static bool is_docker_netcard(const char *netcard_interface, uint32_t ip_net);\n    static bool is_site_local_address(uint32_t ip_net);\n    static uint32_t ipv4_from_host(const char *hostname);\n    static uint32_t ipv4_from_network_interface(const char *network_interface);\n\n    ~rpc_address();\n\n    constexpr rpc_address() = 
default;\n\n    rpc_address(const rpc_address &another);\n\n    rpc_address &operator=(const rpc_address &another);\n\n    rpc_address(uint32_t ip, uint16_t port)\n    {\n        assign_ipv4(ip, port);\n        static_assert(sizeof(rpc_address) == sizeof(uint64_t),\n                      \"make sure rpc_address does not \"\n                      \"add new payload to dsn::rpc_address \"\n                      \"to keep it sizeof(uint64_t)\");\n    }\n\n    rpc_address(const char *host, uint16_t port) { assign_ipv4(host, port); }\n\n    void assign_ipv4(uint32_t ip, uint16_t port)\n    {\n        set_invalid();\n        _addr.v4.type = HOST_TYPE_IPV4;\n        _addr.v4.ip = ip;\n        _addr.v4.port = port;\n    }\n\n    void assign_ipv4(const char *host, uint16_t port)\n    {\n        set_invalid();\n        _addr.v4.type = HOST_TYPE_IPV4;\n        _addr.v4.ip = rpc_address::ipv4_from_host(host);\n        _addr.v4.port = port;\n    }\n\n    void assign_ipv4_local_address(const char *network_interface, uint16_t port)\n    {\n        set_invalid();\n        _addr.v4.type = HOST_TYPE_IPV4;\n        _addr.v4.ip = rpc_address::ipv4_from_network_interface(network_interface);\n        _addr.v4.port = port;\n    }\n\n    void assign_group(const char *name);\n\n    const char *to_string() const;\n\n    // return a.b.c.d if address is ipv4\n    const char *ipv4_str() const;\n\n    std::string to_std_string() const { return std::string(to_string()); }\n\n    // This function is used for validating the format of ipv4 like \"192.168.0.1:12345\"\n    // Due to historical legacy, we also consider \"localhost:8080\" is in a valid format\n    // IP address without port like \"127.0.0.1\" is invalid here\n    bool from_string_ipv4(const char *s)\n    {\n        set_invalid();\n        std::string ip_port = std::string(s);\n        auto pos = ip_port.find_last_of(':');\n        if (pos == std::string::npos) {\n            return false;\n        }\n        std::string ip = 
ip_port.substr(0, pos);\n        std::string port = ip_port.substr(pos + 1);\n        // check port\n        unsigned int port_num;\n        if (!dsn::internal::buf2unsigned(port, port_num) || port_num > UINT16_MAX) {\n            return false;\n        }\n        // check localhost & IP\n        uint32_t ip_addr;\n        if (ip == \"localhost\" || inet_pton(AF_INET, ip.c_str(), &ip_addr)) {\n            assign_ipv4(ip.c_str(), (uint16_t)port_num);\n            return true;\n        }\n        return false;\n    }\n\n    uint64_t &value() { return _addr.value; }\n\n    dsn_host_type_t type() const { return (dsn_host_type_t)_addr.v4.type; }\n\n    uint32_t ip() const { return (uint32_t)_addr.v4.ip; }\n\n    uint16_t port() const { return (uint16_t)_addr.v4.port; }\n\n    void set_port(uint16_t port) { _addr.v4.port = port; }\n\n    rpc_group_address *group_address() const\n    {\n        return (rpc_group_address *)(uintptr_t)_addr.group.group;\n    }\n\n    bool is_invalid() const { return _addr.v4.type == HOST_TYPE_INVALID; }\n\n    // before you assign new value, must call set_invalid() to release original value\n    // and you MUST ensure that _addr is INITIALIZED before you call this function\n    void set_invalid();\n\n    bool operator==(::dsn::rpc_address r) const\n    {\n        if (type() != r.type())\n            return false;\n\n        switch (type()) {\n        case HOST_TYPE_IPV4:\n            return ip() == r.ip() && _addr.v4.port == r.port();\n        case HOST_TYPE_GROUP:\n            return _addr.group.group == r._addr.group.group;\n        default:\n            return true;\n        }\n    }\n\n    bool operator!=(::dsn::rpc_address r) const { return !(*this == r); }\n\n    bool operator<(::dsn::rpc_address r) const\n    {\n        if (type() != r.type())\n            return type() < r.type();\n\n        switch (type()) {\n        case HOST_TYPE_IPV4:\n            return ip() < r.ip() || (ip() == r.ip() && port() < r.port());\n        case 
HOST_TYPE_GROUP:\n            return _addr.group.group < r._addr.group.group;\n        default:\n            return true;\n        }\n    }\n\n    // for serialization in thrift format\n    uint32_t read(::apache::thrift::protocol::TProtocol *iprot);\n    uint32_t write(::apache::thrift::protocol::TProtocol *oprot) const;\n\nprivate:\n    union\n    {\n        struct\n        {\n            unsigned long long type : 2;\n            unsigned long long padding : 14;\n            unsigned long long port : 16;\n            unsigned long long ip : 32;\n        } v4; ///< \\ref HOST_TYPE_IPV4\n        struct\n        {\n            unsigned long long type : 2;\n            unsigned long long group : 62; ///< dsn_group_t\n        } group;                           ///< \\ref HOST_TYPE_GROUP\n        uint64_t value;\n    } _addr{.value = 0};\n};\n\n} // namespace dsn\n\nnamespace std {\n\ntemplate <>\nstruct hash<::dsn::rpc_address>\n{\n    size_t operator()(const ::dsn::rpc_address &ep) const\n    {\n        switch (ep.type()) {\n        case HOST_TYPE_IPV4:\n            return std::hash<uint32_t>()(ep.ip()) ^ std::hash<uint16_t>()(ep.port());\n        case HOST_TYPE_GROUP:\n            return std::hash<void *>()(ep.group_address());\n        default:\n            return 0;\n        }\n    }\n};\n\n} // namespace std\n"
  },
  {
    "path": "include/dsn/tool-api/rpc_message.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <atomic>\n#include <dsn/utility/ports.h>\n#include <dsn/utility/extensible_object.h>\n#include <dsn/utility/dlib.h>\n#include <dsn/utility/blob.h>\n#include <dsn/utility/link.h>\n#include <dsn/tool-api/auto_codes.h>\n#include <dsn/tool-api/rpc_address.h>\n#include <dsn/tool-api/global_config.h>\n\nnamespace dsn {\nclass rpc_session;\ntypedef dsn::ref_ptr<rpc_session> rpc_session_ptr;\n\nstruct fast_code\n{\n    uint32_t local_code;\n\n    // same hash from two processes indicates that\n    // the mapping of rpc string and id are 
consistent, which\n    // we leverage for optimization (fast rpc handler lookup)\n    uint32_t local_hash;\n};\n\ntypedef union msg_context\n{\n    struct\n    {\n        uint64_t is_request : 1;           ///< whether the RPC message is a request or response\n        uint64_t is_forwarded : 1;         ///< whether the msg is forwarded or not\n        uint64_t unused : 4;               ///< not used yet\n        uint64_t serialize_format : 4;     ///< dsn_msg_serialize_format\n        uint64_t is_forward_supported : 1; ///< whether support forwarding a message to real leader\n        uint64_t is_backup_request : 1;    ///< whether the RPC is a backup request\n        uint64_t reserved : 52;\n    } u;\n    uint64_t context; ///< msg_context is of sizeof(uint64_t)\n} msg_context_t;\n\ntypedef struct message_header\n{\n    // For thrift protocol this is \"THFT\".\n    // For dsn protocol this is \"RDSN\".\n    // For http protocol this is either a \"GET \" or \"POST\".\n    uint32_t hdr_type;\n\n    uint32_t hdr_version;\n    uint32_t hdr_length;\n    uint32_t hdr_crc32;\n    uint32_t body_length;\n    uint32_t body_crc32;\n    uint64_t id;       // sequence id, used to match request and response\n    uint64_t trace_id; // used for tracking source\n    char rpc_name[DSN_MAX_TASK_CODE_NAME_LENGTH];\n    fast_code rpc_code; // dsn::task_code\n    dsn::gpid gpid;     // global partition id\n    msg_context_t context;\n\n    // Attention:\n    // here, from_address must be IPv4 address, namely we can regard from_address as a\n    // POD-type structure, so no memory-leak will occur even if we don't call its\n    // destructor.\n    //\n    // generally, it is the from_node's primary address, except the\n    // case described in message_ex::create_response()'s ATTENTION comment.\n    //\n    // in the forwarding case, the from_address is always the original client's address\n    rpc_address from_address;\n\n    struct\n    {\n        int32_t timeout_ms;      // rpc timeout 
in milliseconds\n        int32_t thread_hash;     // thread hash used for thread dispatching\n        uint64_t partition_hash; // partition hash used for calculating partition index\n    } client;\n\n    struct\n    {\n        char error_name[DSN_MAX_ERROR_CODE_NAME_LENGTH];\n        fast_code error_code; // dsn::error_code\n    } server;\n\n    message_header() = default;\n    ~message_header() = default;\n} message_header;\n\nclass message_ex : public ref_counter, public extensible_object<message_ex, 4>\n{\npublic:\n    message_header *header;\n    // \"buffers\" are used to manage memory allocated for this message.\n    // the memory used by \"header\" is also managed in \"buffers\".\n    //\n    // please see \"create_request\", \"create_receive_message\",\n    // \"create_receive_message_with_standalone_header\" for the details on\n    // how the headers are managed by buffer\n    std::vector<blob> buffers;\n\n    // by rpc and network\n    rpc_session_ptr io_session; // send/recv session\n    rpc_address to_address;     // always ipv4/v6 address, it is the to_node's net address\n    rpc_address server_address; // used by requests, and may be of uri/group address\n    dsn::task_code local_rpc_code;\n    network_header_format hdr_format;\n    int send_retry_count;\n\n    // by message queuing\n    dlink dl;\n\npublic:\n    // message_ex(blob bb, bool parse_hdr = true); // read\n    DSN_API ~message_ex();\n\n    //\n    // utility routines\n    //\n    DSN_API error_code error();\n    DSN_API task_code rpc_code();\n    static uint64_t new_id() { return ++_id; }\n    static unsigned int get_body_length(char *hdr) { return ((message_header *)hdr)->body_length; }\n\n    //\n    // routines for create messages\n    //\n    DSN_API static message_ex *create_receive_message(const blob &data);\n    DSN_API static message_ex *create_request(dsn::task_code rpc_code,\n                                              int timeout_milliseconds = 0,\n                                
              int thread_hash = 0,\n                                              uint64_t partition_hash = 0);\n\n    DSN_API static message_ex *create_received_request(dsn::task_code rpc_code,\n                                                       dsn_msg_serialize_format format,\n                                                       void *buffer,\n                                                       int size,\n                                                       int thread_hash = 0,\n                                                       uint64_t partition_hash = 0);\n\n    /// This method is only used for receiving request.\n    /// The returned message:\n    ///   - msg->buffers[0] = message_header\n    ///   - msg->buffers[1] = data\n    /// NOTE: the reference counter of returned message_ex is not added in this function\n    DSN_API static message_ex *create_receive_message_with_standalone_header(const blob &data);\n\n    /// copy message without client information, it will not reply\n    /// The returned message:\n    ///   - msg->buffers[0] = message_header\n    ///   - msg->buffers[1] = data\n    DSN_API static message_ex *copy_message_no_reply(const message_ex &old_msg);\n\n    /// The returned message:\n    ///   - msg->buffers[0] = message_header\n    ///   - msg->_is_read = false\n    ///   - msg->_rw_index = 0\n    ///   - msg->_rw_offset = 48 (size of message_header)\n    DSN_API message_ex *create_response();\n\n    DSN_API message_ex *copy(bool clone_content, bool copy_for_receive);\n    DSN_API message_ex *copy_and_prepare_send(bool clone_content);\n\n    //\n    // routines for buffer management\n    //\n    DSN_API void write_next(void **ptr, size_t *size, size_t min_size);\n    DSN_API void write_commit(size_t size);\n    DSN_API bool read_next(void **ptr, size_t *size);\n    bool read_next(blob &data);\n    DSN_API void read_commit(size_t size);\n    size_t body_size() { return (size_t)header->body_length; }\n    DSN_API void 
*rw_ptr(size_t offset_begin);\n\n    // rpc_read_stream can read a msg many times by restore()\n    // rpc_read_stream stream1(msg)\n    // msg->restore_read()\n    // rpc_read_stream stream2(msg)\n    DSN_API void restore_read();\n\n    bool is_backup_request() const { return header->context.u.is_backup_request; }\n\nprivate:\n    DSN_API message_ex();\n    DSN_API void prepare_buffer_header();\n    DSN_API void release_buffer_header();\n\nprivate:\n    static std::atomic<uint64_t> _id;\n\nprivate:\n    // by msg read & write\n    int _rw_index;      // current buffer index\n    int _rw_offset;     // current buffer offset\n    bool _rw_committed; // mark if it is in middle state of reading/writing\n    bool _is_read;      // is for read(recv) or write(send)\n\npublic:\n    static uint32_t s_local_hash; // used by fast_rpc_name\n};\ntypedef dsn::ref_ptr<message_ex> message_ptr;\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/tool-api/task.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     the task abstraction in zion, as well as the derived various types of\n *     tasks in our system\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <functional>\n#include <tuple>\n#include <dsn/utility/ports.h>\n#include <dsn/utility/extensible_object.h>\n#include <dsn/utility/utils.h>\n#include <dsn/utility/apply.h>\n#include <dsn/utility/binary_writer.h>\n#include <dsn/tool-api/task_spec.h>\n#include <dsn/tool-api/task_tracker.h>\n#include <dsn/tool-api/rpc_message.h>\n#include <dsn/tool-api/auto_codes.h>\n#include <dsn/c/api_task.h>\n#include <dsn/c/api_layer1.h>\n\nnamespace dsn {\n\nclass 
task_worker;\nclass task_worker_pool;\nclass service_node;\nclass task_engine;\nclass task_queue;\nclass rpc_engine;\nclass disk_engine;\nclass env_provider;\nclass timer_service;\nclass task;\n\nstruct __tls_dsn__\n{\n    uint32_t magic;\n    task *current_task;\n\n    task_worker *worker;\n    int worker_index;\n    service_node *node;\n    int node_id;\n\n    rpc_engine *rpc;\n    env_provider *env;\n\n    int last_worker_queue_size;\n    uint64_t node_pool_thread_ids; // 8,8,16 bits\n    uint32_t last_lower32_task_id; // 32bits\n};\n\nextern __thread struct __tls_dsn__ tls_dsn;\n\n///\n/// Task is a thread-like execution piece that is much lighter than a normal thread.\n/// Huge number of tasks may be hosted by a small number of actual threads run within\n/// a thread pool.\n///\n/// When creating the task, user must use 3 parameters to specify in which thread the\n/// callback should run:\n///\n///     1. node: specifies the computation engine of the callback, i.e, the \"pool\" of \"thread_pool\"\n///     2. task_code: a index to the \"task_spec\". task_spec specifies which thread pool of\n///        the computation engine to run the callback. some other task information is also\n///        recorded in task_spec. please refer to @task_code, @task_spec, @thread_pool_code\n///        for more details.\n///     3. hash: specifies which thread in the thread pool to execute the callback. (This is\n///        somewhat not accurate, coz \"hash\" will work together with thread_pool's \"partition\"\n///        option. 
Please refer to @task_worker_pool for more details).\n///\n/// So the running thread of callback will be determined hierarchically:\n///\n///         |<---determined by \"node\"\n///         |        |<-----determined by \"code\"\n///         |        |       |-------------determined by \"hash\"\n///         |        |       |\n///  |------V--------|-------|---------------------------|     |--------------|\n///  | |-------------V-------|-----|     |-------------| |     |              |\n///  | | |--------|     |----V---| |     |             | |     |              |\n///  | | | thread | ... | thread | |     |             | |     |              |\n///  | | |--------|     |--------| |     |             | |     |              |\n///  | |       thread pool         | ... | thread pool | |     |              |\n///  | |---------------------------|     |-------------| |     |              |\n///  |                 service node                      | ... | service node |\n///  |---------------------------------------------------|     |--------------|\n///\n/// A status value called \"task_state\" is kept in each task to indicate the task running state,\n/// the transition among different states are:\n///\n///                      |\n///              (new created task)\n///                      |\n///                      V\n///      |-----------> ready------------|\n///      |               |              |\n/// (timer task)     (execute)       (cancel)\n///      |               |              |\n///      |               V              V\n///      |------------ running      cancelled\n///                      |\n///                (one shot task)\n///                      |\n///                      V\n///                   finished\n///\n/// As shown above, a new created task will be in \"ready\" state. 
After obtaining the data\n/// (like a rpc gets the response, or a disk io succeeds), the data provider module will store\n/// proper value into the task and dispatch it to some thread to execute the task with \"enqueue\".\n///\n/// After a task is \"dequeued\" from thread, it will be in \"running\" state.\n///\n/// For one shot tasks(in which callback can ONLY be executed ONCE),  the task will be in\n/// \"finished\" state after running of callbacks.\n///\n/// But for timer tasks, the state will transit to \"ready\" again after \"running\"\n/// as the callback needs to be executed periodically.\n///\n/// The callers can cancel the execution of \"ready\" tasks with method \"cancel\". However,\n/// if a task is not in \"ready\" state, the cancel will fail (returning false).\n///\n/// So from the perspective of task user, a created task can only be controlled by \"enqueue\" or\n/// \"cancel\". An \"enqueue\" operation will make the callback to execute at some time in the future,\n/// and a \"cancel\" operation will prevent the callback from executing.\n///\n/// So please take care when you call cancel. 
Memory leak may occur if you don't pay attention.\n/// For example:\n///\n/// int *a = new int(5);\n/// raw_task t = new raw_task(code, [a](){ std::cout << *a << std::endl; delete a; }, hash, node);\n/// t->enqueue(10_seconds_latey);\n/// if (t->cancel()) {\n///    std::cout << \"cancel succeed, the callback will not execute and a won't be deleted\"\n///               << std::endl;\n/// }\n///\n/// In order to prevent this, we recommend you to pass RAII objects to callback:\n///\n/// std::shared_ptr<int> a = std::make_shared<int>(5);\n/// raw_task t = new raw_task(code, [a]() { std::cout << *a << std::endl; }, hash, node);\n///\n/// In this case, the callback object will be destructed if t is cancelled.\n///\n/// Another key design in rDSN is that we add some \"hook points\" when the state is in transition,\n/// like \"on_task_create\", \"on_task_enqueue\", \"on_task_dequeue\", etc. We can execute different\n/// functions for different purposes on these hook points, you may want to refer to\n/// \"tracer\", \"profiler\" and \"fault_injector\" for details.\n///\nclass task : public ref_counter, public extensible_object<task, 4>\n{\npublic:\n    task(task_code code, int hash = 0, service_node *node = nullptr);\n\n    virtual ~task();\n    virtual void enqueue();\n\n    //\n    // if we successfully change a task's state from ready to cancelled, then return true,\n    // otherwise, false is returned.\n    //\n    // if wait_until_finished is true, the function will wait a running task to be finished\n    //\n    // if finished isn't nullptr, it will be used to indicate\n    // whether the task is finished/cancelled, that is to say:\n    //    *finished == true <- the task has been finished/cancelled when this method returns\n    //    *finished == false <- the task may still be running or ready(timer tasks)\n    //\n    bool cancel(bool wait_until_finished, /*out*/ bool *finished = nullptr);\n\n    // wait until a task to finished/cancelled or timeout\n    bool 
wait(int timeout_milliseconds = TIME_MS_MAX);\n\n    // TODO: modify delay from chrono to int, keep in consistency with other API\n    void enqueue(std::chrono::milliseconds delay)\n    {\n        set_delay(static_cast<int>(delay.count()));\n        enqueue();\n    }\n\n    // used for task_worker to execute the task\n    void exec_internal();\n\n    // only call this function when task is running\n    //\n    // @param enqueue_immediately\n    //    whether we should enqueue right now\n    // return:\n    //    true : change task state from TASK_STATE_RUNNING to TASK_STATE_READY succeed\n    //    false : change task state failed\n    bool set_retry(bool enqueue_immediately = true);\n\n    void set_error_code(error_code err) { _error = err; }\n    void set_delay(int delay_milliseconds = 0) { _delay_milliseconds = delay_milliseconds; }\n    void set_tracker(task_tracker *tracker) { _context_tracker.set_tracker(tracker, this); }\n\n    uint64_t id() const { return _task_id; }\n    task_state state() const { return _state.load(std::memory_order_acquire); }\n    task_code code() const { return _spec->code; }\n    task_spec &spec() const { return *_spec; }\n    int hash() const { return _hash; }\n    int delay_milliseconds() const { return _delay_milliseconds; }\n    error_code error() const { return _error; }\n    service_node *node() const { return _node; }\n    task_tracker *tracker() const { return _context_tracker.tracker(); }\n    bool is_empty() const { return _is_null; }\n\n    // static helper utilities\n    static task *get_current_task();\n    static uint64_t get_current_task_id();\n    static task_worker *get_current_worker();\n    static task_worker *get_current_worker2();\n    static service_node *get_current_node();\n    static service_node *get_current_node2();\n    static int get_current_node_id();\n    static int get_current_worker_index();\n    static const char *get_current_node_name();\n    static rpc_engine *get_current_rpc();\n    static 
env_provider *get_current_env();\n\n    static void set_tls_dsn_context(\n        service_node *node, // cannot be null\n        task_worker *worker // null for io or timer threads if they are not worker threads\n        );\n\nprotected:\n    void enqueue(task_worker_pool *pool);\n    void set_task_id(uint64_t tid) { _task_id = tid; }\n\n    virtual void exec() = 0;\n    //\n    // this function is used for clearing the non-trivial objects assigned to this task, like\n    // callback functors and some task-specific values.\n    //\n    // circular reference may occur if we don't clear them manually, for example:\n    //\n    // class A: public dsn::ref_counter {\n    // public:\n    //   int value;\n    //   task_ptr my_task;\n    // };\n    //\n    // dsn::ref_ptr<A> a_obj = new A();\n    // a_obj->my_task = tasking::enqueue(task_code,\n    //                                   [a_obj](){ std::cout << a_obj->value << std::endl; });\n    //\n    // in the case above, a_obj holds a ref_counter for my_task,\n    // my_task holds a ref_counter for a_obj because it owns a functor\n    // which captures a_obj by value\n    //\n    // in order to prevent this case, we let the task clear these non-trivial objects when\n    // a task is finished or cancelled.\n    //\n    // we may call this function in \"exec_internal\" or \"cancel\". 
however, it's still subclass's\n    // duty to define \"how to clear the callback\".\n    //\n    // don't declare this as pure virtual function, coz it is not necessary for every subclass\n    // to have non trivial objects to clear.\n    //\n    virtual void clear_non_trivial_on_task_end() {}\n\n    bool _is_null;\n    error_code _error;\n\nprivate:\n    friend class task_test;\n\n    task(const task &);\n    bool wait_on_cancel();\n\n    // return true if some waiters have been notified\n    bool signal_waiters();\n\n    static void check_tls_dsn();\n    static void on_tls_dsn_not_set();\n\n    mutable std::atomic<task_state> _state;\n    uint64_t _task_id;\n    std::atomic<void *> _wait_event;\n    int _hash;\n    int _delay_milliseconds;\n    bool _wait_for_cancel;\n    task_spec *_spec;\n    service_node *_node;\n    trackable_task _context_tracker; // when tracker is gone, the task is cancelled automatically\n\npublic:\n    // used by task queue only\n    task *next;\n};\ntypedef dsn::ref_ptr<dsn::task> task_ptr;\n\nclass raw_task : public task\n{\npublic:\n    raw_task(task_code code, const task_handler &cb, int hash = 0, service_node *node = nullptr)\n        : task(code, hash, node), _cb(cb)\n    {\n    }\n    raw_task(task_code code, task_handler &&cb, int hash = 0, service_node *node = nullptr)\n        : task(code, hash, node), _cb(std::move(cb))\n    {\n    }\n\n    void exec() override\n    {\n        if (dsn_likely(_cb != nullptr)) {\n            _cb();\n        }\n    }\n\nprotected:\n    void clear_non_trivial_on_task_end() override { _cb = nullptr; }\n\nprotected:\n    task_handler _cb;\n};\n\n//----------------- timer task -------------------------------------------------------\n\nclass timer_task : public task\n{\npublic:\n    timer_task(task_code code,\n               const task_handler &cb,\n               int interval_milliseconds,\n               int hash = 0,\n               service_node *node = nullptr);\n    timer_task(task_code code,\n  
             task_handler &&cb,\n               int interval_milliseconds,\n               int hash = 0,\n               service_node *node = nullptr);\n\n    // for timer task, we will reset its state to TASK_READY after exec\n    void exec() override;\n    void enqueue() override;\n\nprotected:\n    void clear_non_trivial_on_task_end() override { _cb = nullptr; }\n\nprivate:\n    // ATTENTION: if _interval_milliseconds <= 0, then timer task will just be executed once;\n    // otherwise, timer task will be executed periodically(period = _interval_milliseconds)\n    int _interval_milliseconds;\n    task_handler _cb;\n};\n\ntemplate <typename First, typename... Remaining>\nclass future_task : public task\n{\npublic:\n    typedef std::function<void(const First, const Remaining &...)> TCallback;\n    future_task(task_code code, const TCallback &cb, int hash, service_node *node = nullptr)\n        : task(code, hash, node), _cb(cb)\n    {\n    }\n    future_task(task_code code, TCallback &&cb, int hash, service_node *node = nullptr)\n        : task(code, hash, node), _cb(std::move(cb))\n    {\n    }\n    virtual void exec() override { dsn::apply(_cb, std::move(_values)); }\n\n    void enqueue_with(const First &t, const Remaining &... r, int delay_ms = 0)\n    {\n        _values = std::make_tuple(t, r...);\n        set_delay(delay_ms);\n        enqueue();\n    }\n    void enqueue_with(First &&t, Remaining &&... 
r, int delay_ms = 0)\n    {\n        _values = std::make_tuple(std::move(t), std::forward<Remaining>(r)...);\n        set_delay(delay_ms);\n        enqueue();\n    }\n\nprotected:\n    void clear_non_trivial_on_task_end() override\n    {\n        _cb = nullptr;\n        _values = {};\n    }\n\nprivate:\n    TCallback _cb;\n    std::tuple<First, Remaining...> _values;\n};\n\nclass rpc_request_task : public task\n{\npublic:\n    rpc_request_task(message_ex *request, rpc_request_handler &&h, service_node *node);\n    virtual ~rpc_request_task() override;\n\n    message_ex *get_request() const { return _request; }\n\n    void enqueue() override;\n\n    void exec() override\n    {\n        if (0 == _enqueue_ts_ns ||\n            dsn_now_ns() - _enqueue_ts_ns <\n                static_cast<uint64_t>(_request->header->client.timeout_ms) * 1000000ULL) {\n            if (dsn_likely(nullptr != _handler)) {\n                _handler(_request);\n            }\n        } else {\n            dinfo(\"rpc_request_task(%s) from(%s) stop to execute due to timeout_ms(%d) exceed\",\n                  spec().name.c_str(),\n                  _request->header->from_address.to_string(),\n                  _request->header->client.timeout_ms);\n            spec().on_rpc_task_dropped.execute(this);\n        }\n    }\n\nprotected:\n    void clear_non_trivial_on_task_end() override { _handler = nullptr; }\n\nprotected:\n    message_ex *_request;\n    rpc_request_handler _handler;\n    uint64_t _enqueue_ts_ns;\n};\ntypedef dsn::ref_ptr<rpc_request_task> rpc_request_task_ptr;\n\nclass rpc_response_task : public task\n{\npublic:\n    rpc_response_task(message_ex *request,\n                      const rpc_response_handler &cb,\n                      int hash = 0,\n                      service_node *node = nullptr);\n    rpc_response_task(message_ex *request,\n                      rpc_response_handler &&cb,\n                      int hash,\n                      service_node *node = nullptr);\n  
  virtual ~rpc_response_task() override;\n\n    // return true for normal case, false for fault injection applied\n    bool enqueue(error_code err, message_ex *reply);\n\n    // re-enqueue after above enqueue, e.g., after delay\n    void enqueue() override;\n\n    void exec() override\n    {\n        if (dsn_likely(nullptr != _cb)) {\n            _cb(_error, _request, _response);\n        }\n    }\n\n    message_ex *get_request() const { return _request; }\n    message_ex *get_response() const { return _response; }\n\n    //\n    // rpc_response_task is a special kind of task, because\n    // we support the semantic of distributed service in sending rpc request, for example:\n    // \"querying meta-server and send message to proper replica-server based on gpid\".\n    //\n    // in order to support this, we need to replace the original rpc response callback\n    // to another one:\n    //    1. which can process the distributed service semantics, AND\n    //    2. which won't call the original callback until real results are got\n    //\n    // we supply 2 functions to meet this demand:\n    //    1. fetch_current_handler, you can get the original handler if you want to replace it\n    //    2. replace_callback, replace the callback to any one you like. 
there are two varieties\n    //       for copying or moving the new callback\n    //\n    // you may want to refer to rpc_engine::call_uri for details\n    //\n    // TODO(sunweijie): totally eliminate this feature\n    //\n    void fetch_current_handler(rpc_response_handler &cb) { cb = std::move(_cb); }\n    void replace_callback(rpc_response_handler &&cb)\n    {\n        task_state cur_state = state();\n        dassert(cur_state == TASK_STATE_READY || cur_state == TASK_STATE_RUNNING,\n                \"invalid task_state: %s\",\n                enum_to_string(cur_state));\n        _cb = std::move(cb);\n    }\n    void replace_callback(const rpc_response_handler &cb)\n    {\n        replace_callback(rpc_response_handler(cb));\n    }\n\n    task_worker_pool *caller_pool() const { return _caller_pool; }\n    void set_caller_pool(task_worker_pool *pl) { _caller_pool = pl; }\n\nprotected:\n    void clear_non_trivial_on_task_end() override { _cb = nullptr; }\n\nprivate:\n    message_ex *_request;\n    message_ex *_response;\n    task_worker_pool *_caller_pool;\n    rpc_response_handler _cb;\n\n    friend class rpc_engine;\n};\ntypedef dsn::ref_ptr<rpc_response_task> rpc_response_task_ptr;\n\nconst std::vector<task_worker *> &get_threadpool_threads_info(threadpool_code code);\n\n// ------------------------ inline implementations --------------------\n__inline /*static*/ void task::check_tls_dsn()\n{\n    if (tls_dsn.magic != 0xdeadbeef) {\n        on_tls_dsn_not_set();\n    }\n}\n\n__inline /*static*/ task *task::get_current_task()\n{\n    check_tls_dsn();\n    return tls_dsn.current_task;\n}\n\n__inline /*static*/ uint64_t task::get_current_task_id()\n{\n    if (tls_dsn.magic == 0xdeadbeef)\n        return tls_dsn.current_task ? 
tls_dsn.current_task->id() : 0;\n    else\n        return 0;\n}\n\n__inline /*static*/ task_worker *task::get_current_worker()\n{\n    check_tls_dsn();\n    return tls_dsn.worker;\n}\n\n__inline /*static*/ task_worker *task::get_current_worker2()\n{\n    return tls_dsn.magic == 0xdeadbeef ? tls_dsn.worker : nullptr;\n}\n\n__inline /*static*/ service_node *task::get_current_node()\n{\n    check_tls_dsn();\n    return tls_dsn.node;\n}\n\n__inline /*static*/ int task::get_current_node_id()\n{\n    return tls_dsn.magic == 0xdeadbeef ? tls_dsn.node_id : 0;\n}\n\n__inline /*static*/ service_node *task::get_current_node2()\n{\n    return tls_dsn.magic == 0xdeadbeef ? tls_dsn.node : nullptr;\n}\n\n__inline /*static*/ int task::get_current_worker_index()\n{\n    check_tls_dsn();\n    return tls_dsn.worker_index;\n}\n\n__inline /*static*/ rpc_engine *task::get_current_rpc()\n{\n    check_tls_dsn();\n    return tls_dsn.rpc;\n}\n\n__inline /*static*/ env_provider *task::get_current_env()\n{\n    check_tls_dsn();\n    return tls_dsn.env;\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/tool-api/task_code.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/ports.h>\n#include <dsn/utility/enum_helper.h>\n#include <dsn/tool-api/threadpool_code.h>\n#include <thrift/protocol/TProtocol.h>\n\ntypedef enum dsn_task_type_t {\n    TASK_TYPE_RPC_REQUEST,  ///< task handling rpc request\n    TASK_TYPE_RPC_RESPONSE, ///< task handling rpc response or timeout\n    TASK_TYPE_COMPUTE,      ///< async calls or timers\n    TASK_TYPE_AIO,          ///< callback for file read and write\n    TASK_TYPE_CONTINUATION, ///< above tasks are seperated into several continuation\n                            ///< tasks by thread-synchronization operations.\n                            ///< so that each \"task\" is non-blocking\n    TASK_TYPE_COUNT,\n    TASK_TYPE_INVALID\n} 
dsn_task_type_t;\n\nENUM_BEGIN(dsn_task_type_t, TASK_TYPE_INVALID)\nENUM_REG(TASK_TYPE_RPC_REQUEST)\nENUM_REG(TASK_TYPE_RPC_RESPONSE)\nENUM_REG(TASK_TYPE_COMPUTE)\nENUM_REG(TASK_TYPE_AIO)\nENUM_REG(TASK_TYPE_CONTINUATION)\nENUM_END(dsn_task_type_t)\n\ntypedef enum dsn_task_priority_t {\n    TASK_PRIORITY_LOW,\n    TASK_PRIORITY_COMMON,\n    TASK_PRIORITY_HIGH,\n    TASK_PRIORITY_COUNT,\n    TASK_PRIORITY_INVALID\n} dsn_task_priority_t;\n\nENUM_BEGIN(dsn_task_priority_t, TASK_PRIORITY_INVALID)\nENUM_REG(TASK_PRIORITY_LOW)\nENUM_REG(TASK_PRIORITY_COMMON)\nENUM_REG(TASK_PRIORITY_HIGH)\nENUM_END(dsn_task_priority_t)\n\nnamespace dsn {\n\n/// task code is an index for a specific kind of task. with the index, you can\n/// get properties of this kind of task: name, type, priority, etc. you may want to refer to\n/// task_spec.h for the detailed task properties.\n///\n/// Like dsn::blob, task_code is a special thrift primitive type that's defined\n/// by the rDSN framework. Internally as a C++ object, it's is represented as an integer,\n/// but in thrift representation it's serialized as a string.\n///\n/// It should be noted that a task_code may have different code number in two different\n/// clusters. 
So DO NOT use an integer as task_code.\n///\n///  **.thrift\n///    x: 1: i32  task_code;\n///    ✓: 1: dsn.task_code  task_code;\n///\nclass task_code\n{\npublic:\n    constexpr task_code() = default;\n\n    constexpr explicit task_code(int code) : _internal_code(code) {}\n\n    task_code(const char *name,\n              dsn_task_type_t tt,\n              dsn_task_priority_t pri,\n              dsn::threadpool_code pool);\n\n    task_code(const char *name,\n              dsn_task_type_t tt,\n              dsn_task_priority_t pri,\n              dsn::threadpool_code pool,\n              bool is_storage_write,\n              bool allow_batch,\n              bool is_idempotent);\n\n    const char *to_string() const;\n\n    constexpr bool operator==(const task_code &r) { return _internal_code == r._internal_code; }\n\n    constexpr bool operator!=(const task_code &r) { return !(*this == r); }\n\n    constexpr operator int() const { return _internal_code; }\n\n    constexpr int code() const { return _internal_code; }\n\n    // for serialization in thrift format\n    uint32_t read(::apache::thrift::protocol::TProtocol *iprot);\n    uint32_t write(::apache::thrift::protocol::TProtocol *oprot) const;\n\n    static int max();\n    static bool is_exist(const char *name);\n    static task_code try_get(const char *name, task_code default_value);\n    static task_code try_get(const std::string &name, task_code default_value);\n\n    friend std::ostream &operator<<(std::ostream &os, const task_code &tc)\n    {\n        return os << std::string(tc.to_string());\n    }\n\nprivate:\n    task_code(const char *name);\n    int _internal_code{0};\n};\n\n// you can define task_code by the following macros\n#define DEFINE_NAMED_TASK_CODE(x, name, pri, pool)                                                 \\\n    __selectany const ::dsn::task_code x(#name, TASK_TYPE_COMPUTE, pri, pool);\n\n#define DEFINE_NAMED_TASK_CODE_AIO(x, name, pri, pool)                                             
\\\n    __selectany const ::dsn::task_code x(#name, TASK_TYPE_AIO, pri, pool);\n\n#define DEFINE_NAMED_TASK_CODE_RPC(x, name, pri, pool)                                             \\\n    __selectany const ::dsn::task_code x(#name, TASK_TYPE_RPC_REQUEST, pri, pool);                 \\\n    __selectany const ::dsn::task_code x##_ACK(#name \"_ACK\", TASK_TYPE_RPC_RESPONSE, pri, pool);\n\n/*! define a new task code with TASK_TYPE_COMPUTATION */\n#define DEFINE_TASK_CODE(x, pri, pool) DEFINE_NAMED_TASK_CODE(x, x, pri, pool)\n#define DEFINE_TASK_CODE_AIO(x, pri, pool) DEFINE_NAMED_TASK_CODE_AIO(x, x, pri, pool)\n#define DEFINE_TASK_CODE_RPC(x, pri, pool) DEFINE_NAMED_TASK_CODE_RPC(x, x, pri, pool)\n\n// define a rpc code for storage engine\n//\n// storage engine's rpc code is special because\n// 1. we need to find a proper replica to serve the rpc\n//    then forward it to the storage engine atop of replica.\n// 2. for a write rpc, a primary may also need to replicate it\n//    to secondaries before forwarding to the storage engine.\n// 3. some storage engine's rpc shouldn't be batched,\n//    either for better performance or correctness.\n// 4. 
some write rpc is idempotent, but some is not.\n//    we should differentiate it.\n// so we define some special fields in task_spec to mark these features.\n//\n// please refer to rpc_engine::on_recv_request for the details on how storage_engine's rpc\n// is handled\n//\n// Notice we dispatch storage rpc's response to THREAD_POOL_DEFAULT,\n// the reason is that the storage rpc's response mainly runs at client side, which is not\n// necessary to start so many threadpools\n#define DEFINE_STORAGE_RPC_CODE(x, pri, pool, is_write, allow_batch, is_idempotent)                \\\n    __selectany const ::dsn::task_code x(                                                          \\\n        #x, TASK_TYPE_RPC_REQUEST, pri, pool, is_write, allow_batch, is_idempotent);               \\\n    __selectany const ::dsn::task_code x##_ACK(#x \"_ACK\",                                          \\\n                                               TASK_TYPE_RPC_RESPONSE,                             \\\n                                               pri,                                                \\\n                                               THREAD_POOL_DEFAULT,                                \\\n                                               is_write,                                           \\\n                                               allow_batch,                                        \\\n                                               is_idempotent);\n\n#define ALLOW_BATCH true\n#define NOT_ALLOW_BATCH false\n#define IS_IDEMPOTENT true\n#define NOT_IDEMPOTENT false\n\n// define a default task code \"task_code_invalid\", it's mainly used for representing\n// some error status when you want to return task_code in some functions.\nDEFINE_TASK_CODE(TASK_CODE_INVALID, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\n\n// define a task_code \"task_code_inlined\", it's mainly used in situations when you want to execute\n// a task with \"inline\" 
mode.\nDEFINE_TASK_CODE(TASK_CODE_EXEC_INLINED, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\n}\n"
  },
  {
    "path": "include/dsn/tool-api/task_queue.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     task queue abstraction\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool-api/task.h>\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n#include <dsn/utility/dlib.h>\n\nnamespace dsn {\n\nclass task_worker;\nclass task_worker_pool;\n\n/*!\n@addtogroup tool-api-providers\n@{\n*/\n/*!\n  task queue batches the input queue for the bound task worker(s) (threads)\n */\nclass task_queue\n{\npublic:\n    template <typename T>\n    static task_queue *create(task_worker_pool *pool, int index, task_queue *inner_provider)\n    {\n        return new T(pool, index, inner_provider);\n    }\n\n    
typedef task_queue *(*factory)(task_worker_pool *, int, task_queue *);\n\npublic:\n    DSN_API task_queue(task_worker_pool *pool, int index, task_queue *inner_provider);\n    DSN_API virtual ~task_queue();\n\n    virtual void enqueue(task *task) = 0;\n    // dequeue may return more than 1 task, but there is a configured\n    // best batch size for each worker so that load among workers\n    // is balanced.\n    // the returned batch size is stored in parameter batch_size\n    virtual task *dequeue(/*inout*/ int &batch_size) = 0;\n\n    int count() const { return _queue_length.load(std::memory_order_relaxed); }\n    int decrease_count(int count = 1)\n    {\n        _queue_length_counter->add((int64_t)(-count));\n        return _queue_length.fetch_sub(count, std::memory_order_relaxed) - count;\n    }\n    int increase_count(int count = 1)\n    {\n        _queue_length_counter->add(count);\n        return _queue_length.fetch_add(count, std::memory_order_relaxed) + count;\n    }\n    const std::string &get_name() { return _name; }\n    task_worker_pool *pool() const { return _pool; }\n    int index() const { return _index; }\n    volatile int *get_virtual_length_ptr() { return &_virtual_queue_length; }\n\nprivate:\n    friend class task_worker_pool;\n    void enqueue_internal(task *task);\n\nprivate:\n    task_worker_pool *_pool;\n    std::string _name;\n    int _index;\n    std::atomic<int> _queue_length;\n    dsn::perf_counter_wrapper _queue_length_counter;\n    dsn::perf_counter_wrapper _delay_task_counter;\n    dsn::perf_counter_wrapper _reject_task_counter;\n    threadpool_spec *_spec;\n    volatile int _virtual_queue_length;\n};\n/*@}*/\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/tool-api/task_spec.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     specification for the labeled tasks (task kinds)\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/utility/utils.h>\n#include <dsn/utility/config_helper.h>\n#include <dsn/utility/enum_helper.h>\n#include <dsn/utility/customizable_id.h>\n#include <dsn/utility/join_point.h>\n#include <dsn/utility/extensible_object.h>\n#include <dsn/utility/exp_delay.h>\n#include <dsn/utility/dlib.h>\n#include <dsn/perf_counter/perf_counter.h>\n#include <dsn/tool-api/auto_codes.h>\n#include <dsn/c/api_utilities.h>\n\nENUM_BEGIN(dsn_log_level_t, 
LOG_LEVEL_INVALID)\nENUM_REG(LOG_LEVEL_INFORMATION)\nENUM_REG(LOG_LEVEL_DEBUG)\nENUM_REG(LOG_LEVEL_WARNING)\nENUM_REG(LOG_LEVEL_ERROR)\nENUM_REG(LOG_LEVEL_FATAL)\nENUM_END(dsn_log_level_t)\n\nnamespace dsn {\n\nenum task_state\n{\n    TASK_STATE_READY,\n    TASK_STATE_RUNNING,\n    TASK_STATE_FINISHED,\n    TASK_STATE_CANCELLED,\n    TASK_STATE_COUNT,\n    TASK_STATE_INVALID\n};\n\nENUM_BEGIN(task_state, TASK_STATE_INVALID)\nENUM_REG(TASK_STATE_READY)\nENUM_REG(TASK_STATE_RUNNING)\nENUM_REG(TASK_STATE_FINISHED)\nENUM_REG(TASK_STATE_CANCELLED)\nENUM_END(task_state)\n\ntypedef enum grpc_mode_t {\n    GRPC_TO_LEADER, // the rpc is sent to the leader (if exist)\n    GRPC_TO_ALL,    // the rpc is sent to all\n    GRPC_TO_ANY,    // the rpc is sent to one of the group member\n    GRPC_COUNT,\n    GRPC_INVALID\n} grpc_mode_t;\n\nENUM_BEGIN(grpc_mode_t, GRPC_INVALID)\nENUM_REG(GRPC_TO_LEADER)\nENUM_REG(GRPC_TO_ALL)\nENUM_REG(GRPC_TO_ANY)\nENUM_END(grpc_mode_t)\n\ntypedef enum throttling_mode_t {\n    TM_NONE,   // no throttling applied\n    TM_REJECT, // reject the incoming request\n    TM_DELAY,  // delay network receive ops to reducing incoming rate\n    TM_COUNT,\n    TM_INVALID\n} throttling_mode_t;\n\nENUM_BEGIN(throttling_mode_t, TM_INVALID)\nENUM_REG(TM_NONE)\nENUM_REG(TM_REJECT)\nENUM_REG(TM_DELAY)\nENUM_END(throttling_mode_t)\n\ntypedef enum dsn_msg_serialize_format {\n    DSF_INVALID = 0,\n    DSF_THRIFT_BINARY = 1,\n    DSF_THRIFT_COMPACT = 2,\n    DSF_THRIFT_JSON = 3,\n    DSF_PROTOC_BINARY = 4,\n    DSF_PROTOC_JSON = 5,\n    DSF_JSON = 6\n} dsn_msg_serialize_format;\n\nENUM_BEGIN(dsn_msg_serialize_format, DSF_INVALID)\nENUM_REG(DSF_THRIFT_BINARY)\nENUM_REG(DSF_THRIFT_COMPACT)\nENUM_REG(DSF_THRIFT_JSON)\nENUM_REG(DSF_PROTOC_BINARY)\nENUM_REG(DSF_PROTOC_JSON)\nENUM_END(dsn_msg_serialize_format)\n\n// define network header format for RPC\nDEFINE_CUSTOMIZED_ID_TYPE(network_header_format)\nDEFINE_CUSTOMIZED_ID(network_header_format, 
NET_HDR_INVALID)\nDEFINE_CUSTOMIZED_ID(network_header_format, NET_HDR_DSN)\n\n// define network channel types for RPC\nDEFINE_CUSTOMIZED_ID_TYPE(rpc_channel)\nDEFINE_CUSTOMIZED_ID(rpc_channel, RPC_CHANNEL_TCP)\nDEFINE_CUSTOMIZED_ID(rpc_channel, RPC_CHANNEL_UDP)\n\nclass task;\nclass task_queue;\nclass aio_task;\nclass rpc_request_task;\nclass rpc_response_task;\nclass message_ex;\n\nstd::set<dsn::task_code> &get_storage_rpc_req_codes();\n\nclass task_spec : public extensible_object<task_spec, 4>\n{\npublic:\n    DSN_API static task_spec *get(int ec);\n    DSN_API static void register_task_code(dsn::task_code code,\n                                           dsn_task_type_t type,\n                                           dsn_task_priority_t pri,\n                                           dsn::threadpool_code pool);\n\n    DSN_API static void register_storage_task_code(dsn::task_code code,\n                                                   dsn_task_type_t type,\n                                                   dsn_task_priority_t pri,\n                                                   dsn::threadpool_code pool,\n                                                   bool is_write_operation,\n                                                   bool allow_batch,\n                                                   bool is_idempotent);\n\npublic:\n    // not configurable [\n    dsn::task_code code;\n    dsn_task_type_t type;\n    std::string name;\n    dsn::task_code rpc_paired_code;\n    shared_exp_delay rpc_request_delayer;\n\n    bool rpc_request_for_storage;\n    bool rpc_request_is_write_operation;   // need stateful replication\n    bool rpc_request_is_write_allow_batch; // if write allow batch\n    bool rpc_request_is_write_idempotent;  // if write operation is idempotent\n    // ]\n\n    // configurable [\n    dsn_task_priority_t priority;\n    grpc_mode_t grpc_mode; // used when a rpc request is sent to a group address\n    dsn::threadpool_code pool_code;\n\n  
  // allow task executed in other thread pools or tasks\n    // for TASK_TYPE_COMPUTE - allow-inline allows a task being executed in its caller site\n    // for other tasks - allow-inline allows a task being execution in io-thread\n    bool allow_inline;\n    bool randomize_timer_delay_if_zero; // to avoid many timers executing at the same time\n    network_header_format rpc_call_header_format;\n    dsn_msg_serialize_format rpc_msg_payload_serialize_default_format;\n    rpc_channel rpc_call_channel;\n    bool rpc_message_crc_required;\n\n    int32_t rpc_timeout_milliseconds;\n    int32_t rpc_request_resend_timeout_milliseconds;  // 0 for no auto-resend\n    throttling_mode_t rpc_request_throttling_mode;    //\n    std::vector<int> rpc_request_delays_milliseconds; // see exp_delay for delaying recving\n    bool rpc_request_dropped_before_execution_when_timeout;\n\n    // COMPUTE\n    /*!\n     @addtogroup tool-api-hooks\n     @{\n     */\n    join_point<void, task *, task *> on_task_create;\n\n    join_point<void, task *, task *> on_task_enqueue;\n    join_point<void, task *> on_task_begin; // TODO: parent task\n    join_point<void, task *> on_task_end;\n    join_point<void, task *> on_task_cancelled;\n\n    join_point<void, task *, task *, uint32_t> on_task_wait_pre; // waitor, waitee, timeout\n    join_point<void, task *> on_task_wait_notified;\n    join_point<void, task *, task *, bool> on_task_wait_post;   // wait succeeded or timedout\n    join_point<void, task *, task *, bool> on_task_cancel_post; // cancel succeeded or not\n\n    // AIO\n    join_point<bool, task *, aio_task *> on_aio_call; // return true means continue, otherwise early\n                                                      // terminate with task::set_error_code\n    join_point<void, aio_task *> on_aio_enqueue;      // aio done, enqueue callback\n\n    // RPC_REQUEST\n    join_point<bool, task *, message_ex *, rpc_response_task *>\n        on_rpc_call; // return true means continue, otherwise 
dropped and (optionally) timedout\n    join_point<bool, rpc_request_task *> on_rpc_request_enqueue;\n    join_point<void, rpc_request_task *> on_rpc_task_dropped; // rpc task dropped\n\n    // RPC_RESPONSE\n    join_point<bool, task *, message_ex *> on_rpc_reply;\n    join_point<bool, rpc_response_task *> on_rpc_response_enqueue; // response, task\n\n    // message data flow\n    join_point<void, message_ex *, message_ex *> on_rpc_create_response;\n    /*@}*/\n\npublic:\n    DSN_API task_spec(int code,\n                      const char *name,\n                      dsn_task_type_t type,\n                      dsn_task_priority_t pri,\n                      dsn::threadpool_code pool);\n\npublic:\n    DSN_API static bool init();\n    DSN_API void init_profiling(bool profile);\n};\n\nCONFIG_BEGIN(task_spec)\nCONFIG_FLD_ENUM(dsn_task_priority_t,\n                priority,\n                TASK_PRIORITY_COMMON,\n                TASK_PRIORITY_INVALID,\n                true,\n                \"task priority\")\nCONFIG_FLD_ENUM(grpc_mode_t,\n                grpc_mode,\n                GRPC_TO_LEADER,\n                GRPC_INVALID,\n                false,\n                \"group rpc mode: GRPC_TO_LEADER, GRPC_TO_ALL, GRPC_TO_ANY\")\nCONFIG_FLD_ID(\n    threadpool_code, pool_code, THREAD_POOL_DEFAULT, true, \"thread pool to execute the task\")\nCONFIG_FLD(bool,\n           bool,\n           allow_inline,\n           false,\n           \"allow task executed in other thread pools or tasks \"\n           \"for TASK_TYPE_COMPUTE - allow-inline allows a task being executed in its caller site \"\n           \"for other tasks - allow-inline allows a task being execution in io-thread \")\nCONFIG_FLD(bool,\n           bool,\n           randomize_timer_delay_if_zero,\n           false,\n           \"whether to randomize the timer delay \"\n           \"to random(0, timer_interval), if the \"\n           \"initial delay is zero, to avoid \"\n           \"multiple timers executing at 
the \"\n           \"same time (e.g., checkpointing)\")\nCONFIG_FLD_ID(network_header_format,\n              rpc_call_header_format,\n              NET_HDR_DSN,\n              false,\n              \"what kind of header format for this kind of rpc calls\")\nCONFIG_FLD_ENUM(dsn_msg_serialize_format,\n                rpc_msg_payload_serialize_default_format,\n                DSF_THRIFT_BINARY,\n                DSF_INVALID,\n                false,\n                \"what kind of payload serialization format for this kind of msgs\")\nCONFIG_FLD_ID(rpc_channel,\n              rpc_call_channel,\n              RPC_CHANNEL_TCP,\n              false,\n              \"what kind of network channel for this kind of rpc calls\")\nCONFIG_FLD(bool,\n           bool,\n           rpc_message_crc_required,\n           false,\n           \"whether to calculate the crc checksum when send request/response\")\nCONFIG_FLD(int32_t,\n           uint64,\n           rpc_timeout_milliseconds,\n           5000,\n           \"what is the default timeout (ms) for this kind of rpc calls\")\nCONFIG_FLD(int32_t,\n           uint64,\n           rpc_request_resend_timeout_milliseconds,\n           0,\n           \"for how long (ms) the \"\n           \"request will be resent if \"\n           \"no response is received \"\n           \"yet, 0 for disable this \"\n           \"feature\")\nCONFIG_FLD_ENUM(throttling_mode_t,\n                rpc_request_throttling_mode,\n                TM_NONE,\n                TM_INVALID,\n                false,\n                \"throttling mode for rpc requets: TM_NONE, TM_REJECT, TM_DELAY when queue length > \"\n                \"pool.queue_length_throttling_threshold\")\nCONFIG_FLD_INT_LIST(rpc_request_delays_milliseconds,\n                    \"how many milliseconds to delay recving rpc session for when queue length ~= \"\n                    \"[1.0, 1.2, 1.4, 1.6, 1.8, >=2.0] x pool.queue_length_throttling_threshold, \"\n                    \"e.g., 0, 0, 1, 2, 5, 
10\")\nCONFIG_FLD(bool,\n           bool,\n           rpc_request_dropped_before_execution_when_timeout,\n           false,\n           \"whether to drop a request right before execution when its queueing time is already \"\n           \"greater than its timeout value\")\nCONFIG_END\n\n} // end namespace\n"
  },
  {
    "path": "include/dsn/tool-api/task_tracker.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     tracker abstraction for tasks, to ensure the tasks are cancelled\n *     appropriately when the context is gone\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/utility/link.h>\n#include <dsn/utility/synchronize.h>\n#include <dsn/utility/error_code.h>\n#include <dsn/c/api_utilities.h>\n#include <atomic>\n\nnamespace dsn {\n//\n// many task requires a certain context to be executed\n// trackable_task helps manaing the context automatically\n// for tasks so that when the context is gone, the tasks are\n// automatically cancelled to avoid invalid context access\n//\nclass task;\nclass 
task_tracker;\nclass trackable_task\n{\npublic:\n    trackable_task() : _task(nullptr), _owner(nullptr), _dl_bucket_id(0) {}\n    virtual ~trackable_task() {}\n\n    void set_tracker(task_tracker *owner, task *tsk);\n    void unset_tracker();\n    task_tracker *tracker() const { return _owner; }\n\nprivate:\n    friend class task_tracker;\n\n    enum owner_delete_state\n    {\n        OWNER_DELETE_NOT_LOCKED = 0,\n        OWNER_DELETE_LOCKED = 1,\n        OWNER_DELETE_FINISHED = 2\n    };\n\n    task *_task;\n    task_tracker *_owner;\n    std::atomic<owner_delete_state> _deleting_owner;\n\n    // doubly-linked list node used to link this task into _owner\n    dlink _dl;\n    int _dl_bucket_id;\n\nprivate:\n    owner_delete_state owner_delete_prepare();\n    void owner_delete_commit();\n};\n\n//\n// task_tracker is used to track one or more unfinished tasks; you may use it to\n// wait for or cancel the tasks tracked by the tracker.\n//\n// A classic use case is using a tracker to prevent a task from accessing a deleted object.\n//\n// for example:\n// class A\n// {\n//   void func() {\n//     tasking::enqueue(task_code, [this](){ std::cout << this->value; }, seconds_10);\n//   }\n// private:\n//   int value;\n// };\n//\n// if an object of class A is deleted before the enqueued task is executed,\n// memory corruption may occur.\n//\n// with task tracker, you may avoid this by declaring a variable of task_tracker in A\n// and use it to track the created task:\n//\n// class A\n// {\n// public:\n//   ~A() { _tracker.cancel_outstanding_tasks(); }\n//   void func()\n//   {\n//     tasking::enqueue(task_code,\n//                      &_tracker,\n//                      [this]() { std::cout << this->value; },\n//                      seconds_10);\n//   }\n//\n// private:\n//   int value;\n//   task_tracker _tracker;\n// };\n//\n// in the example above, calling \"_tracker.wait_outstanding_tasks()\" still works.\n// the main difference is that:\n//    1. \"wait\" will wait for all tasks to finish\n//    2. 
as for \"cancel\", only running tasks will be waited for.\n//       a not-started task will be cancelled directly.\n//\n// you may choose either one as you need.\n//\n// Some notices:\n//\n// 1. you may want to call \"cancel\" or \"wait\" at the beginning\n//    of your destructor, so as to ensure all tasks are\n//    canceled/executed before destruction of any objects\n// 2. please ensure the \"wait\"(\"cancel\") and the created task are running\n//    IN DIFFERENT THREADS, otherwise deadlock may occur\n// 3. when wait_outstanding_tasks is called, please make sure that no timer tasks are running.\n//    if timer and non-timer are both tracked, you may want to cancel the timer first.\n//\n//    For example:\n//\n//    task_tracker t;\n//    auto tsk1 = tasking::enqueue(task_code, &t, [](){}, seconds_10);\n//    auto tsk2 = tasking::enqueue_timer(task_code, &t, [](){}, delay_10s, period_10s);\n//\n//    t.wait_outstanding_tasks();  <-- wrong, because tsk2 is a timer\n//    t.cancel_outstanding_tasks(); <-- right, cancel can apply to any tasks.\n//    tsk2.cancel(true); t.wait_outstanding_tasks(); <-- right, first cancel timer, then wait.\n//\nclass task_tracker\n{\npublic:\n    explicit task_tracker(int task_bucket_count = 1);\n    virtual ~task_tracker();\n\n    // wait for all outstanding tasks to finish\n    void wait_outstanding_tasks();\n\n    // cancel and wait for all outstanding tasks to finish\n    void cancel_outstanding_tasks();\n\n    // cancel outstanding tasks but do not wait for them to finish\n    // return not finished task count\n    int cancel_but_not_wait_outstanding_tasks();\n\n    void set_tasks_success() { _all_tasks_success = true; }\n\n    void clear_tasks_state() { _all_tasks_success = false; }\n\n    bool all_tasks_success() const { return _all_tasks_success; }\n\nprivate:\n    friend class trackable_task;\n    const int _task_bucket_count;\n    ::dsn::utils::ex_lock_nr_spin *_outstanding_tasks_lock;\n    dlink *_outstanding_tasks;\n    bool 
_all_tasks_success{false};\n};\n\n// ------- inlined implementation ----------\ninline void trackable_task::set_tracker(task_tracker *owner, task *tsk)\n{\n    dassert(_owner == nullptr, \"task tracker is already set\");\n    _owner = owner;\n    _task = tsk;\n    _deleting_owner.store(OWNER_DELETE_NOT_LOCKED, std::memory_order_release);\n\n    if (nullptr != _owner) {\n        _dl_bucket_id =\n            static_cast<int>(::dsn::utils::get_current_tid() % _owner->_task_bucket_count);\n        {\n            utils::auto_lock<::dsn::utils::ex_lock_nr_spin> l(\n                _owner->_outstanding_tasks_lock[_dl_bucket_id]);\n            _dl.insert_after(&_owner->_outstanding_tasks[_dl_bucket_id]);\n        }\n    }\n}\n\ninline void trackable_task::unset_tracker()\n{\n    if (nullptr != _owner) {\n        auto s = owner_delete_prepare();\n        switch (s) {\n        case OWNER_DELETE_NOT_LOCKED:\n            owner_delete_commit();\n            break;\n        case OWNER_DELETE_LOCKED:\n            while (OWNER_DELETE_LOCKED == _deleting_owner.load(std::memory_order_consume)) {\n            }\n            break;\n        case OWNER_DELETE_FINISHED:\n            break;\n        }\n        _owner = nullptr;\n    }\n}\n\ninline trackable_task::owner_delete_state trackable_task::owner_delete_prepare()\n{\n    return _deleting_owner.exchange(OWNER_DELETE_LOCKED, std::memory_order_acquire);\n}\n\ninline void trackable_task::owner_delete_commit()\n{\n    {\n        utils::auto_lock<::dsn::utils::ex_lock_nr_spin> l(\n            _owner->_outstanding_tasks_lock[_dl_bucket_id]);\n        _dl.remove();\n    }\n\n    _deleting_owner.store(OWNER_DELETE_FINISHED, std::memory_order_relaxed);\n}\n}\n"
  },
  {
    "path": "include/dsn/tool-api/task_worker.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     task worker (thread) abstraction\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool-api/task_queue.h>\n#include <dsn/utility/extensible_object.h>\n#include <dsn/utility/synchronize.h>\n#include <dsn/utility/dlib.h>\n#include <dsn/perf_counter/perf_counter.h>\n#include <thread>\n\nnamespace dsn {\n\n/*!\n@addtogroup tool-api-providers\n@{\n*/\n/*!\n task worker processes the input tasks from the bound task queue\n*/\nclass task_worker : public extensible_object<task_worker, 4>\n{\npublic:\n    template <typename T>\n    static task_worker *\n    create(task_worker_pool *pool, task_queue 
*q, int index, task_worker *inner_provider)\n    {\n        return new T(pool, q, index, inner_provider);\n    }\n\n    typedef task_worker *(*factory)(task_worker_pool *, task_queue *, int, task_worker *);\n\npublic:\n    DSN_API\n    task_worker(task_worker_pool *pool, task_queue *q, int index, task_worker *inner_provider);\n    DSN_API virtual ~task_worker(void);\n\n    // service management\n    DSN_API void start();\n    DSN_API void stop();\n\n    DSN_API virtual void loop(); // run tasks from _input_queue\n\n    // inquiry\n    const std::string &name() const { return _name; }\n    int index() const { return _index; }\n    int native_tid() const { return _native_tid; }\n    task_worker_pool *pool() const { return _owner_pool; }\n    task_queue *queue() const { return _input_queue; }\n    DSN_API const threadpool_spec &pool_spec() const;\n    DSN_API static task_worker *current();\n\nprivate:\n    task_worker_pool *_owner_pool;\n    task_queue *_input_queue;\n    int _index;\n    int _native_tid;\n    std::string _name;\n    std::unique_ptr<std::thread> _thread;\n    bool _is_running;\n    utils::notify_event _started;\n    int _processed_task_count;\n\npublic:\n    DSN_API static void set_name(const char *name);\n    DSN_API static void set_priority(worker_priority_t pri);\n    DSN_API static void set_affinity(uint64_t affinity);\n\nprivate:\n    void run_internal();\n\npublic:\n    /*!\n    @addtogroup tool-api-hooks\n    @{\n    */\n    DSN_API static join_point<void, task_worker *> on_start;\n    DSN_API static join_point<void, task_worker *> on_create;\n    /*@}*/\n};\n/*@}*/\n} // end namespace\n"
  },
  {
    "path": "include/dsn/tool-api/thread_access_checker.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#pragma once\n\nnamespace dsn {\n\n///\n/// a simple class used to check if some code is accessed by only one thread.\n/// please refer to @replica.h and @lock_struct.h for a sample usage\n///\nclass thread_access_checker\n{\npublic:\n    thread_access_checker();\n    ~thread_access_checker();\n\n    void only_one_thread_access();\n\nprivate:\n    // TODO: the implementation is not thread safe. use atomic variable to reimplement this\n    int _access_thread_id;\n    bool _access_thread_id_inited;\n};\n}\n"
  },
  {
    "path": "include/dsn/tool-api/threadpool_code.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/ports.h>\n\nnamespace dsn {\nclass threadpool_code\n{\npublic:\n    threadpool_code() { _internal_code = 0; }\n    explicit threadpool_code(int c) : _internal_code(c) {}\n    threadpool_code(const threadpool_code &r) { _internal_code = r._internal_code; }\n    explicit threadpool_code(const char *name);\n    const char *to_string() const;\n    threadpool_code &operator=(const threadpool_code &source)\n    {\n        _internal_code = source._internal_code;\n        return *this;\n    }\n    bool operator==(const threadpool_code &r) { return _internal_code == r._internal_code; }\n    bool operator!=(const threadpool_code &r) { return !(*this == r); }\n    operator int() const { return 
_internal_code; }\n\n    static int max();\n    static bool is_exist(const char *name);\n\nprivate:\n    int _internal_code;\n};\n\n/*! define a new thread pool named x*/\n#define DEFINE_THREAD_POOL_CODE(x) __selectany const ::dsn::threadpool_code x(#x);\n\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_INVALID)\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_DEFAULT)\n}\n"
  },
  {
    "path": "include/dsn/tool-api/threadpool_spec.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#pragma once\n\n#include <string>\n#include <list>\n\n#include <dsn/utility/enum_helper.h>\n#include <dsn/utility/config_helper.h>\n#include <dsn/tool-api/threadpool_code.h>\n\nnamespace dsn {\n\nenum worker_priority_t\n{\n    THREAD_xPRIORITY_LOWEST,\n    THREAD_xPRIORITY_BELOW_NORMAL,\n    THREAD_xPRIORITY_NORMAL,\n    THREAD_xPRIORITY_ABOVE_NORMAL,\n    THREAD_xPRIORITY_HIGHEST,\n    THREAD_xPRIORITY_COUNT,\n    THREAD_xPRIORITY_INVALID\n};\n\nENUM_BEGIN(worker_priority_t, THREAD_xPRIORITY_INVALID)\nENUM_REG(THREAD_xPRIORITY_LOWEST)\nENUM_REG(THREAD_xPRIORITY_BELOW_NORMAL)\nENUM_REG(THREAD_xPRIORITY_NORMAL)\nENUM_REG(THREAD_xPRIORITY_ABOVE_NORMAL)\nENUM_REG(THREAD_xPRIORITY_HIGHEST)\nENUM_END(worker_priority_t)\n\nstruct 
threadpool_spec\n{\n    std::string name;\n    dsn::threadpool_code pool_code;\n    int worker_count;\n    worker_priority_t worker_priority;\n    bool worker_share_core;\n    uint64_t worker_affinity_mask;\n    int dequeue_batch_size;\n    bool partitioned; // false by default\n    std::string queue_factory_name;\n    std::string worker_factory_name;\n    std::list<std::string> queue_aspects;\n    std::list<std::string> worker_aspects;\n    int queue_length_throttling_threshold;\n    bool enable_virtual_queue_throttling;\n\n    threadpool_spec(const dsn::threadpool_code &code) : name(code.to_string()), pool_code(code) {}\n    threadpool_spec(const threadpool_spec &source) = default;\n    threadpool_spec &operator=(const threadpool_spec &source) = default;\n\n    static bool init(/*out*/ std::vector<threadpool_spec> &specs);\n};\n\nCONFIG_BEGIN(threadpool_spec)\nCONFIG_FLD_STRING(name, \"\", \"thread pool name\")\nCONFIG_FLD(int, uint64, worker_count, 2, \"thread/worker count\")\nCONFIG_FLD(int,\n           uint64,\n           dequeue_batch_size,\n           5,\n           \"how many tasks (if available) should be returned \"\n           \"for one dequeue call for best batching performance\")\nCONFIG_FLD_ENUM(worker_priority_t,\n                worker_priority,\n                THREAD_xPRIORITY_NORMAL,\n                THREAD_xPRIORITY_INVALID,\n                false,\n                \"thread priority\")\nCONFIG_FLD(bool, bool, worker_share_core, true, \"whether the threads share all assigned cores\")\nCONFIG_FLD(uint64_t,\n           uint64,\n           worker_affinity_mask,\n           0,\n           \"what CPU cores are assigned to this pool, 0 for all\")\nCONFIG_FLD(bool,\n           bool,\n           partitioned,\n           false,\n           \"whethe the threads share a single \"\n           \"queue(partitioned=false) or not; the latter is usually \"\n           \"for workload hash partitioning for avoiding locking\")\nCONFIG_FLD_STRING(queue_factory_name, 
\"\", \"task queue provider name\")\nCONFIG_FLD_STRING(worker_factory_name, \"\", \"task worker provider name\")\nCONFIG_FLD_STRING_LIST(queue_aspects, \"task queue aspects names, usually for tooling purpose\")\nCONFIG_FLD_STRING_LIST(worker_aspects, \"task aspects names, usually for tooling purpose\")\nCONFIG_FLD(int,\n           uint64,\n           queue_length_throttling_threshold,\n           1000000,\n           \"throttling: throttling threshold above which rpc requests will be dropped\")\nCONFIG_FLD(bool,\n           bool,\n           enable_virtual_queue_throttling,\n           false,\n           \"throttling: whether to enable throttling with virtual queues\")\nCONFIG_END\n}\n"
  },
  {
    "path": "include/dsn/tool-api/timer_service.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     timer service abstraction\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool-api/task.h>\n\nnamespace dsn {\nclass service_node;\n\n/*!\n@addtogroup tool-api-providers\n@{\n*/\n/*!\n  timer service schedules the input tasks at specified timepoint\n*/\nclass timer_service\n{\npublic:\n    template <typename T>\n    static timer_service *create(service_node *node, timer_service *inner_provider)\n    {\n        return new T(node, inner_provider);\n    }\n\n    typedef timer_service *(*factory)(service_node *, timer_service *);\n\npublic:\n    timer_service(service_node *node, timer_service 
*inner_provider) { _node = node; }\n\n    virtual ~timer_service() = default;\n\n    virtual void start() = 0;\n    virtual void stop() = 0;\n\n    // after milliseconds, the provider should call task->enqueue()\n    virtual void add_timer(task *task) = 0;\n\n    // inquiry\n    service_node *node() const { return _node; }\n\nprivate:\n    service_node *_node;\n};\n/*@}*/\n} // end namespace\n"
  },
  {
    "path": "include/dsn/tool-api/uniq_timestamp_us.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#pragma once\n\n#include <cstdint>\n#include <algorithm>\n#include <dsn/utility/ports.h>\n#include <dsn/c/api_layer1.h>\n\nnamespace dsn {\n//\n// uniq_timestamp_us is used to generate an increasing unique microsecond timestamp\n// in rdsn, it's mainly used for replica to set mutation's timestamp\n//\n// Notice: this module is not thread-safe,\n// please ensure that it is accessed only by one thread\n//\nclass uniq_timestamp_us\n{\nprivate:\n    uint64_t _last_ts;\n\npublic:\n    uniq_timestamp_us() { _last_ts = dsn_now_us(); }\n\n    void try_update(uint64_t new_ts)\n    {\n        if (dsn_likely(new_ts > _last_ts))\n            _last_ts = new_ts;\n    }\n\n    uint64_t next()\n    {\n        _last_ts = std::max(dsn_now_us(), _last_ts 
+ 1);\n        return _last_ts;\n    }\n};\n}\n"
  },
  {
    "path": "include/dsn/tool-api/zlocks.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <algorithm>\n#include <dsn/utility/utils.h>\n\n///\n/// synchronization objects of rDSN.\n///\n/// you MUST always use these objects to do synchronization when you write code\n/// in rdsn's \"service_app\", because different implementations may be provided\n/// when then program is running in different mode(nativerun/simulator).\n///\n/// As for the synchronize objects in \"utility/synchronize.h\", they are\n/// used for synchronization inner the rdsn core runtime.\n///\n\nnamespace dsn {\nclass ilock;\nclass zlock\n{\npublic:\n    zlock(bool recursive = false);\n    ~zlock();\n\n    void lock();\n    bool try_lock();\n    void unlock();\n\nprivate:\n    DISALLOW_COPY_AND_ASSIGN(zlock);\n    ilock 
*_h;\n};\n\nclass rwlock_nr_provider;\nclass zrwlock_nr\n{\npublic:\n    zrwlock_nr();\n    ~zrwlock_nr();\n\n    void lock_read();\n    void unlock_read();\n    bool try_lock_read();\n\n    void lock_write();\n    void unlock_write();\n    bool try_lock_write();\n\nprivate:\n    DISALLOW_COPY_AND_ASSIGN(zrwlock_nr);\n    rwlock_nr_provider *_h;\n};\n\nclass semaphore_provider;\nclass zsemaphore\n{\npublic:\n    zsemaphore(int initial_count = 0);\n    ~zsemaphore();\n\n    void signal(int count = 1);\n    bool wait(int timeout_milliseconds = TIME_MS_MAX);\n\nprivate:\n    DISALLOW_COPY_AND_ASSIGN(zsemaphore);\n    semaphore_provider *_h;\n};\n\nclass zevent\n{\npublic:\n    zevent(bool manualReset, bool initState = false);\n    ~zevent();\n\n    void set();\n    void reset();\n    bool wait(int timeout_milliseconds = TIME_MS_MAX);\n\nprivate:\n    DISALLOW_COPY_AND_ASSIGN(zevent);\n    zsemaphore _sema;\n    std::atomic<bool> _signaled;\n    bool _manualReset;\n};\n}\n\n///\n/// RAII wrapper of rdsn's synchronization objects\n///\nnamespace dsn {\nclass zauto_lock\n{\npublic:\n    zauto_lock() : _locked(false), _lock(nullptr) {}\n    zauto_lock(zlock &lock) : _locked(true), _lock(&lock) { _lock->lock(); }\n    ~zauto_lock()\n    {\n        if (_locked) {\n            _lock->unlock();\n            _locked = false;\n        }\n    }\n\n    void swap(zauto_lock &other)\n    {\n        std::swap(_locked, other._locked);\n        std::swap(_lock, other._lock);\n    }\n\nprivate:\n    bool _locked;\n    zlock *_lock;\n};\n\nclass zauto_read_lock\n{\npublic:\n    zauto_read_lock() : _locked(false), _lock(nullptr) {}\n    zauto_read_lock(zrwlock_nr &lock) : _locked(true), _lock(&lock) { _lock->lock_read(); }\n    ~zauto_read_lock()\n    {\n        if (_locked) {\n            _lock->unlock_read();\n            _locked = false;\n        }\n    }\n\n    void swap(zauto_read_lock &other)\n    {\n        std::swap(_locked, other._locked);\n        std::swap(_lock, 
other._lock);\n    }\n\nprivate:\n    bool _locked;\n    zrwlock_nr *_lock;\n};\n\nclass zauto_write_lock\n{\npublic:\n    zauto_write_lock() : _locked(false), _lock(nullptr) {}\n    zauto_write_lock(zrwlock_nr &lock) : _locked(true), _lock(&lock) { _lock->lock_write(); }\n    ~zauto_write_lock()\n    {\n        if (_locked) {\n            _lock->unlock_write();\n            _locked = false;\n        }\n    }\n\n    void swap(zauto_write_lock &other)\n    {\n        std::swap(_locked, other._locked);\n        std::swap(_lock, other._lock);\n    }\n\nprivate:\n    bool _locked;\n    zrwlock_nr *_lock;\n};\n}\n\n///\n/// utils function used to check the lock safety\n///\nnamespace dsn {\nnamespace lock_checker {\nvoid check_wait_safety();\nvoid check_dangling_lock();\n}\n}\n"
  },
  {
    "path": "include/dsn/tool_api.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     define the interface for implementing and plug-in the tools &\n *     runtime components into rDSN.\n *     In rDSN, both developement tools and runtime libraries\n *     (e.g., high performance components) are considered tools.\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n/*!\n@defgroup tool-api-hooks Join Points\n@ingroup tool-api\n\nJoin points are hooks that allow for system monitoring and manipulation\n\n@defgroup tool-api-providers Component Providers\n@ingroup tool-api\n\nComponent providers define the interface for the local components (e.g., network, lock)\n*/\n\n#pragma once\n\n// providers\n#include 
<dsn/utility/factory_store.h>\n#include <dsn/tool-api/task_queue.h>\n#include <dsn/tool-api/task_worker.h>\n#include <dsn/tool-api/network.h>\n#include <dsn/tool-api/env_provider.h>\n#include <dsn/tool-api/message_parser.h>\n#include <dsn/tool-api/logging_provider.h>\n#include <dsn/tool-api/timer_service.h>\n#include <dsn/utility/sys_exit_hook.h>\n\nnamespace dsn {\nnamespace tools {\n\n/*!\n@addtogroup tool-api-providers\n@{\n */\nclass tool_base\n{\npublic:\n    virtual ~tool_base() {}\n\n    DSN_API explicit tool_base(const char *name);\n\n    const std::string &name() const { return _name; }\n\nprotected:\n    std::string _name;\n};\n\nclass toollet : public tool_base\n{\npublic:\n    template <typename T>\n    static toollet *create(const char *name)\n    {\n        return new T(name);\n    }\n\n    typedef toollet *(*factory)(const char *);\n\npublic:\n    DSN_API toollet(const char *name);\n\n    virtual void install(service_spec &spec) = 0;\n};\n\nclass tool_app : public tool_base\n{\npublic:\n    template <typename T>\n    static tool_app *create(const char *name)\n    {\n        return new T(name);\n    }\n\n    typedef tool_app *(*factory)(const char *);\n\npublic:\n    DSN_API tool_app(const char *name);\n\n    virtual void install(service_spec &spec) = 0;\n\n    // this routine will be invoked in the main thread as the tool driver (if necessary for the\n    // tool, e.g., model checking)\n    virtual void run() { start_all_apps(); }\n\npublic:\n    DSN_API virtual void start_all_apps();\n    DSN_API virtual void stop_all_apps(bool cleanup);\n\n    DSN_API static const service_spec &get_service_spec();\n};\n\nnamespace internal_use_only {\nDSN_API bool\nregister_component_provider(const char *name, timer_service::factory f, ::dsn::provider_type type);\nDSN_API bool\nregister_component_provider(const char *name, task_queue::factory f, ::dsn::provider_type type);\nDSN_API bool\nregister_component_provider(const char *name, task_worker::factory f, 
::dsn::provider_type type);\nDSN_API bool\nregister_component_provider(const char *name, network::factory f, ::dsn::provider_type type);\nDSN_API bool\nregister_component_provider(const char *name, env_provider::factory f, ::dsn::provider_type type);\nDSN_API bool register_component_provider(network_header_format fmt,\n                                         const std::vector<const char *> &signatures,\n                                         message_parser::factory f,\n                                         size_t sz);\nDSN_API bool register_toollet(const char *name, toollet::factory f, ::dsn::provider_type type);\nDSN_API bool register_tool(const char *name, tool_app::factory f, ::dsn::provider_type type);\nDSN_API toollet *get_toollet(const char *name, ::dsn::provider_type type);\n} // namespace internal_use_only\n\n/*!\n@addtogroup tool-api-hooks\n@{\n*/\nDSN_API extern join_point<void> sys_init_before_app_created;\nDSN_API extern join_point<void> sys_init_after_app_created;\n/*@}*/\n\ntemplate <typename T>\nbool register_component_provider(const char *name)\n{\n    return internal_use_only::register_component_provider(\n        name, T::template create<T>, ::dsn::PROVIDER_TYPE_MAIN);\n}\n\ntemplate <typename T>\nstruct component_provider_registerer\n{\n    component_provider_registerer(const char *name) { register_component_provider<T>(name); }\n};\n\ntemplate <typename T>\nbool register_component_aspect(const char *name)\n{\n    return internal_use_only::register_component_provider(\n        name, T::template create<T>, ::dsn::PROVIDER_TYPE_ASPECT);\n}\ntemplate <typename T>\nbool register_message_header_parser(network_header_format fmt,\n                                    const std::vector<const char *> &signatures);\n\ntemplate <typename T>\nbool register_toollet(const char *name)\n{\n    return internal_use_only::register_toollet(\n        name, toollet::template create<T>, ::dsn::PROVIDER_TYPE_MAIN);\n}\ntemplate <typename T>\nbool 
register_tool(const char *name)\n{\n    return internal_use_only::register_tool(\n        name, tool_app::template create<T>, ::dsn::PROVIDER_TYPE_MAIN);\n}\ntemplate <typename T>\nT *get_toollet(const char *name)\n{\n    return (T *)internal_use_only::get_toollet(name, ::dsn::PROVIDER_TYPE_MAIN);\n}\nDSN_API tool_app *get_current_tool();\nDSN_API const service_spec &spec();\nDSN_API const char *get_service_node_name(service_node *node);\nDSN_API bool is_engine_ready();\n\n/*\n @}\n */\n\n// --------- inline implementation -----------------------------\ntemplate <typename T>\nbool register_message_header_parser(network_header_format fmt,\n                                    const std::vector<const char *> &signatures)\n{\n    return internal_use_only::register_component_provider(\n        fmt, signatures, T::template create<T>, sizeof(T));\n}\n} // namespace tools\n\n#define DSN_REGISTER_COMPONENT_PROVIDER(type, name)                                                \\\n    static tools::component_provider_registerer<type> COMPONENT_PROVIDER_REG_##type(name)\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/toollet/fault_injector.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     fault inject toolet interface definition\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool_api.h>\n\n/*!\n@defgroup fault-injector Fault Injector\n@ingroup tools-test\n\nFault Injector toollet\n\nThis toollet injects faults to mimic various failures in production environments,\nas configed below.\n\n<PRE>\n\n[core]\n\ntoollets = fault_injector\n\n[task..default]\n; whether enable fault injection\nfault_injection_enabled = true\n\n; maximum disk operation delay (ms)\ndisk_io_delay_ms_max = 12\n\n; miminum disk operation delay (ms)\ndisk_io_delay_ms_min = 1\n\n; failure ratio for disk 
read operations\ndisk_read_fail_ratio = 0.000001\n\n; failure ratio for disk write operations\ndisk_write_fail_ratio = 0.000001\n\n; extra execution time delay (us) for this task\nexecution_extra_delay_us_max = 0\n\n; maximum message delay (ms) for rpc messages\nrpc_message_delay_ms_max = 1000\n\n; minimum message delay (ms) for rpc messages\nrpc_message_delay_ms_min = 0\n\n\n; drop ratio for rpc request messages\nrpc_request_drop_ratio = 0.000100\n\n\n; drop ratio for rpc response messages\nrpc_response_drop_ratio = 0.001000\n\n[task.RPC_PING]\nfault_injection_enabled = false\n\n</PRE>\n*/\nnamespace dsn {\nnamespace tools {\n\nclass fault_injector : public toollet\n{\npublic:\n    explicit fault_injector(const char *name);\n    void install(service_spec &spec) override;\n};\n}\n}\n"
  },
  {
    "path": "include/dsn/toollet/profiler.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/tool_api.h>\n\n/*!\n@defgroup profiler Profiler\n@ingroup tools\n\nProfiler toollet\n\nThis toollet collects many performance counter values for the specified tasks,\nas configed below.\n\n<PRE>\n\n[core]\n\ntoollets = profiler\n\n[task..default]\nis_profile = true\n\n[task.RPC_PING]\nis_profile = false\n\n</PRE>\n*/\n\nnamespace dsn {\nnamespace tools {\n\nclass profiler : public toollet\n{\npublic:\n    profiler(const char *name);\n    virtual void install(service_spec &spec);\n};\n\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/toollet/tracer.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     the tracer toollets traces all the asynchonous execution flow\n *     in the system through the join-point mechanism\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool_api.h>\n\n/*!\n@defgroup tracer Tracer\n@ingroup tools\n\nTracer toollet\n\nThis toollet logs all task operations for the specified tasks,\nas configed below.\n\n<PRE>\n\n[core]\n\ntoollets = tracer\n\n[task..default]\nis_trace = true\n\n; whether to trace when an aio task is called\ntracer::on_aio_call = true\n\n; whether to trace when an aio task is enqueued\ntracer::on_aio_enqueue = true\n\n; whether to trace when a 
rpc is made\ntracer::on_rpc_call = true\n\n; whether to trace when reply a rpc request\ntracer::on_rpc_reply = true\n\n; whether to trace when a rpc request task is enqueued\ntracer::on_rpc_request_enqueue = true\n\n; whether to trace when a rpc response task is enqueued\ntracer::on_rpc_response_enqueue = true\n\n; whether to trace when a task begins\ntracer::on_task_begin = true\n\n; whether to trace when a task is cancel post\ntracer::on_task_cancel_post = true\n\n; whether to trace when a task is cancelled\ntracer::on_task_cancelled = true\n\n; whether to trace when a task ends\ntracer::on_task_end = true\n\n; whether to trace when a timer or async task is enqueued\ntracer::on_task_enqueue = true\n\n; whether to trace when a task is wait post\ntracer::on_task_wait_post = true\n\n; whether to trace when a task is to be wait\ntracer::on_task_wait_pre = true\n\n[task.RPC_PING]\nis_trace = false\n\n</PRE>\n*/\nnamespace dsn {\nnamespace tools {\n\nclass tracer : public toollet\n{\npublic:\n    tracer(const char *name);\n    virtual void install(service_spec &spec);\n};\n}\n}\n"
  },
  {
    "path": "include/dsn/utility/TokenBucket.h",
    "content": "/*\n * Copyright (c) Facebook, Inc. and its affiliates.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#pragma once\n\n#include <algorithm>\n#include <atomic>\n#include <chrono>\n#include <thread>\n#include <boost/optional.hpp>\n#include <dsn/utility/ports.h>\n\nnamespace folly {\n\n/**\n * Thread-safe (atomic) token bucket implementation.\n *\n * A token bucket (http://en.wikipedia.org/wiki/Token_bucket) models a stream\n * of events with an average rate and some amount of burstiness. The canonical\n * example is a packet switched network: the network can accept some number of\n * bytes per second and the bytes come in finite packets (bursts). A token\n * bucket stores up to a fixed number of tokens (the burst size). Some number\n * of tokens are removed when an event occurs. The tokens are replenished at a\n * fixed rate. Failure to allocate tokens implies resource is unavailable and\n * caller needs to implement its own retry mechanism. For simple cases where\n * caller is okay with a FIFO starvation-free scheduling behavior, there are\n * also APIs to 'borrow' from the future effectively assigning a start time to\n * the caller when it should proceed with using the resource. It is also\n * possible to 'return' previously allocated tokens to make them available to\n * other users. 
Returns in excess of burstSize are considered expired and\n * will not be available to later callers.\n *\n * This implementation records the last time it was updated. This allows the\n * token bucket to add tokens \"just in time\" when tokens are requested.\n *\n * The \"dynamic\" base variant allows the token generation rate and maximum\n * burst size to change with every token consumption.\n *\n * @tparam Clock Clock type, must be steady i.e. monotonic.\n */\ntemplate <typename Clock = std::chrono::steady_clock>\nclass BasicDynamicTokenBucket\n{\n    static_assert(Clock::is_steady, \"clock must be steady\");\n\npublic:\n    /**\n     * Constructor.\n     *\n     * @param zeroTime Initial time at which to consider the token bucket\n     *                 starting to fill. Defaults to 0, so by default token\n     *                 buckets are \"full\" after construction.\n     */\n    explicit BasicDynamicTokenBucket(double zeroTime = 0) noexcept : zeroTime_(zeroTime) {}\n\n    /**\n     * Copy constructor.\n     *\n     * Thread-safe. (Copy constructors of derived classes may not be thread-safe\n     * however.)\n     */\n    BasicDynamicTokenBucket(const BasicDynamicTokenBucket &other) noexcept\n        : zeroTime_(other.zeroTime_.load())\n    {\n    }\n\n    /**\n     * Copy-assignment operator.\n     *\n     * Warning: not thread safe for the object being assigned to (including\n     * self-assignment). Thread-safe for the other object.\n     */\n    BasicDynamicTokenBucket &operator=(const BasicDynamicTokenBucket &other) noexcept\n    {\n        zeroTime_ = other.zeroTime_.load();\n        return *this;\n    }\n\n    /**\n     * Re-initialize token bucket.\n     *\n     * Thread-safe.\n     *\n     * @param zeroTime Initial time at which to consider the token bucket\n     *                 starting to fill. 
Defaults to 0, so by default token\n     *                 bucket is reset to \"full\".\n     */\n    void reset(double zeroTime = 0) noexcept { zeroTime_ = zeroTime; }\n\n    /**\n     * Returns the current time in seconds since Epoch.\n     */\n    static double defaultClockNow() noexcept\n    {\n        auto const now = Clock::now().time_since_epoch();\n        return std::chrono::duration<double>(now).count();\n    }\n\n    /**\n     * Attempts to consume some number of tokens. Tokens are first added to the\n     * bucket based on the time elapsed since the last attempt to consume tokens.\n     * Note: Attempts to consume more tokens than the burst size will always\n     * fail.\n     *\n     * Thread-safe.\n     *\n     * @param toConsume The number of tokens to consume.\n     * @param rate Number of tokens to generate per second.\n     * @param burstSize Maximum burst size. Must be greater than 0.\n     * @param nowInSeconds Current time in seconds. Should be monotonically\n     *                     increasing from the nowInSeconds specified in\n     *                     this token bucket's constructor.\n     * @return True if the rate limit check passed, false otherwise.\n     */\n    bool consume(double toConsume,\n                 double rate,\n                 double burstSize,\n                 double nowInSeconds = defaultClockNow())\n    {\n        assert(rate > 0);\n        assert(burstSize > 0);\n\n        if (nowInSeconds <= zeroTime_.load()) {\n            return 0;\n        }\n\n        return consumeImpl(rate, burstSize, nowInSeconds, [toConsume](double &tokens) {\n            if (tokens < toConsume) {\n                return false;\n            }\n            tokens -= toConsume;\n            return true;\n        });\n    }\n\n    /**\n     * Similar to consume, but always consumes some number of tokens.  If the\n     * bucket contains enough tokens - consumes toConsume tokens.  
Otherwise the\n     * bucket is drained.\n     *\n     * Thread-safe.\n     *\n     * @param toConsume The number of tokens to consume.\n     * @param rate Number of tokens to generate per second.\n     * @param burstSize Maximum burst size. Must be greater than 0.\n     * @param nowInSeconds Current time in seconds. Should be monotonically\n     *                     increasing from the nowInSeconds specified in\n     *                     this token bucket's constructor.\n     * @return number of tokens that were consumed.\n     */\n    double consumeOrDrain(double toConsume,\n                          double rate,\n                          double burstSize,\n                          double nowInSeconds = defaultClockNow())\n    {\n        assert(rate > 0);\n        assert(burstSize > 0);\n\n        if (nowInSeconds <= zeroTime_.load()) {\n            return 0;\n        }\n\n        double consumed;\n        consumeImpl(rate, burstSize, nowInSeconds, [&consumed, toConsume](double &tokens) {\n            if (tokens < toConsume) {\n                consumed = tokens;\n                tokens = 0.0;\n            } else {\n                consumed = toConsume;\n                tokens -= toConsume;\n            }\n            return true;\n        });\n        return consumed;\n    }\n\n    /**\n     * Return extra tokens back to the bucket. This will move the zeroTime_\n     * value back based on the rate.\n     *\n     * Thread-safe.\n     */\n    void returnTokens(double tokensToReturn, double rate)\n    {\n        assert(rate > 0);\n        assert(tokensToReturn > 0);\n\n        returnTokensImpl(tokensToReturn, rate);\n    }\n\n    /**\n     * Like consumeOrDrain but the call will always satisfy the asked for count.\n     * It does so by borrowing tokens from the future (zeroTime_ will move\n     * forward) if the currently available count isn't sufficient.\n     *\n     * Returns a folly::Optional<double>. 
The optional wont be set if the request\n     * cannot be satisfied: only case is when it is larger than burstSize. The\n     * value of the optional is a double indicating the time in seconds that the\n     * caller needs to wait at which the reservation becomes valid. The caller\n     * could simply sleep for the returned duration to smooth out the allocation\n     * to match the rate limiter or do some other computation in the meantime. In\n     * any case, any regular consume or consumeOrDrain calls will fail to allocate\n     * any tokens until the future time is reached.\n     *\n     * Note: It is assumed the caller will not ask for a very large count nor use\n     * it immediately (if not waiting inline) as that would break the burst\n     * prevention the limiter is meant to be used for.\n     *\n     * Thread-safe.\n     */\n    boost::optional<double> consumeWithBorrowNonBlocking(double toConsume,\n                                                         double rate,\n                                                         double burstSize,\n                                                         double nowInSeconds = defaultClockNow())\n    {\n        assert(rate > 0);\n        assert(burstSize > 0);\n\n        if (burstSize < toConsume) {\n            // boost::none\n            // if we use boost::none here, some compilers will generate warning\n            // that's actually a false positive of \"-Wmaybe-uninitialized\".\n            // https://www.boost.org/doc/libs/1_65_1/libs/optional/doc/html/boost_optional/tutorial/gotchas/false_positive_with__wmaybe_uninitialized.html\n            return boost::make_optional(false, double());\n        }\n\n        while (toConsume > 0) {\n            double consumed = consumeOrDrain(toConsume, rate, burstSize, nowInSeconds);\n            if (consumed > 0) {\n                toConsume -= consumed;\n            } else {\n                double zeroTimeNew = returnTokensImpl(-toConsume, rate);\n                
double napTime = std::max(0.0, zeroTimeNew - nowInSeconds);\n                return boost::optional<double>(napTime);\n            }\n        }\n        return boost::optional<double>(0);\n    }\n\n    /**\n     * Convenience wrapper around non-blocking borrow to sleep inline until\n     * reservation is valid.\n     */\n    bool consumeWithBorrowAndWait(double toConsume,\n                                  double rate,\n                                  double burstSize,\n                                  double nowInSeconds = defaultClockNow())\n    {\n        auto res = consumeWithBorrowNonBlocking(toConsume, rate, burstSize, nowInSeconds);\n        if (res.get_value_or(0) > 0) {\n            int64_t napUSec = res.get() * 1000000;\n            std::this_thread::sleep_for(std::chrono::microseconds(napUSec));\n        }\n        return res.is_initialized();\n    }\n\n    /**\n     * Returns the number of tokens currently available.\n     *\n     * Thread-safe (but returned value may immediately be outdated).\n     */\n    double available(double rate, double burstSize, double nowInSeconds = defaultClockNow()) const\n        noexcept\n    {\n        assert(rate > 0);\n        assert(burstSize > 0);\n\n        double zt = this->zeroTime_.load();\n        if (nowInSeconds <= zt) {\n            return 0;\n        }\n        return std::min((nowInSeconds - zt) * rate, burstSize);\n    }\n\nprivate:\n    template <typename TCallback>\n    bool consumeImpl(double rate, double burstSize, double nowInSeconds, const TCallback &callback)\n    {\n        auto zeroTimeOld = zeroTime_.load();\n        double zeroTimeNew;\n        do {\n            auto tokens = std::min((nowInSeconds - zeroTimeOld) * rate, burstSize);\n            if (!callback(tokens)) {\n                return false;\n            }\n            zeroTimeNew = nowInSeconds - tokens / rate;\n        } while (dsn_unlikely(!zeroTime_.compare_exchange_weak(zeroTimeOld, zeroTimeNew)));\n\n        return true;\n    
}\n\n    /**\n     * Adjust zeroTime based on rate and tokenCount and return the new value of\n     * zeroTime_. Note: Token count can be negative to move the zeroTime_ value\n     * into the future.\n     */\n    double returnTokensImpl(double tokenCount, double rate)\n    {\n        auto zeroTimeOld = zeroTime_.load();\n        double zeroTimeNew;\n        do {\n            zeroTimeNew = zeroTimeOld - tokenCount / rate;\n        } while (dsn_unlikely(!zeroTime_.compare_exchange_weak(zeroTimeOld, zeroTimeNew)));\n        return zeroTimeNew;\n    }\n\n    std::atomic<double> zeroTime_;\n};\n\n/**\n * Specialization of BasicDynamicTokenBucket with a fixed token\n * generation rate and a fixed maximum burst size.\n */\ntemplate <typename Clock = std::chrono::steady_clock>\nclass BasicTokenBucket\n{\n    static_assert(Clock::is_steady, \"clock must be steady\");\n\nprivate:\n    using Impl = BasicDynamicTokenBucket<Clock>;\n\npublic:\n    /**\n     * Construct a token bucket with a specific maximum rate and burst size.\n     *\n     * @param genRate Number of tokens to generate per second.\n     * @param burstSize Maximum burst size. Must be greater than 0.\n     * @param zeroTime Initial time at which to consider the token bucket\n     *                 starting to fill. 
Defaults to 0, so by default token\n     *                 bucket is \"full\" after construction.\n     */\n    BasicTokenBucket(double genRate, double burstSize, double zeroTime = 0) noexcept\n        : tokenBucket_(zeroTime), rate_(genRate), burstSize_(burstSize)\n    {\n        assert(rate_ > 0);\n        assert(burstSize_ > 0);\n    }\n\n    /**\n     * Copy constructor.\n     *\n     * Warning: not thread safe!\n     */\n    BasicTokenBucket(const BasicTokenBucket &other) noexcept = default;\n\n    /**\n     * Copy-assignment operator.\n     *\n     * Warning: not thread safe!\n     */\n    BasicTokenBucket &operator=(const BasicTokenBucket &other) noexcept = default;\n\n    /**\n     * Returns the current time in seconds since Epoch.\n     */\n    static double defaultClockNow() noexcept(noexcept(Impl::defaultClockNow()))\n    {\n        return Impl::defaultClockNow();\n    }\n\n    /**\n     * Change rate and burst size.\n     *\n     * Warning: not thread safe!\n     *\n     * @param genRate Number of tokens to generate per second.\n     * @param burstSize Maximum burst size. Must be greater than 0.\n     * @param nowInSeconds Current time in seconds. Should be monotonically\n     *                     increasing from the nowInSeconds specified in\n     *                     this token bucket's constructor.\n     */\n    void reset(double genRate, double burstSize, double nowInSeconds = defaultClockNow()) noexcept\n    {\n        assert(genRate > 0);\n        assert(burstSize > 0);\n        const double availTokens = available(nowInSeconds);\n        rate_ = genRate;\n        burstSize_ = burstSize;\n        setCapacity(availTokens, nowInSeconds);\n    }\n\n    /**\n     * Change number of tokens in bucket.\n     *\n     * Warning: not thread safe!\n     *\n     * @param tokens Desired number of tokens in bucket after the call.\n     * @param nowInSeconds Current time in seconds. 
Should be monotonically\n     *                     increasing from the nowInSeconds specified in\n     *                     this token bucket's constructor.\n     */\n    void setCapacity(double tokens, double nowInSeconds) noexcept\n    {\n        tokenBucket_.reset(nowInSeconds - tokens / rate_);\n    }\n\n    /**\n     * Attempts to consume some number of tokens. Tokens are first added to the\n     * bucket based on the time elapsed since the last attempt to consume tokens.\n     * Note: Attempts to consume more tokens than the burst size will always\n     * fail.\n     *\n     * Thread-safe.\n     *\n     * @param toConsume The number of tokens to consume.\n     * @param nowInSeconds Current time in seconds. Should be monotonically\n     *                     increasing from the nowInSeconds specified in\n     *                     this token bucket's constructor.\n     * @return True if the rate limit check passed, false otherwise.\n     */\n    bool consume(double toConsume, double nowInSeconds = defaultClockNow())\n    {\n        return tokenBucket_.consume(toConsume, rate_, burstSize_, nowInSeconds);\n    }\n\n    /**\n     * Similar to consume, but always consumes some number of tokens.  If the\n     * bucket contains enough tokens - consumes toConsume tokens.  Otherwise the\n     * bucket is drained.\n     *\n     * Thread-safe.\n     *\n     * @param toConsume The number of tokens to consume.\n     * @param nowInSeconds Current time in seconds. 
Should be monotonically\n     *                     increasing from the nowInSeconds specified in\n     *                     this token bucket's constructor.\n     * @return number of tokens that were consumed.\n     */\n    double consumeOrDrain(double toConsume, double nowInSeconds = defaultClockNow())\n    {\n        return tokenBucket_.consumeOrDrain(toConsume, rate_, burstSize_, nowInSeconds);\n    }\n\n    /**\n     * Returns extra token back to the bucket.\n     */\n    void returnTokens(double tokensToReturn)\n    {\n        return tokenBucket_.returnTokens(tokensToReturn, rate_);\n    }\n\n    /**\n     * Reserve tokens and return time to wait for in order for the reservation to\n     * be compatible with the bucket configuration.\n     */\n    boost::optional<double> consumeWithBorrowNonBlocking(double toConsume,\n                                                         double nowInSeconds = defaultClockNow())\n    {\n        return tokenBucket_.consumeWithBorrowNonBlocking(\n            toConsume, rate_, burstSize_, nowInSeconds);\n    }\n\n    /**\n     * Reserve tokens. 
Blocks if need be until reservation is satisfied.\n     */\n    bool consumeWithBorrowAndWait(double toConsume, double nowInSeconds = defaultClockNow())\n    {\n        return tokenBucket_.consumeWithBorrowAndWait(toConsume, rate_, burstSize_, nowInSeconds);\n    }\n\n    /**\n     * Returns the number of tokens currently available.\n     *\n     * Thread-safe (but returned value may immediately be outdated).\n     */\n    double available(double nowInSeconds = defaultClockNow()) const\n    {\n        return tokenBucket_.available(rate_, burstSize_, nowInSeconds);\n    }\n\n    /**\n     * Returns the number of tokens generated per second.\n     *\n     * Thread-safe (but returned value may immediately be outdated).\n     */\n    double rate() const noexcept { return rate_; }\n\n    /**\n     * Returns the maximum burst size.\n     *\n     * Thread-safe (but returned value may immediately be outdated).\n     */\n    double burst() const noexcept { return burstSize_; }\n\nprivate:\n    Impl tokenBucket_;\n    double rate_;\n    double burstSize_;\n};\n\nusing TokenBucket = BasicTokenBucket<>;\nusing DynamicTokenBucket = BasicDynamicTokenBucket<>;\n\n} // namespace folly\n"
  },
  {
    "path": "include/dsn/utility/absl/base/internal/invoke.h",
    "content": "// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//      http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n//\n// absl::base_internal::Invoke(f, args...) is an implementation of\n// INVOKE(f, args...) from section [func.require] of the C++ standard.\n//\n// [func.require]\n// Define INVOKE (f, t1, t2, ..., tN) as follows:\n// 1. (t1.*f)(t2, ..., tN) when f is a pointer to a member function of a class T\n//    and t1 is an object of type T or a reference to an object of type T or a\n//    reference to an object of a type derived from T;\n// 2. ((*t1).*f)(t2, ..., tN) when f is a pointer to a member function of a\n//    class T and t1 is not one of the types described in the previous item;\n// 3. t1.*f when N == 1 and f is a pointer to member data of a class T and t1 is\n//    an object of type T or a reference to an object of type T or a reference\n//    to an object of a type derived from T;\n// 4. (*t1).*f when N == 1 and f is a pointer to member data of a class T and t1\n//    is not one of the types described in the previous item;\n// 5. f(t1, t2, ..., tN) in all other cases.\n//\n// The implementation is SFINAE-friendly: substitution failure within Invoke()\n// isn't an error.\n\n#ifndef ABSL_BASE_INTERNAL_INVOKE_H_\n#define ABSL_BASE_INTERNAL_INVOKE_H_\n\n#include <algorithm>\n#include <type_traits>\n#include <utility>\n\n// The following code is internal implementation detail.  
See the comment at the\n// top of this file for the API documentation.\n\n// make namespace absl internal of dsn to solve redefine error with absl in s2geometry\nnamespace dsn {\nnamespace absl {\nnamespace base_internal {\n\n// The five classes below each implement one of the clauses from the definition\n// of INVOKE. The inner class template Accept<F, Args...> checks whether the\n// clause is applicable; static function template Invoke(f, args...) does the\n// invocation.\n//\n// By separating the clause selection logic from invocation we make sure that\n// Invoke() does exactly what the standard says.\n\ntemplate <typename Derived>\nstruct StrippedAccept\n{\n    template <typename... Args>\n    struct Accept\n        : Derived::template AcceptImpl<\n              typename std::remove_cv<typename std::remove_reference<Args>::type>::type...>\n    {\n    };\n};\n\n// (t1.*f)(t2, ..., tN) when f is a pointer to a member function of a class T\n// and t1 is an object of type T or a reference to an object of type T or a\n// reference to an object of a type derived from T.\nstruct MemFunAndRef : StrippedAccept<MemFunAndRef>\n{\n    template <typename... Args>\n    struct AcceptImpl : std::false_type\n    {\n    };\n\n    template <typename R, typename C, typename... Params, typename Obj, typename... Args>\n    struct AcceptImpl<R (C::*)(Params...), Obj, Args...> : std::is_base_of<C, Obj>\n    {\n    };\n\n    template <typename R, typename C, typename... Params, typename Obj, typename... Args>\n    struct AcceptImpl<R (C::*)(Params...) const, Obj, Args...> : std::is_base_of<C, Obj>\n    {\n    };\n\n    template <typename MemFun, typename Obj, typename... Args>\n    static decltype((std::declval<Obj>().*std::declval<MemFun>())(std::declval<Args>()...))\n    Invoke(MemFun &&mem_fun, Obj &&obj, Args &&... 
args)\n    {\n        return (std::forward<Obj>(obj).*std::forward<MemFun>(mem_fun))(std::forward<Args>(args)...);\n    }\n};\n\n// ((*t1).*f)(t2, ..., tN) when f is a pointer to a member function of a\n// class T and t1 is not one of the types described in the previous item.\nstruct MemFunAndPtr : StrippedAccept<MemFunAndPtr>\n{\n    template <typename... Args>\n    struct AcceptImpl : std::false_type\n    {\n    };\n\n    template <typename R, typename C, typename... Params, typename Ptr, typename... Args>\n    struct AcceptImpl<R (C::*)(Params...), Ptr, Args...>\n        : std::integral_constant<bool, !std::is_base_of<C, Ptr>::value>\n    {\n    };\n\n    template <typename R, typename C, typename... Params, typename Ptr, typename... Args>\n    struct AcceptImpl<R (C::*)(Params...) const, Ptr, Args...>\n        : std::integral_constant<bool, !std::is_base_of<C, Ptr>::value>\n    {\n    };\n\n    template <typename MemFun, typename Ptr, typename... Args>\n    static decltype(((*std::declval<Ptr>()).*std::declval<MemFun>())(std::declval<Args>()...))\n    Invoke(MemFun &&mem_fun, Ptr &&ptr, Args &&... args)\n    {\n        return ((*std::forward<Ptr>(ptr)).*\n                std::forward<MemFun>(mem_fun))(std::forward<Args>(args)...);\n    }\n};\n\n// t1.*f when N == 1 and f is a pointer to member data of a class T and t1 is\n// an object of type T or a reference to an object of type T or a reference\n// to an object of a type derived from T.\nstruct DataMemAndRef : StrippedAccept<DataMemAndRef>\n{\n    template <typename... 
Args>\n    struct AcceptImpl : std::false_type\n    {\n    };\n\n    template <typename R, typename C, typename Obj>\n    struct AcceptImpl<R C::*, Obj> : std::is_base_of<C, Obj>\n    {\n    };\n\n    template <typename DataMem, typename Ref>\n    static decltype(std::declval<Ref>().*std::declval<DataMem>()) Invoke(DataMem &&data_mem,\n                                                                         Ref &&ref)\n    {\n        return std::forward<Ref>(ref).*std::forward<DataMem>(data_mem);\n    }\n};\n\n// (*t1).*f when N == 1 and f is a pointer to member data of a class T and t1\n// is not one of the types described in the previous item.\nstruct DataMemAndPtr : StrippedAccept<DataMemAndPtr>\n{\n    template <typename... Args>\n    struct AcceptImpl : std::false_type\n    {\n    };\n\n    template <typename R, typename C, typename Ptr>\n    struct AcceptImpl<R C::*, Ptr> : std::integral_constant<bool, !std::is_base_of<C, Ptr>::value>\n    {\n    };\n\n    template <typename DataMem, typename Ptr>\n    static decltype((*std::declval<Ptr>()).*std::declval<DataMem>()) Invoke(DataMem &&data_mem,\n                                                                            Ptr &&ptr)\n    {\n        return (*std::forward<Ptr>(ptr)).*std::forward<DataMem>(data_mem);\n    }\n};\n\n// f(t1, t2, ..., tN) in all other cases.\nstruct Callable\n{\n    // Callable doesn't have Accept because it's the last clause that gets picked\n    // when none of the previous clauses are applicable.\n    template <typename F, typename... Args>\n    static decltype(std::declval<F>()(std::declval<Args>()...)) Invoke(F &&f, Args &&... args)\n    {\n        return std::forward<F>(f)(std::forward<Args>(args)...);\n    }\n};\n\n// Resolves to the first matching clause.\ntemplate <typename... 
Args>\nstruct Invoker\n{\n    typedef typename std::conditional<\n        MemFunAndRef::Accept<Args...>::value,\n        MemFunAndRef,\n        typename std::conditional<\n            MemFunAndPtr::Accept<Args...>::value,\n            MemFunAndPtr,\n            typename std::conditional<\n                DataMemAndRef::Accept<Args...>::value,\n                DataMemAndRef,\n                typename std::conditional<DataMemAndPtr::Accept<Args...>::value,\n                                          DataMemAndPtr,\n                                          Callable>::type>::type>::type>::type type;\n};\n\n// The result type of Invoke<F, Args...>.\ntemplate <typename F, typename... Args>\nusing InvokeT =\n    decltype(Invoker<F, Args...>::type::Invoke(std::declval<F>(), std::declval<Args>()...));\n\n// Invoke(f, args...) is an implementation of INVOKE(f, args...) from section\n// [func.require] of the C++ standard.\ntemplate <typename F, typename... Args>\nInvokeT<F, Args...> Invoke(F &&f, Args &&... args)\n{\n    return Invoker<F, Args...>::type::Invoke(std::forward<F>(f), std::forward<Args>(args)...);\n}\n} // namespace base_internal\n} // namespace absl\n} // namespace dsn\n\n#endif // ABSL_BASE_INTERNAL_INVOKE_H_\n"
  },
  {
    "path": "include/dsn/utility/absl/utility/utility.h",
    "content": "// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//      http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n//\n// This header file contains C++11 versions of standard <utility> header\n// abstractions available within C++14 and C++17, and are designed to be drop-in\n// replacement for code compliant with C++14 and C++17.\n//\n// The following abstractions are defined:\n//\n//   * integer_sequence<T, Ints...>  == std::integer_sequence<T, Ints...>\n//   * index_sequence<Ints...>       == std::index_sequence<Ints...>\n//   * make_integer_sequence<T, N>   == std::make_integer_sequence<T, N>\n//   * make_index_sequence<N>        == std::make_index_sequence<N>\n//   * index_sequence_for<Ts...>     == std::index_sequence_for<Ts...>\n//   * apply<Functor, Tuple>         == std::apply<Functor, Tuple>\n//\n// This header file also provides the tag types `in_place_t`, `in_place_type_t`,\n// and `in_place_index_t`, as well as the constant `in_place`, and\n// `constexpr` `std::move()` and `std::forward()` implementations in C++11.\n//\n// References:\n//\n//  http://en.cppreference.com/w/cpp/utility/integer_sequence\n//  http://en.cppreference.com/w/cpp/utility/apply\n//  http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2013/n3658.html\n//\n\n#ifndef ABSL_UTILITY_UTILITY_H_\n#define ABSL_UTILITY_UTILITY_H_\n\n#include <cstddef>\n#include <cstdlib>\n#include <tuple>\n#include <utility>\n\n#include <dsn/utility/absl/base/internal/invoke.h>\n\n// make namespace absl 
internal of dsn to solve redefine error with absl in s2geometry\nnamespace dsn {\nnamespace absl {\n\n// integer_sequence\n//\n// Class template representing a compile-time integer sequence. An instantiation\n// of `integer_sequence<T, Ints...>` has a sequence of integers encoded in its\n// type through its template arguments (which is a common need when\n// working with C++11 variadic templates). `absl::integer_sequence` is designed\n// to be a drop-in replacement for C++14's `std::integer_sequence`.\n//\n// Example:\n//\n//   template< class T, T... Ints >\n//   void user_function(integer_sequence<T, Ints...>);\n//\n//   int main()\n//   {\n//     // user_function's `T` will be deduced to `int` and `Ints...`\n//     // will be deduced to `0, 1, 2, 3, 4`.\n//     user_function(make_integer_sequence<int, 5>());\n//   }\ntemplate <typename T, T... Ints>\nstruct integer_sequence\n{\n    using value_type = T;\n    static constexpr size_t size() noexcept { return sizeof...(Ints); }\n};\n\n// index_sequence\n//\n// A helper template for an `integer_sequence` of `size_t`,\n// `absl::index_sequence` is designed to be a drop-in replacement for C++14's\n// `std::index_sequence`.\ntemplate <size_t... Ints>\nusing index_sequence = integer_sequence<size_t, Ints...>;\n\nnamespace utility_internal {\n\ntemplate <typename Seq, size_t SeqSize, size_t Rem>\nstruct Extend;\n\n// Note that SeqSize == sizeof...(Ints). It's passed explicitly for efficiency.\ntemplate <typename T, T... Ints, size_t SeqSize>\nstruct Extend<integer_sequence<T, Ints...>, SeqSize, 0>\n{\n    using type = integer_sequence<T, Ints..., (Ints + SeqSize)...>;\n};\n\ntemplate <typename T, T... Ints, size_t SeqSize>\nstruct Extend<integer_sequence<T, Ints...>, SeqSize, 1>\n{\n    using type = integer_sequence<T, Ints..., (Ints + SeqSize)..., 2 * SeqSize>;\n};\n\n// Recursion helper for 'make_integer_sequence<T, N>'.\n// 'Gen<T, N>::type' is an alias for 'integer_sequence<T, 0, 1, ... 
N-1>'.\ntemplate <typename T, size_t N>\nstruct Gen\n{\n    using type = typename Extend<typename Gen<T, N / 2>::type, N / 2, N % 2>::type;\n};\n\ntemplate <typename T>\nstruct Gen<T, 0>\n{\n    using type = integer_sequence<T>;\n};\n\n} // namespace utility_internal\n\n// Compile-time sequences of integers\n\n// make_integer_sequence\n//\n// This template alias is equivalent to\n// `integer_sequence<T, 0, 1, ..., N-1>`, and is designed to be a drop-in\n// replacement for C++14's `std::make_integer_sequence`.\ntemplate <typename T, T N>\nusing make_integer_sequence = typename utility_internal::Gen<T, N>::type;\n\n// make_index_sequence\n//\n// This template alias is equivalent to `index_sequence<0, 1, ..., N-1>`,\n// and is designed to be a drop-in replacement for C++14's\n// `std::make_index_sequence`.\ntemplate <size_t N>\nusing make_index_sequence = make_integer_sequence<size_t, N>;\n\n// index_sequence_for\n//\n// Converts a typename pack into an index sequence of the same length, and\n// is designed to be a drop-in replacement for C++14's\n// `std::index_sequence_for`.\ntemplate <typename... Ts>\nusing index_sequence_for = make_index_sequence<sizeof...(Ts)>;\n\nnamespace utility_internal {\n// Helper method for expanding tuple into a called method.\ntemplate <typename Functor, typename Tuple, std::size_t... 
Indexes>\nauto apply_helper(Functor &&functor, Tuple &&t, index_sequence<Indexes...>)\n    -> decltype(absl::base_internal::Invoke(std::forward<Functor>(functor),\n                                            std::get<Indexes>(std::forward<Tuple>(t))...))\n{\n    return absl::base_internal::Invoke(std::forward<Functor>(functor),\n                                       std::get<Indexes>(std::forward<Tuple>(t))...);\n}\n\n} // namespace utility_internal\n\n// apply\n//\n// Invokes a Callable using elements of a tuple as its arguments.\n// Each element of the tuple corresponds to an argument of the call (in order).\n// Both the Callable argument and the tuple argument are perfect-forwarded.\n// For member-function Callables, the first tuple element acts as the `this`\n// pointer. `absl::apply` is designed to be a drop-in replacement for C++17's\n// `std::apply`. Unlike C++17's `std::apply`, this is not currently `constexpr`.\n//\n// Example:\n//\n//   class Foo{void Bar(int);};\n//   void user_function1(int, std::string);\n//   void user_function2(std::unique_ptr<Foo>);\n//\n//   int main()\n//   {\n//       std::tuple<int, std::string> tuple1(42, \"bar\");\n//       // Invokes the first user function on int, std::string.\n//       absl::apply(&user_function1, tuple1);\n//\n//       auto foo = absl::make_unique<Foo>();\n//       std::tuple<Foo*, int> tuple2(foo.get(), 42);\n//       // Invokes the method Bar on foo with one argument 42.\n//       absl::apply(&Foo::Bar, tuple2);\n//\n//       std::tuple<std::unique_ptr<Foo>> tuple3(absl::make_unique<Foo>());\n//       // Invokes the second user function, which takes ownership of the\n//       // unique pointer.\n//       absl::apply(&user_function2, std::move(tuple3));\n//   }\ntemplate <typename Functor, typename Tuple>\nauto apply(Functor &&functor, Tuple &&t) -> decltype(utility_internal::apply_helper(\n    std::forward<Functor>(functor),\n    std::forward<Tuple>(t),\n    absl::make_index_sequence<\n        
std::tuple_size<typename std::remove_reference<Tuple>::type>::value>{}))\n{\n    return utility_internal::apply_helper(\n        std::forward<Functor>(functor),\n        std::forward<Tuple>(t),\n        absl::make_index_sequence<\n            std::tuple_size<typename std::remove_reference<Tuple>::type>::value>{});\n}\n} // namespace absl\n} // namespace dsn\n\n#endif // ABSL_UTILITY_UTILITY_H_\n"
  },
  {
    "path": "include/dsn/utility/alloc.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <algorithm>\n#include <functional>\n#include <memory>\n#include <new>\n\n#include <dsn/c/api_utilities.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/ports.h>\n\nnamespace dsn {\n\n#ifdef CACHELINE_SIZE\n\nextern void *cacheline_aligned_alloc(size_t size);\n\nextern void cacheline_aligned_free(void *mem_block);\n\ntemplate <typename T>\nusing cacheline_aligned_ptr = typename std::unique_ptr<T, std::function<void(void *)>>;\n\ntemplate <typename T>\ncacheline_aligned_ptr<T> cacheline_aligned_alloc_array(size_t len)\n{\n    void *buffer = cacheline_aligned_alloc(sizeof(T) * len);\n    if (dsn_unlikely(buffer == nullptr)) {\n        return cacheline_aligned_ptr<T>(nullptr, [](void *) {});\n    }\n\n    T *array = new (buffer) T[len];\n\n#ifndef NDEBUG\n    if (sizeof(T) <= CACHELINE_SIZE && (sizeof(T) & (sizeof(T) - 1)) == 0) {\n        for (size_t i = 0; i < len; ++i) {\n            T *elem = &(array[i]);\n            dassert_f((reinterpret_cast<const uintptr_t>(elem) & (sizeof(T) - 1)) == 0,\n                      \"unaligned array element for cache line: array={}, length={}, index={}, \"\n                
      \"elem={}, elem_size={}, mask={}, cacheline_size={}\",\n                      fmt::ptr(array),\n                      len,\n                      i,\n                      fmt::ptr(elem),\n                      sizeof(T),\n                      sizeof(T) - 1,\n                      CACHELINE_SIZE);\n        }\n    }\n#endif\n\n    return cacheline_aligned_ptr<T>(array, cacheline_aligned_free);\n}\n\ntemplate <typename T>\ncacheline_aligned_ptr<T> cacheline_aligned_alloc_array(size_t len, const T &val)\n{\n    auto array = cacheline_aligned_alloc_array<T>(len);\n    if (array) {\n        std::fill(array.get(), array.get() + len, val);\n    }\n\n    return array;\n}\n\n#endif\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/apply.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/absl/utility/utility.h>\n\nnamespace dsn {\n\nusing absl::apply;\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/autoref_ptr.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <atomic>\n#include <cassert>\n#include <type_traits>\n#include <utility>\n\nnamespace dsn {\nclass ref_counter\n{\npublic:\n    ref_counter() : _magic(0xdeadbeef), _counter(0) {}\n\n    virtual ~ref_counter()\n    {\n        // 0xdeadbeef: 3735928559\n        assert(_magic == 0xdeadbeef);\n\n        // 0xfacedead: 4207861421\n        _magic = 0xfacedead;\n    }\n\n    void add_ref()\n    {\n        // 0xdeadbeef: 3735928559\n        assert(_magic == 0xdeadbeef);\n\n        // Increasing the reference counter can always be done with 
memory_order_relaxed:\n        // New references to an object can only be formed from an existing reference,\n        // and passing an existing reference from one thread to another must already provide any\n        // required synchronization.\n        _counter.fetch_add(1, std::memory_order_relaxed);\n    }\n\n    void release_ref()\n    {\n        // 0xdeadbeef: 3735928559\n        assert(_magic == 0xdeadbeef);\n\n        // It is important to enforce any possible access to the object in one thread\n        // (through an existing reference) to happen before deleting the object in a different\n        // thread.\n        // This is achieved by a \"release\" operation after dropping a reference\n        // (any access to the object through this reference must obviously have happened before),\n        // and an \"acquire\" operation before deleting the object.\n        // reference: http://www.boost.org/doc/libs/1_60_0/doc/html/atomic/usage_examples.html\n        if (_counter.fetch_sub(1, std::memory_order_release) == 1) {\n            std::atomic_thread_fence(std::memory_order_acquire);\n            delete this;\n        }\n    }\n\n    long get_count() const { return _counter.load(); }\n\nprotected:\n    unsigned int _magic;\n    std::atomic<long> _counter;\n\npublic:\n    ref_counter(const ref_counter &) = delete;\n    ref_counter &operator=(const ref_counter &) = delete;\n};\n\ntemplate <typename T> // T : ref_counter\nclass ref_ptr\n{\npublic:\n    ref_ptr() : _obj(nullptr) {}\n\n    ref_ptr(T *obj) : _obj(obj)\n    {\n        if (nullptr != _obj)\n            _obj->add_ref();\n    }\n\n    ref_ptr(const ref_ptr<T> &r)\n    {\n        _obj = r.get();\n        if (nullptr != _obj)\n            _obj->add_ref();\n    }\n\n    template <typename U,\n              typename = typename std::enable_if<std::is_convertible<U *, T *>::value>::type>\n    ref_ptr(const ref_ptr<U> &r)\n    {\n        _obj = r.get();\n        if (nullptr != _obj)\n            _obj->add_ref();\n    
}\n\n    ref_ptr(ref_ptr<T> &&r) : _obj(r._obj) { r._obj = nullptr; }\n\n    template <typename U,\n              typename = typename std::enable_if<std::is_convertible<U *, T *>::value>::type>\n    ref_ptr(ref_ptr<U> &&r) noexcept : _obj(r._obj)\n    {\n        r._obj = nullptr;\n    }\n\n    ~ref_ptr()\n    {\n        if (nullptr != _obj) {\n            _obj->release_ref();\n        }\n    }\n\n    ref_ptr<T> &operator=(T *obj) { return *this = ref_ptr(obj); }\n\n    ref_ptr<T> &operator=(ref_ptr<T> r) noexcept\n    {\n        swap(r);\n        return *this;\n    }\n\n    template <typename U,\n              typename = typename std::enable_if<std::is_convertible<U *, T *>::value>::type>\n    ref_ptr<T> &operator=(ref_ptr<U> r) noexcept\n    {\n        ref_ptr<T> p(r);\n        swap(p);\n        return *this;\n    }\n\n    void swap(ref_ptr<T> &r) noexcept { std::swap(_obj, r._obj); }\n\n    T *get() const { return _obj; }\n\n    operator T *() const { return _obj; }\n\n    T &operator*() const { return (*_obj); }\n\n    T *operator->() const { return _obj; }\n\n    bool operator==(T *r) const { return _obj == r; }\n\n    bool operator!=(T *r) const { return _obj != r; }\n\nprivate:\n    T *_obj;\n\n    template <typename U>\n    friend class ref_ptr;\n};\n\n} // end namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/binary_reader.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <cstring>\n#include <dsn/utility/blob.h>\n#include <gtest/gtest_prod.h>\n\nnamespace dsn {\nclass binary_reader\n{\npublic:\n    // given bb on ctor\n    binary_reader(const blob &blob);\n    binary_reader(blob &&blob);\n\n    // or delayed init\n    binary_reader() {}\n\n    virtual ~binary_reader() {}\n\n    void init(const blob &bb);\n    void init(blob &&bb);\n\n    template <typename T>\n    int read_pod(/*out*/ T &val);\n    template <typename T>\n    int read(/*out*/ T &val)\n    {\n        // read of this type is not implemented\n        assert(false);\n        return 0;\n    }\n    int read(/*out*/ int8_t &val) { return read_pod(val); }\n    int read(/*out*/ uint8_t &val) { return read_pod(val); }\n  
  int read(/*out*/ int16_t &val) { return read_pod(val); }\n    int read(/*out*/ uint16_t &val) { return read_pod(val); }\n    int read(/*out*/ int32_t &val) { return read_pod(val); }\n    int read(/*out*/ uint32_t &val) { return read_pod(val); }\n    int read(/*out*/ int64_t &val) { return read_pod(val); }\n    int read(/*out*/ uint64_t &val) { return read_pod(val); }\n    int read(/*out*/ bool &val) { return read_pod(val); }\n\n    int read(/*out*/ std::string &s);\n    virtual int read(char *buffer, int sz);\n    int read(blob &blob);\n    virtual int read(blob &blob, int len);\n\n    blob get_buffer() const { return _blob; }\n    blob get_remaining_buffer() const { return _blob.range(static_cast<int>(_ptr - _blob.data())); }\n    bool is_eof() const { return _ptr >= _blob.data() + _size; }\n    int total_size() const { return _size; }\n    int get_remaining_size() const { return _remaining_size; }\n\nprotected:\n    int inner_read(blob &blob, int len);\n    int inner_read(char *buffer, int sz);\n\nprivate:\n    blob _blob;\n    int _size;\n    const char *_ptr;\n    int _remaining_size;\n\n    FRIEND_TEST(binary_reader_test, inner_read);\n};\n\ntemplate <typename T>\ninline int binary_reader::read_pod(/*out*/ T &val)\n{\n    if (sizeof(T) <= get_remaining_size()) {\n        memcpy((void *)&val, _ptr, sizeof(T));\n        _ptr += sizeof(T);\n        _remaining_size -= sizeof(T);\n        return static_cast<int>(sizeof(T));\n    } else {\n        // read beyond the end of buffer\n        assert(false);\n        return 0;\n    }\n}\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/binary_writer.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/blob.h>\n#include <cstring>\n\nnamespace dsn {\n\nclass binary_writer\n{\npublic:\n    binary_writer(int reserved_buffer_size = 0);\n    binary_writer(blob &buffer);\n    virtual ~binary_writer();\n\n    virtual void flush();\n\n    template <typename T>\n    void write_pod(const T &val);\n    template <typename T>\n    void write(const T &val)\n    {\n        // write of this type is not implemented\n        assert(false);\n    }\n    void write(const int8_t &val) { write_pod(val); }\n    void write(const uint8_t &val) { write_pod(val); }\n    void write(const int16_t &val) { write_pod(val); }\n    void write(const uint16_t &val) { write_pod(val); }\n    void write(const int32_t &val) { 
write_pod(val); }\n    void write(const uint32_t &val) { write_pod(val); }\n    void write(const int64_t &val) { write_pod(val); }\n    void write(const uint64_t &val) { write_pod(val); }\n    void write(const bool &val) { write_pod(val); }\n\n    void write(const std::string &val);\n    void write(const char *buffer, int sz);\n    void write(const blob &val);\n    void write_empty(int sz);\n\n    bool next(void **data, int *size);\n    bool backup(int count);\n\n    void get_buffers(/*out*/ std::vector<blob> &buffers);\n    int get_buffer_count() const { return static_cast<int>(_buffers.size()); }\n    blob get_buffer();\n    blob get_current_buffer(); // without commit, write can be continued on the last buffer\n    blob get_first_buffer() const;\n\n    int total_size() const { return _total_size; }\n\nprotected:\n    // bb may have larger space than size\n    void create_buffer(size_t size);\n    void commit();\n    virtual void create_new_buffer(size_t size, /*out*/ blob &bb);\n\nprivate:\n    std::vector<blob> _buffers;\n\n    char *_current_buffer;\n    int _current_offset;\n    int _current_buffer_length;\n\n    int _total_size;\n    int _reserved_size_per_buffer;\n    static int _reserved_size_per_buffer_static;\n};\n\n//--------------- inline implementation -------------------\ntemplate <typename T>\ninline void binary_writer::write_pod(const T &val)\n{\n    write((char *)&val, static_cast<int>(sizeof(T)));\n}\n\ninline void binary_writer::get_buffers(/*out*/ std::vector<blob> &buffers)\n{\n    commit();\n    buffers = _buffers;\n}\n\ninline blob binary_writer::get_first_buffer() const { return _buffers[0]; }\n\ninline void binary_writer::write(const std::string &val)\n{\n    int len = static_cast<int>(val.length());\n    write((const char *)&len, sizeof(int));\n    if (len > 0)\n        write((const char *)&val[0], len);\n}\n\ninline void binary_writer::write(const blob &val)\n{\n    // TODO: optimization by not memcpy\n    int len = val.length();\n    
write((const char *)&len, sizeof(int));\n    if (len > 0)\n        write((const char *)val.data(), len);\n}\n}\n"
  },
  {
    "path": "include/dsn/utility/blob.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <memory>\n#include <thrift/protocol/TProtocol.h>\n\nnamespace dsn {\n\n/// dsn::blob is a special thrift type that's not generated by thrift compiler,\n/// but defined by the rDSN framework. 
Unlike thrift `string`, dsn::blob is\n/// implemented by ref-counted buffer.\nclass blob\n{\npublic:\n    constexpr blob() = default;\n\n    blob(std::shared_ptr<char> buffer, unsigned int length)\n        : _holder(std::move(buffer)), _buffer(_holder.get()), _data(_holder.get()), _length(length)\n    {\n    }\n\n    blob(std::shared_ptr<char> buffer, int offset, unsigned int length)\n        : _holder(std::move(buffer)),\n          _buffer(_holder.get()),\n          _data(_holder.get() + offset),\n          _length(length)\n    {\n    }\n\n    /// NOTE: Use dsn::string_view whenever possible.\n    /// blob is designed for shared buffer, never use it as constant view.\n    /// Maybe we could deprecate this function in the future.\n    blob(const char *buffer, int offset, unsigned int length)\n        : _buffer(buffer), _data(buffer + offset), _length(length)\n    {\n    }\n\n    /// Create shared buffer from allocated raw bytes.\n    /// NOTE: this operation is not efficient since it involves a memory copy.\n    static blob create_from_bytes(const char *s, size_t len)\n    {\n        std::shared_ptr<char> s_arr(new char[len], std::default_delete<char[]>());\n        memcpy(s_arr.get(), s, len);\n        return blob(std::move(s_arr), 0, static_cast<unsigned int>(len));\n    }\n\n    /// Create shared buffer without copying data.\n    static blob create_from_bytes(std::string &&bytes)\n    {\n        auto s = new std::string(std::move(bytes));\n        std::shared_ptr<char> buf(const_cast<char *>(s->data()), [s](char *) { delete s; });\n        return blob(std::move(buf), 0, static_cast<unsigned int>(s->length()));\n    }\n\n    void assign(const std::shared_ptr<char> &buffer, int offset, unsigned int length)\n    {\n        _holder = buffer;\n        _buffer = _holder.get();\n        _data = _holder.get() + offset;\n        _length = length;\n    }\n\n    void assign(std::shared_ptr<char> &&buffer, int offset, unsigned int length)\n    {\n        _holder = 
std::move(buffer);\n        _buffer = (_holder.get());\n        _data = (_holder.get() + offset);\n        _length = length;\n    }\n\n    /// Deprecated. Use dsn::string_view whenever possible.\n    void assign(const char *buffer, int offset, unsigned int length)\n    {\n        _holder = nullptr;\n        _buffer = buffer;\n        _data = buffer + offset;\n        _length = length;\n    }\n\n    const char *data() const noexcept { return _data; }\n\n    unsigned int length() const noexcept { return _length; }\n    unsigned int size() const noexcept { return _length; }\n\n    std::shared_ptr<char> buffer() const { return _holder; }\n\n    const char *buffer_ptr() const { return _holder.get(); }\n\n    // offset can be negative for buffer dereference\n    blob range(int offset) const\n    {\n        // offset cannot exceed the current length value\n        assert(offset <= static_cast<int>(_length));\n\n        blob temp = *this;\n        temp._data += offset;\n        temp._length -= offset;\n        return temp;\n    }\n\n    blob range(int offset, unsigned int len) const\n    {\n        // offset cannot exceed the current length value\n        assert(offset <= static_cast<int>(_length));\n\n        blob temp = *this;\n        temp._data += offset;\n        temp._length -= offset;\n\n        // buffer length must exceed the required length\n        assert(temp._length >= len);\n        temp._length = len;\n        return temp;\n    }\n\n    bool operator==(const blob &r) const\n    {\n        // not implemented\n        assert(false);\n        return false;\n    }\n\n    std::string to_string() const\n    {\n        if (_length == 0)\n            return {};\n        return std::string(_data, _length);\n    }\n\n    // for serialization in thrift format\n    uint32_t read(::apache::thrift::protocol::TProtocol *iprot);\n    uint32_t write(::apache::thrift::protocol::TProtocol *oprot) const;\n\nprivate:\n    friend class binary_writer;\n    std::shared_ptr<char> 
_holder;\n    const char *_buffer{nullptr};\n    const char *_data{nullptr};\n    unsigned int _length{0}; // data length\n};\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/casts.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <cassert>\n#include <type_traits>\n\nnamespace dsn {\n\n// Downcasting is to convert a base-class pointer(reference) to a derived-class\n// pointer(reference). As a usual approach, RTTI (dynamic_cast<>) is not efficient.\n// Instead, we can perform a compile-time assertion check whether one is derived\n// from another; then, just use static_cast<> to do the conversion faster. RTTI is\n// also run in debug mode to do double-check.\n\ntemplate <typename To, typename From>\ninline To down_cast(From *from)\n{\n    // Perform a compile-time assertion to check whether <From> class is derived from <To> class.\n    static_assert(std::is_base_of<typename std::remove_pointer<From>::type,\n                                  typename std::remove_pointer<To>::type>::value,\n                  \"<From> class is not derived from <To> class\");\n\n    // Use RTTI to do double-check, though in practice the unit tests are seldom built in debug\n    // mode. 
For example, the unit tests of github CI for both rDSN and Pegasus are built in\n    // release mode.\n    assert(from == NULL || dynamic_cast<To>(from) != NULL);\n\n    return static_cast<To>(from);\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/chrono_literals.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <chrono>\n\n/// This is a simple implementation of chrono literals\n/// (http://en.cppreference.com/w/cpp/chrono/duration#Literals).\n/// Deprecate this when we have our compiler version updated to gcc-5\n/// (https://en.cppreference.com/w/cpp/compiler_support).\n\nnamespace dsn {\n\n/// Example:\n///\n///   using namespace dsn::literals::chrono_literals;\n///\n///   template <typename F>\n///   void schedule(F &&f, std::chrono::milliseconds delay_ms = 0_ms);\n///\n\ninline namespace literals {\ninline namespace chrono_literals {\n\nconstexpr std::chrono::hours operator\"\" _h(unsigned long long v) { return std::chrono::hours{v}; }\n\nconstexpr std::chrono::minutes operator\"\" _min(unsigned long long v)\n{\n 
   return std::chrono::minutes{v};\n}\n\nconstexpr std::chrono::seconds operator\"\" _s(unsigned long long v)\n{\n    return std::chrono::seconds{v};\n}\n\nconstexpr std::chrono::milliseconds operator\"\" _ms(unsigned long long v)\n{\n    return std::chrono::milliseconds{v};\n}\n\nconstexpr std::chrono::microseconds operator\"\" _us(unsigned long long v)\n{\n    return std::chrono::microseconds{v};\n}\n\nconstexpr std::chrono::nanoseconds operator\"\" _ns(unsigned long long v)\n{\n    return std::chrono::nanoseconds{v};\n}\n\n} // inline namespace chrono_literals\n} // inline namespace literals\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/clock.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <memory>\n\nnamespace dsn {\nnamespace utils {\n\nclass clock\n{\npublic:\n    clock() = default;\n    virtual ~clock() = default;\n\n    // Gets current time in nanoseconds.\n    virtual uint64_t now_ns() const;\n\n    // Gets singleton instance. eager singleton, which is thread safe\n    static const clock *instance();\n\n    // Resets the global clock implementation (not thread-safety)\n    static void mock(clock *mock_clock);\n\nprivate:\n    static std::unique_ptr<clock> _clock;\n};\n\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/config_api.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#pragma once\n\n#include <string>\n#include <vector>\n#include <cstdint>\n\n/// load a ini configuration file, and replace specific strings in file with arguments.\n///\n/// the rules of replacement is as follows:\n///     1. arguments are with format of k1=v1,k2=v2,k3=v3\n///     2. 
if a string in the file is %k1%, it will be replaced with v1.\n///        %k2%, %k3% will be replaced with v2 and v3 in a similar fashion\n///\n/// for example:\n///\n/// the file content:\n/// \"\n/// [example_section]\n/// %replace_key% = real_value\n/// real_port = %host_port%\n/// \"\n///\n/// and the arguments \"replace_key=key,host_port=8080\"\n///\n/// so the loaded config will be:\n/// \"\n/// [example_section]\n/// key = real_value\n/// real_port = 8080\n/// \"\n///\n/// return true if loading the config file succeeds, otherwise false\n///\n/// please call this function at the beginning of your process, otherwise none of the\n/// dsn_config_xxx functions work\n///\n/// the function is not thread safe.\nbool dsn_config_load(const char *file, const char *arguments);\n\n/// dump the global configuration\nvoid dsn_config_dump(std::ostream &os);\n\n/// the section format:\n/// [section]\n/// key = value\n///\n/// this function gets the value of a key in some section.\n/// if <key, value> is not present in section, then return default_value.\n/// dsptr is the description of the key-value pairs.\n///\n/// this function is not thread safe if get/set are concurrently called\nconst char *dsn_config_get_value_string(const char *section,\n                                        const char *key,\n                                        const char *default_value,\n                                        const char *dsptr);\n\n/// get value of a key in some section, the value should be \"true\" or \"false\"\n/// this function is not thread safe if dsn_config_set is concurrently called\nbool dsn_config_get_value_bool(const char *section,\n                               const char *key,\n                               bool default_value,\n                               const char *dsptr);\n\n/// get value of a key in some section, the value should be decimal in range of uint64_t\n/// this function is not thread safe if dsn_config_set is concurrently called\nuint64_t 
dsn_config_get_value_uint64(const char *section,\n                                     const char *key,\n                                     uint64_t default_value,\n                                     const char *dsptr);\n\n/// get value of a key in some section, the value should be decimal in range of int64_t\n/// this function is not thread safe if dsn_config_set is concurrently called\nint64_t dsn_config_get_value_int64(const char *section,\n                                   const char *key,\n                                   int64_t default_value,\n                                   const char *dsptr);\n\n/// get value of a key in some section, the value should be decimal in range of double\n/// this function is not thread safe if dsn_config_set is concurrently called\ndouble dsn_config_get_value_double(const char *section,\n                                   const char *key,\n                                   double default_value,\n                                   const char *dsptr);\n\n/// get the names of all sections\n/// this function is not thread safe if dsn_config_set is concurrently called\nvoid dsn_config_get_all_sections(/*out*/ std::vector<std::string> &sections);\n\n/// get the names of all sections\n/// this function is not thread safe if dsn_config_set is concurrently called\nvoid dsn_config_get_all_sections(/*out*/ std::vector<const char *> &sections);\n\n/// get all keys in some specific section\n/// this function is not thread safe if dsn_config_set is concurrently called\nvoid dsn_config_get_all_keys(const char *section, /*out*/ std::vector<std::string> &keys);\n\n/// get all keys in some specific section\n/// this function is not thread safe if dsn_config_set is concurrently called\nvoid dsn_config_get_all_keys(const char *section, /*out*/ std::vector<const char *> &keys);\n\n/// set value for a key of some section.\n/// if the section doesn't exist, a new one will be created.\n/// if the key doesn't exist, a new one will be 
created\n///\n/// multiple concurrent set are thread safe.\n///\n/// any of dsn_config_get_xxx may corrupt if called concurrently with dsn_config_set\nvoid dsn_config_set(const char *section, const char *key, const char *value, const char *dsptr);\n"
  },
  {
    "path": "include/dsn/utility/config_helper.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/config_api.h>\n#include <dsn/utility/strings.h>\n\n/// you can use following macros to implement a function called \"read_config\"\n/// to initialize a structure from the configuration file quickly\n///\n/// please refer to \"task_spec.h\". 
it's a very good example\n\n#define CONFIG_BEGIN(t_struct)                                                                     \\\n    inline bool read_config(                                                                       \\\n        const char *section, /*out*/ t_struct &val, t_struct *default_value = nullptr)             \\\n    {\n\n#define CONFIG_END                                                                                 \\\n    return true;                                                                                   \\\n    }\n\n// type fld = xyz\n#define CONFIG_FLD(real_type, config_type, fld, default_fld_value, dsptr)                          \\\n    val.fld = (real_type)dsn_config_get_value_##config_type(                                       \\\n        section, #fld, default_value ? default_value->fld : default_fld_value, dsptr);\n\n#define CONFIG_FLD_STRING(fld, default_fld_value, dsptr)                                           \\\n    val.fld = dsn_config_get_value_string(                                                         \\\n        section,                                                                                   \\\n        #fld,                                                                                      \\\n        (val.fld.length() > 0 && val.fld != std::string(default_fld_value))                        \\\n            ? val.fld.c_str()                                                                      \\\n            : (default_value ? 
default_value->fld.c_str() : default_fld_value),                    \\\n        dsptr);\n\n// customized_id<type> fld = xyz\n#define CONFIG_FLD_ID(type, fld, default_fld_value, defined_before_read_config, dsptr)             \\\n    {                                                                                              \\\n        std::string v = dsn_config_get_value_string(section, #fld, \"\", dsptr);                     \\\n        if (v == \"\") {                                                                             \\\n            if (!defined_before_read_config) {                                                     \\\n                if (default_value)                                                                 \\\n                    val.fld = default_value->fld;                                                  \\\n                else                                                                               \\\n                    val.fld = default_fld_value;                                                   \\\n            }                                                                                      \\\n        } else {                                                                                   \\\n            if (!type::is_exist(v.c_str())) {                                                      \\\n                printf(\"invalid enum configuration '[%s] %s = %s'\\n\", section, #fld, v.c_str());   \\\n                return false;                                                                      \\\n            } else                                                                                 \\\n                val.fld = type(v.c_str());                                                         \\\n        }                                                                                          \\\n    }\n\n// enum type fld = xyz\n#define CONFIG_FLD_ENUM(                                                            
               \\\n    type, fld, default_fld_value, invalid_enum, defined_before_read_config, dsptr)                 \\\n    {                                                                                              \\\n        std::string v = dsn_config_get_value_string(section, #fld, \"\", dsptr);                     \\\n        if (v == \"\") {                                                                             \\\n            if (!defined_before_read_config) {                                                     \\\n                if (default_value)                                                                 \\\n                    val.fld = default_value->fld;                                                  \\\n                else                                                                               \\\n                    val.fld = default_fld_value;                                                   \\\n            }                                                                                      \\\n        } else {                                                                                   \\\n            auto v2 = enum_from_string(v.c_str(), invalid_enum);                                   \\\n            if (v2 == invalid_enum) {                                                              \\\n                printf(\"invalid enum configuration '[%s] %s = %s'\\n\", section, #fld, v.c_str());   \\\n                return false;                                                                      \\\n            } else                                                                                 \\\n                val.fld = v2;                                                                      \\\n        }                                                                                          \\\n    }\n\n// list<customized_id<type>> fld = x,y,z\n#define CONFIG_FLD_ID_LIST(type, fld, dsptr)                         
                              \\\n    {                                                                                              \\\n        val.fld.clear();                                                                           \\\n        std::string vv = dsn_config_get_value_string(section, #fld, \"\", dsptr);                    \\\n        std::list<std::string> lv;                                                                 \\\n        ::dsn::utils::split_args(vv.c_str(), lv, ',');                                             \\\n        for (auto &v : lv) {                                                                       \\\n            if (!type::is_exist(v.c_str())) {                                                      \\\n                printf(\"invalid enum configuration '[%s] %s = %s'\\n\", section, #fld, v.c_str());   \\\n                return false;                                                                      \\\n            } else                                                                                 \\\n                val.fld.push_back(type(v.c_str()));                                                \\\n        }                                                                                          \\\n        if (val.fld.size() == 0 && default_value)                                                  \\\n            val.fld = default_value->fld;                                                          \\\n    }\n\n// list<type> fld = x,y,z\n#define CONFIG_FLD_STRING_LIST(fld, dsptr)                                                         \\\n    {                                                                                              \\\n        std::string vv = dsn_config_get_value_string(section, #fld, \"\", dsptr);                    \\\n        ::dsn::utils::split_args(vv.c_str(), val.fld, ',');                                        \\\n        if (val.fld.size() == 0 && default_value)                    
                              \\\n            val.fld = default_value->fld;                                                          \\\n    }\n\n// cb: std::list<int>& => fld value\n#define CONFIG_FLD_INT_LIST(fld, dsptr)                                                            \\\n    {                                                                                              \\\n        std::string vv = dsn_config_get_value_string(section, #fld, \"\", dsptr);                    \\\n        std::list<std::string> lv;                                                                 \\\n        ::dsn::utils::split_args(vv.c_str(), lv, ',');                                             \\\n        if (lv.size() == 0 && default_value)                                                       \\\n            val.fld = default_value->fld;                                                          \\\n        else {                                                                                     \\\n            for (auto &s : lv) {                                                                   \\\n                val.fld.push_back(atoi(s.c_str()));                                                \\\n            }                                                                                      \\\n        }                                                                                          \\\n    }\n"
  },
  {
    "path": "include/dsn/utility/configuration.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <memory>\n#include <vector>\n#include <map>\n#include <cstdio>\n#include <cstdlib>\n#include <cstring>\n#include <string>\n#include <list>\n#include <mutex>\n#include <dsn/utility/string_conv.h>\n\nnamespace dsn {\n\nclass configuration\n{\npublic:\n    configuration();\n\n    ~configuration();\n\n    // arguments: k1=v1;k2=v2;k3=v3; ...\n    // e.g.,\n    //    port = %port%\n    //    timeout = %timeout%\n    // arguments: port=23466;timeout=1000\n    bool load(const char *file_name, const char *arguments = nullptr);\n\n    void get_all_sections(std::vector<std::string> &sections);\n\n    void get_all_section_ptrs(std::vector<const char *> &sections);\n\n    void get_all_keys(const char *section, 
std::vector<const char *> &keys);\n\n    const char *get_string_value(const char *section,\n                                 const char *key,\n                                 const char *default_value,\n                                 const char *dsptr);\n\n    std::list<std::string>\n    get_string_value_list(const char *section, const char *key, char splitter, const char *dsptr);\n\n    void set(const char *section, const char *key, const char *value, const char *dsptr);\n\n    bool has_section(const char *section);\n\n    bool has_key(const char *section, const char *key);\n\n    const char *get_file_name() const { return _file_name.c_str(); }\n\n    bool set_warning(bool warn)\n    {\n        bool old = _warning;\n        _warning = warn;\n        return old;\n    }\n\n    void dump(std::ostream &os);\n\n    // ---------------------- commmon routines ----------------------------------\n\n    template <typename T>\n    T get_value(const char *section, const char *key, T default_value, const char *dsptr);\n\nprivate:\n    bool get_string_value_internal(const char *section,\n                                   const char *key,\n                                   const char *default_value,\n                                   const char **ov,\n                                   const char *dsptr);\n\nprivate:\n    struct conf\n    {\n        std::string section;\n        std::string key;\n        std::string value;\n        int line;\n\n        bool present;\n        std::string dsptr;\n    };\n\n    typedef std::map<std::string, std::map<std::string, conf *>> config_map;\n    std::mutex _lock;\n    config_map _configs;\n\n    std::string _file_name;\n    std::string _file_data;\n    bool _warning;\n};\n\ntemplate <>\ninline std::string configuration::get_value<std::string>(const char *section,\n                                                         const char *key,\n                                                         std::string default_value,\n             
                                            const char *dsptr)\n{\n    return get_string_value(section, key, default_value.c_str(), dsptr);\n}\n\ntemplate <>\ninline double configuration::get_value<double>(const char *section,\n                                               const char *key,\n                                               double default_value,\n                                               const char *dsptr)\n{\n    const char *value;\n    char defaultstr[32];\n    sprintf(defaultstr, \"%lf\", default_value);\n\n    if (!get_string_value_internal(section, key, defaultstr, &value, dsptr)) {\n        if (_warning) {\n            printf(\"WARNING: configuration '[%s] %s' is not defined, default value is '%lf'\\n\",\n                   section,\n                   key,\n                   default_value);\n        }\n\n        return default_value;\n    } else {\n        return atof(value);\n    }\n}\n\ntemplate <>\ninline int64_t configuration::get_value<int64_t>(const char *section,\n                                                 const char *key,\n                                                 int64_t default_value,\n                                                 const char *dsptr)\n{\n    const char *value;\n    char defaultstr[32];\n    sprintf(defaultstr, \"%\" PRId64, default_value);\n\n    if (!get_string_value_internal(section, key, defaultstr, &value, dsptr)) {\n        if (_warning) {\n            printf(\"WARNING: configuration '[%s] %s' is not defined, default value is '%\" PRId64\n                   \"'\\n\",\n                   section,\n                   key,\n                   default_value);\n        }\n\n        return default_value;\n    } else {\n        int64_t result = default_value;\n        bool suc = dsn::buf2int64(value, result);\n        assert(suc || result == default_value);\n        return result;\n    }\n}\n\ntemplate <>\ninline uint64_t configuration::get_value<uint64_t>(const char *section,\n                      
                             const char *key,\n                                                   uint64_t default_value,\n                                                   const char *dsptr)\n{\n    const char *value;\n    char defaultstr[32];\n    sprintf(defaultstr, \"%\" PRIu64, default_value);\n\n    if (!get_string_value_internal(section, key, defaultstr, &value, dsptr)) {\n        if (_warning) {\n            printf(\"WARNING: configuration '[%s] %s' is not defined, default value is '%\" PRIu64\n                   \"'\\n\",\n                   section,\n                   key,\n                   default_value);\n        }\n\n        return default_value;\n    } else {\n        uint64_t result = default_value;\n        bool suc = dsn::buf2uint64(value, result);\n        assert(suc || result == default_value);\n        return result;\n    }\n}\n\ntemplate <>\ninline bool configuration::get_value<bool>(const char *section,\n                                           const char *key,\n                                           bool default_value,\n                                           const char *dsptr)\n{\n    const char *value;\n    const char *defaultstr = (default_value ? \"true\" : \"false\");\n\n    if (!get_string_value_internal(section, key, defaultstr, &value, dsptr)) {\n        if (_warning) {\n            printf(\"WARNING: configuration '[%s] %s' is not defined, default value is '%s'\\n\",\n                   section,\n                   key,\n                   default_value ? \"true\" : \"false\");\n        }\n        return default_value;\n    } else {\n        bool result = default_value;\n        bool suc = dsn::buf2bool(value, result);\n        assert(suc || result == default_value);\n        return result;\n    }\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/crc.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <cstdint>\n#include <cstddef>\n\n#define CRC_INVALID 0x0\n\nnamespace dsn {\nnamespace utils {\n\nuint32_t crc32_calc(const void *ptr, size_t size, uint32_t init_crc);\n\n//\n// Given\n//      x_final = crc32_calc(x_ptr, x_size, x_init);\n// and\n//      y_final = crc32_calc(y_ptr, y_size, y_init);\n// compute CRC of concatenation of A and B\n//      x##y_crc = crc32_calc(x##y, x_size + y_size, xy_init);\n// without touching A and B\n//\nuint32_t crc32_concat(uint32_t xy_init,\n                      uint32_t x_init,\n                      uint32_t x_final,\n                      size_t x_size,\n                      uint32_t y_init,\n                      uint32_t y_final,\n                      size_t 
y_size);\n\nuint64_t crc64_calc(const void *ptr, size_t size, uint64_t init_crc);\n\n//\n// Given\n//      x_final = crc64_calc(x_ptr, x_size, x_init);\n// and\n//      y_final = crc64_calc(y_ptr, y_size, y_init);\n// compute CRC of concatenation of x and y\n//      x##y_crc = crc64_calc(x##y, x_size + y_size, xy_init);\n// without touching x and y\n//\nuint64_t crc64_concat(uint32_t xy_init,\n                      uint64_t x_init,\n                      uint64_t x_final,\n                      size_t x_size,\n                      uint64_t y_init,\n                      uint64_t y_final,\n                      size_t y_size);\n}\n}\n"
  },
  {
    "path": "include/dsn/utility/customizable_id.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     dynamic and seperated string to/from integer id mapping\n *     in constrast to defining all enums in a single file\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/utility/singleton.h>\n#include <dsn/utility/ports.h>\n#include <string>\n#include <unordered_map>\n#include <vector>\n\nnamespace dsn {\nnamespace utils {\n\n#define DEFINE_CUSTOMIZED_ID(T, name) __selectany const T name(#name);\n#define DEFINE_CUSTOMIZED_ID_LONG(T, name, ...) 
__selectany const T name(#name, __VA_ARGS__);\n\n#define DEFINE_CUSTOMIZED_ID_TYPE(T)                                                               \\\n    struct T##_                                                                                    \\\n    {                                                                                              \\\n    };                                                                                             \\\n    typedef dsn::utils::customized_id<T##_> T;\n\ntemplate <typename T>\nclass customized_id_mgr : public dsn::utils::singleton<customized_id_mgr<T>>\n{\npublic:\n    customized_id_mgr() : _names(199) {}\n    int get_id(const char *name) const;\n    int get_id(const std::string &name) const;\n    const char *get_name(int id) const;\n    int register_id(const char *name);\n    int max_value() const { return static_cast<int>(_names2.size()) - 1; }\n\nprivate:\n    std::unordered_map<std::string, int> _names;\n    std::vector<std::string> _names2;\n};\n\ntemplate <typename T>\nstruct customized_id\n{\n    customized_id(const char *name);\n    customized_id(const customized_id &source);\n    operator int() const;\n    operator T() const { return T(_internal_code); }\n    const char *to_string() const;\n    void reset(const customized_id &r);\n\n    static int max_value();\n    static const char *to_string(int code);\n    static bool is_exist(const char *name);\n    static customized_id from_string(const char *name, customized_id invalid_value);\n\nprotected:\n    static int assign(const char *xxx);\n    customized_id(int code);\n\nprotected:\n    int _internal_code;\n\n    // private:\n    //    // no assignment operator\n    //    customized_id<T>& operator=(const customized_id<T>& source);\n};\n\n// -------------------------- inline implementation ----------------------------\n\ntemplate <typename T>\ncustomized_id<T>::customized_id(const char *name) : _internal_code(assign(name))\n{\n}\n\ntemplate <typename 
T>\ncustomized_id<T>::customized_id(const customized_id &source) : _internal_code(source._internal_code)\n{\n}\n\ntemplate <typename T>\ncustomized_id<T>::operator int() const\n{\n    return _internal_code;\n}\n\ntemplate <typename T>\nconst char *customized_id<T>::to_string() const\n{\n    return customized_id_mgr<T>::instance().get_name(_internal_code);\n}\n\ntemplate <typename T>\nvoid customized_id<T>::reset(const customized_id<T> &r)\n{\n    _internal_code = r._internal_code;\n}\n\ntemplate <typename T>\nint customized_id<T>::max_value()\n{\n    return customized_id_mgr<T>::instance().max_value();\n}\n\ntemplate <typename T>\nconst char *customized_id<T>::to_string(int code)\n{\n    return customized_id_mgr<T>::instance().get_name(code);\n}\n\ntemplate <typename T>\nbool customized_id<T>::is_exist(const char *name)\n{\n    return customized_id_mgr<T>::instance().get_id(name) != -1;\n}\n\ntemplate <typename T>\ncustomized_id<T> customized_id<T>::from_string(const char *name, customized_id invalid_value)\n{\n    int id = customized_id_mgr<T>::instance().get_id(name);\n    if (id == -1)\n        return invalid_value;\n    else\n        return customized_id<T>(id);\n}\n\ntemplate <typename T>\nint customized_id<T>::assign(const char *name)\n{\n    return customized_id_mgr<T>::instance().register_id(name);\n}\n\ntemplate <typename T>\ncustomized_id<T>::customized_id(int code) : _internal_code(code)\n{\n}\n\ntemplate <typename T>\nint customized_id_mgr<T>::get_id(const char *name) const\n{\n    auto it = _names.find(std::string(name));\n    if (it == _names.end())\n        return -1;\n    else\n        return it->second;\n}\n\ntemplate <typename T>\nint customized_id_mgr<T>::get_id(const std::string &name) const\n{\n    auto it = _names.find(name);\n    if (it == _names.end())\n        return -1;\n    else\n        return it->second;\n}\n\ntemplate <typename T>\nconst char *customized_id_mgr<T>::get_name(int id) const\n{\n    if (id < 
static_cast<int>(_names2.size()))\n        return _names2[id].c_str();\n    else\n        return \"unknown\";\n}\n\ntemplate <typename T>\nint customized_id_mgr<T>::register_id(const char *name)\n{\n    int id = get_id(name);\n    if (-1 != id) {\n        return id;\n    }\n\n    int code = static_cast<int>(_names.size());\n    _names[std::string(name)] = code;\n    _names2.push_back(std::string(name));\n    return code;\n}\n}\n} // end namespace dsn::utils\n"
  },
  {
    "path": "include/dsn/utility/defer.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <utility>\n\nnamespace dsn {\n\n// `defer` is a useful util to implement RAII in golang, much like\n// `try...finally...` in java. In C++ we used to implement an RAII class\n// wrapping around the resource:\n//\n// ```cpp\n// struct object_raii\n// {\n//   object_raii() {\n//     _obj = c_object_new();\n//   }\n//   ~object_raii() {\n//     c_object_free(_obj);\n//   }\n// private:\n//   c_object *_obj;\n// };\n// ```\n//\n// Now with `defer`, things will be simplified:\n//\n// ```cpp\n// c_object *obj = c_object_new();\n// auto cleanup = dsn::defer([obj]() { c_object_free(obj); });\n// ```\n\ntemplate <typename Func>\nstruct deferred_action\n{\n    explicit deferred_action(Func &&func) noexcept : _func(std::move(func)) {}\n    ~deferred_action() { _func(); }\nprivate:\n    Func _func;\n};\n\ntemplate <typename Func>\ninline deferred_action<Func> defer(Func &&func)\n{\n    return deferred_action<Func>(std::forward<Func>(func));\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/dlib.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     utilities for dynamic libraries\n *\n * Revision history:\n *     Jul., 2016, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#pragma once\n\n#if defined(DSN_IN_CORE)\n#if defined(_WIN32)\n#define DSN_API __declspec(dllexport)\n#else\n#define DSN_API __attribute__((visibility(\"default\")))\n#endif\n#else\n#if defined(_WIN32)\n#define DSN_API __declspec(dllimport)\n#else\n#define DSN_API\n#endif\n#endif\n"
  },
  {
    "path": "include/dsn/utility/endians.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <cstdint>\n#include <cassert>\n#include <dsn/utility/ports.h>\n#include <dsn/utility/string_view.h>\n\nnamespace dsn {\n\nnamespace endian {\n\ninline uint8_t hton(uint8_t v) { return v; }\n\ninline uint8_t ntoh(uint8_t v) { return v; }\n\n#if defined(__linux__)\n\ninline uint16_t hton(uint16_t v) { return htobe16(v); }\n\ninline uint32_t hton(uint32_t v) { return htobe32(v); }\n\ninline uint64_t hton(uint64_t v) { return htobe64(v); }\n\ninline uint16_t ntoh(uint16_t v) { return be16toh(v); }\n\ninline uint32_t ntoh(uint32_t v) { return be32toh(v); }\n\ninline uint64_t ntoh(uint64_t v) { return be64toh(v); }\n\n#elif defined(__APPLE__)\n\ninline uint16_t hton(uint16_t v) { return htons(v); }\n\ninline uint32_t hton(uint32_t v) { return htonl(v); }\n\ninline uint64_t hton(uint64_t v) { return htonll(v); }\n\ninline uint16_t ntoh(uint16_t v) { return ntohs(v); }\n\ninline uint32_t ntoh(uint32_t v) { return ntohl(v); }\n\ninline uint64_t ntoh(uint64_t v) { return ntohll(v); }\n#endif // defined(__linux__)\n\n} // namespace endian\n\n// Write data in wire serialization.\nclass data_output\n{\npublic:\n    
data_output(char *p, size_t size) : _ptr(p), _end(p + size) {}\n\n    explicit data_output(std::string &s) : data_output(&s[0], s.length()) {}\n\n    data_output &write_u8(uint8_t val) { return write_unsigned(val); }\n\n    data_output &write_u16(uint16_t val) { return write_unsigned(val); }\n\n    data_output &write_u32(uint32_t val) { return write_unsigned(val); }\n\n    data_output &write_u64(uint64_t val) { return write_unsigned(val); }\n\nprivate:\n    template <typename T>\n    data_output &write_unsigned(T val)\n    {\n        static_assert(std::is_unsigned<T>::value, \"T must be unsigned integer\");\n        ensure(sizeof(val));\n\n        val = endian::hton(val);\n        memcpy(_ptr, &val, sizeof(val));\n        _ptr += sizeof(val);\n        return *this;\n    }\n\n    void ensure(size_t sz)\n    {\n        size_t cap = _end - _ptr;\n        assert(cap >= sz);\n    }\n\nprivate:\n    char *_ptr;\n    char *_end;\n};\n\n// Read data that was written in wire serialization.\nclass data_input\n{\npublic:\n    explicit data_input(string_view s) : _p(s.data()), _size(s.size()) {}\n\n    uint8_t read_u8() { return read_unsigned<uint8_t>(); }\n\n    uint16_t read_u16() { return read_unsigned<uint16_t>(); }\n\n    uint32_t read_u32() { return read_unsigned<uint32_t>(); }\n\n    uint64_t read_u64() { return read_unsigned<uint64_t>(); }\n\n    string_view read_str() { return {_p, _size}; }\n\n    void skip(size_t sz)\n    {\n        ensure(sz);\n        advance(sz);\n    }\n\nprivate:\n    template <typename T>\n    T read_unsigned()\n    {\n        static_assert(std::is_unsigned<T>::value, \"T must be unsigned integer\");\n        ensure(sizeof(T));\n\n        T val = 0;\n        memcpy(&val, _p, sizeof(T));\n        val = endian::ntoh(val);\n\n        advance(sizeof(T));\n\n        return val;\n    }\n\n    void advance(size_t sz)\n    {\n        _p += sz;\n        _size -= sz;\n    }\n\n    void ensure(size_t sz) { assert(_size >= sz); }\n\nprivate:\n    const 
char *_p{nullptr};\n    size_t _size{0};\n};\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/enum_helper.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <map>\n#include <string>\n#include <mutex>\n#include <memory>\n\n// an invalid enum value must be provided so as to be the default value when parsing failed\n#define ENUM_BEGIN2(type, name, invalid_value)                                                     \\\n    static inline ::dsn::enum_helper_xxx<type> *RegisterEnu_##name()                               \\\n    {                                                                                              \\\n        ::dsn::enum_helper_xxx<type> *helper = new 
::dsn::enum_helper_xxx<type>(invalid_value);\n\n#define ENUM_BEGIN(type, invalid_value) ENUM_BEGIN2(type, type, invalid_value)\n\n#define ENUM_REG(e) helper->register_enum(#e, e);\n\n#define ENUM_END2(type, name)                                                                      \\\n    return helper;                                                                                 \\\n    }                                                                                              \\\n    inline type enum_from_string(const char *s, type invalid_value)                                \\\n    {                                                                                              \\\n        return ::dsn::enum_helper_xxx<type>::instance(RegisterEnu_##name).parse(s);                \\\n    }                                                                                              \\\n    inline const char *enum_to_string(type val)                                                    \\\n    {                                                                                              \\\n        return ::dsn::enum_helper_xxx<type>::instance(RegisterEnu_##name).to_string(val);          \\\n    }\n\n#define ENUM_END(type) ENUM_END2(type, type)\n\nnamespace dsn {\n\ntemplate <typename TEnum>\nclass enum_helper_xxx\n{\nprivate:\n    struct EnumContext\n    {\n        std::string name;\n    };\n\npublic:\n    enum_helper_xxx(TEnum invalid) : _invalid(invalid) {}\n\n    void register_enum(const char *name, TEnum v)\n    {\n        _nameToValue[std::string(name)] = v;\n\n        EnumContext ctx;\n        ctx.name.assign(name);\n        _valueToContext[v] = ctx;\n    }\n\n    TEnum parse(const std::string &name)\n    {\n        auto it = _nameToValue.find(name);\n        return it != _nameToValue.end() ? 
it->second : _invalid;\n    }\n\n    const char *to_string(TEnum v)\n    {\n        auto it = _valueToContext.find(v);\n        if (it != _valueToContext.end()) {\n            return it->second.name.c_str();\n        } else {\n            return \"Unknown\";\n        }\n    }\n\n    static enum_helper_xxx &instance(enum_helper_xxx<TEnum> *(*registor)())\n    {\n        if (_instance == nullptr) {\n            static std::once_flag flag;\n            std::call_once(flag, [&]() { _instance.reset(registor()); });\n        }\n        return *_instance;\n    }\n\nprivate:\n    static std::unique_ptr<enum_helper_xxx<TEnum>> _instance;\n\nprivate:\n    TEnum _invalid;\n    std::map<TEnum, EnumContext> _valueToContext;\n    std::map<std::string, TEnum> _nameToValue;\n};\n\ntemplate <typename TEnum>\nstd::unique_ptr<enum_helper_xxx<TEnum>> enum_helper_xxx<TEnum>::_instance;\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/error_code.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/customizable_id.h>\n#include <thrift/protocol/TProtocol.h>\n\nnamespace dsn {\n\nclass error_code\n{\npublic:\n    explicit error_code(const char *name);\n\n    explicit constexpr error_code(int err) : _internal_code(err) {}\n\n    constexpr error_code() = default;\n\n    const char *to_string() const;\n\n    constexpr bool operator==(const error_code &r) { return _internal_code == r._internal_code; }\n\n    constexpr bool operator!=(const error_code &r) { return !(*this == r); }\n\n    constexpr operator int() const { return _internal_code; }\n\n    // for serialization in thrift format\n    uint32_t read(::apache::thrift::protocol::TProtocol *iprot);\n    uint32_t 
write(::apache::thrift::protocol::TProtocol *oprot) const;\n\n    static int max();\n    static bool is_exist(const char *name);\n    static error_code try_get(const char *name, error_code default_value);\n    static error_code try_get(const std::string &name, error_code default_value);\n\n    friend std::ostream &operator<<(std::ostream &os, const error_code &ec)\n    {\n        return os << std::string(ec.to_string());\n    }\n\nprivate:\n    int _internal_code{0};\n};\n\n#define DEFINE_ERR_CODE(x) __selectany const dsn::error_code x(#x);\n\n// the following error codes are grouped per 10 items,\n// so that we can get the integer representation for each error_code easily\nDEFINE_ERR_CODE(ERR_OK)\n\nDEFINE_ERR_CODE(ERR_UNKNOWN)\nDEFINE_ERR_CODE(ERR_SERVICE_NOT_FOUND)\nDEFINE_ERR_CODE(ERR_SERVICE_ALREADY_RUNNING)\nDEFINE_ERR_CODE(ERR_IO_PENDING)\nDEFINE_ERR_CODE(ERR_TIMEOUT)\nDEFINE_ERR_CODE(ERR_SERVICE_NOT_ACTIVE)\nDEFINE_ERR_CODE(ERR_BUSY)\nDEFINE_ERR_CODE(ERR_NETWORK_INIT_FAILED)\nDEFINE_ERR_CODE(ERR_FORWARD_TO_OTHERS)\nDEFINE_ERR_CODE(ERR_OBJECT_NOT_FOUND)\n\nDEFINE_ERR_CODE(ERR_HANDLER_NOT_FOUND)\nDEFINE_ERR_CODE(ERR_LEARN_FILE_FAILED)\nDEFINE_ERR_CODE(ERR_GET_LEARN_STATE_FAILED)\nDEFINE_ERR_CODE(ERR_INVALID_VERSION)\nDEFINE_ERR_CODE(ERR_INVALID_PARAMETERS)\nDEFINE_ERR_CODE(ERR_CAPACITY_EXCEEDED)\nDEFINE_ERR_CODE(ERR_INVALID_STATE)\nDEFINE_ERR_CODE(ERR_INACTIVE_STATE)\nDEFINE_ERR_CODE(ERR_NOT_ENOUGH_MEMBER)\nDEFINE_ERR_CODE(ERR_FILE_OPERATION_FAILED)\n\nDEFINE_ERR_CODE(ERR_HANDLE_EOF)\nDEFINE_ERR_CODE(ERR_WRONG_CHECKSUM)\nDEFINE_ERR_CODE(ERR_INVALID_DATA)\nDEFINE_ERR_CODE(ERR_INVALID_HANDLE)\nDEFINE_ERR_CODE(ERR_INCOMPLETE_DATA)\nDEFINE_ERR_CODE(ERR_VERSION_OUTDATED)\nDEFINE_ERR_CODE(ERR_PATH_NOT_FOUND)\nDEFINE_ERR_CODE(ERR_PATH_ALREADY_EXIST)\nDEFINE_ERR_CODE(ERR_ADDRESS_ALREADY_USED)\nDEFINE_ERR_CODE(ERR_STATE_FREEZED)\n\nDEFINE_ERR_CODE(ERR_LOCAL_APP_FAILURE)\nDEFINE_ERR_CODE(ERR_BIND_IOCP_FAILED)\nDEFINE_ERR_CODE(ERR_NETWORK_START_FAILED)\nDEFINE_ERR_CODE(E
RR_NOT_IMPLEMENTED)\nDEFINE_ERR_CODE(ERR_CHECKPOINT_FAILED)\nDEFINE_ERR_CODE(ERR_WRONG_TIMING)\nDEFINE_ERR_CODE(ERR_NO_NEED_OPERATE)\nDEFINE_ERR_CODE(ERR_CORRUPTION)\nDEFINE_ERR_CODE(ERR_TRY_AGAIN)\nDEFINE_ERR_CODE(ERR_CLUSTER_NOT_FOUND)\n\nDEFINE_ERR_CODE(ERR_CLUSTER_ALREADY_EXIST)\nDEFINE_ERR_CODE(ERR_SERVICE_ALREADY_EXIST)\nDEFINE_ERR_CODE(ERR_INJECTED)\nDEFINE_ERR_CODE(ERR_REPLICATION_FAILURE)\nDEFINE_ERR_CODE(ERR_APP_EXIST)\nDEFINE_ERR_CODE(ERR_APP_NOT_EXIST)\nDEFINE_ERR_CODE(ERR_BUSY_CREATING)\nDEFINE_ERR_CODE(ERR_BUSY_DROPPING)\nDEFINE_ERR_CODE(ERR_NETWORK_FAILURE)\nDEFINE_ERR_CODE(ERR_UNDER_RECOVERY)\n\nDEFINE_ERR_CODE(ERR_LEARNER_NOT_FOUND)\nDEFINE_ERR_CODE(ERR_OPERATION_DISABLED)\nDEFINE_ERR_CODE(ERR_EXPIRED)\nDEFINE_ERR_CODE(ERR_LOCK_ALREADY_EXIST)\nDEFINE_ERR_CODE(ERR_HOLD_BY_OTHERS)\nDEFINE_ERR_CODE(ERR_RECURSIVE_LOCK)\nDEFINE_ERR_CODE(ERR_NO_OWNER)\nDEFINE_ERR_CODE(ERR_NODE_ALREADY_EXIST)\nDEFINE_ERR_CODE(ERR_INCONSISTENT_STATE)\nDEFINE_ERR_CODE(ERR_ARRAY_INDEX_OUT_OF_RANGE)\n\nDEFINE_ERR_CODE(ERR_DIR_NOT_EMPTY)\nDEFINE_ERR_CODE(ERR_FS_INTERNAL)\nDEFINE_ERR_CODE(ERR_IGNORE_BAD_DATA)\nDEFINE_ERR_CODE(ERR_APP_DROPPED)\nDEFINE_ERR_CODE(ERR_MOCK_INTERNAL)\nDEFINE_ERR_CODE(ERR_ZOOKEEPER_OPERATION)\nDEFINE_ERR_CODE(ERR_CHILD_REGISTERED)\nDEFINE_ERR_CODE(ERR_INGESTION_FAILED)\nDEFINE_ERR_CODE(ERR_UNAUTHENTICATED)\nDEFINE_ERR_CODE(ERR_KRB5_INTERNAL)\n\nDEFINE_ERR_CODE(ERR_SASL_INTERNAL)\nDEFINE_ERR_CODE(ERR_SASL_INCOMPLETE)\nDEFINE_ERR_CODE(ERR_ACL_DENY)\nDEFINE_ERR_CODE(ERR_SPLITTING)\nDEFINE_ERR_CODE(ERR_PARENT_PARTITION_MISUSED)\nDEFINE_ERR_CODE(ERR_CHILD_NOT_READY)\nDEFINE_ERR_CODE(ERR_DISK_INSUFFICIENT)\nDEFINE_ERR_CODE(ERR_RETRY_EXHAUSTED)\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/errors.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/error_code.h>\n#include <dsn/utility/smart_pointers.h>\n#include <dsn/utility/string_view.h>\n#include <dsn/c/api_utilities.h>\n\n#include <sstream>\n\nnamespace dsn {\n\n// error_s gives a detailed description of the error tagged by error_code.\n// For example:\n//\n//   error_s open_file(std::string file_name) {\n//       if(file_name.empty()) {\n//           return error_s::make(ERR_INVALID_PARAMETERS, \"file name should not be empty\");\n//       }\n//       return error_s::ok();\n//   }\n//\n//   error_s err = open_file(\"\");\n//   if (!err.is_ok()) {\n//       std::cerr << err.description() << std::endl;\n//       // print: \"ERR_INVALID_PARAMETERS: file name should not be empty\"\n//   
}\n//\nclass error_s\n{\npublic:\n    constexpr error_s() noexcept = default;\n\n    ~error_s() = default;\n\n    // copyable\n    error_s(const error_s &rhs) noexcept { copy(rhs); }\n    error_s &operator=(const error_s &rhs) noexcept\n    {\n        copy(rhs);\n        return (*this);\n    }\n\n    // movable\n    error_s(error_s &&rhs) noexcept = default;\n    error_s &operator=(error_s &&) noexcept = default;\n\n    static error_s make(error_code code, dsn::string_view reason) { return error_s(code, reason); }\n\n    static error_s make(error_code code)\n    {\n        // fast path\n        if (code == ERR_OK) {\n            return {};\n        }\n        return make(code, \"\");\n    }\n\n    // Return a success status.\n    // This function is almost zero-cost since the returned object contains\n    // merely a null pointer.\n    static error_s ok() { return error_s(); }\n\n    bool is_ok() const\n    {\n        if (_info) {\n            return _info->code == ERR_OK;\n        }\n        return true;\n    }\n\n    std::string description() const\n    {\n        if (!_info) {\n            return ERR_OK.to_string();\n        }\n        std::string code = _info->code.to_string();\n        return _info->msg.empty() ? code : code + \": \" + _info->msg;\n    }\n\n    error_code code() const { return _info ? 
error_code(_info->code) : ERR_OK; }\n\n    error_s &operator<<(const char str[])\n    {\n        if (_info) {\n            _info->msg.append(\" << \");\n            _info->msg.append(str);\n            // It's fine for operator<< being applied to an OK Status.\n        }\n        return (*this);\n    }\n\n    template <class T>\n    error_s &operator<<(T v)\n    {\n        if (_info) {\n            std::ostringstream oss;\n            oss << v;\n            (*this) << oss.str().c_str();\n        }\n        return *this;\n    }\n\npublic:\n    friend std::ostream &operator<<(std::ostream &os, const error_s &s)\n    {\n        return os << s.description();\n    }\n\n    friend bool operator==(const error_s lhs, const error_s &rhs)\n    {\n        if (lhs._info && rhs._info) {\n            return lhs._info->code == rhs._info->code && lhs._info->msg == rhs._info->msg;\n        }\n        return lhs._info == rhs._info;\n    }\n\nprivate:\n    error_s(error_code code, dsn::string_view msg) noexcept : _info(new error_info(code, msg)) {}\n\n    struct error_info\n    {\n        error_code code;\n        std::string msg; // TODO(wutao1): use raw char* to improve performance?\n\n        error_info(error_code c, dsn::string_view s) : code(c), msg(s) {}\n    };\n\n    void copy(const error_s &rhs)\n    {\n        if (rhs._info == _info) {\n            return;\n        }\n        if (!rhs._info) {\n            _info.reset();\n        } else if (!_info) {\n            _info = make_unique<error_info>(rhs._info->code, rhs._info->msg);\n        } else {\n            _info->code = rhs._info->code;\n            _info->msg = rhs._info->msg;\n        }\n    }\n\nprivate:\n    std::unique_ptr<error_info> _info;\n};\n\n// error_with is used to return an error or a value.\n// For example:\n//\n//   error_with<int> result = ...;\n//   if (!result.is_ok()) {\n//       cerr << result.get_error().description() << endl;\n//   } else {\n//       cerr << result.get_value() << endl;\n//   }\n//\ntemplate 
<typename T>\nclass error_with\n{\npublic:\n    // for ok case\n    error_with(const T &value) : _value(new T(value)) {}\n    error_with(T &&value) : _value(new T(std::move(value))) {}\n\n    // for error case\n    error_with(error_s &&err) : _err(std::move(err)) { assert(!_err.is_ok()); }\n    error_with(const error_s &status) : _err(status) { assert(!_err.is_ok()); }\n\n    const T &get_value() const\n    {\n        dassert(_err.is_ok(), \"%s\", get_error().description().data());\n        return *_value;\n    }\n\n    T &get_value()\n    {\n        dassert(_err.is_ok(), \"%s\", get_error().description().data());\n        return *_value;\n    }\n\n    const error_s &get_error() const { return _err; }\n\n    error_s &get_error() { return _err; }\n\n    bool is_ok() const { return _err.is_ok(); }\n\nprivate:\n    error_s _err;\n    std::unique_ptr<T> _value;\n};\n\n} // namespace dsn\n\n#define FMT_ERR(ec, msg, args...) error_s::make(ec, fmt::format(msg, ##args))\n\n#define RETURN_NOT_OK(s)                                                                           \\\n    do {                                                                                           \\\n        const ::dsn::error_s &_s = (s);                                                            \\\n        if (!_s.is_ok())                                                                           \\\n            return _s;                                                                             \\\n    } while (false);\n"
  },
  {
    "path": "include/dsn/utility/exp_delay.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     delay for admission control\n *\n * Revision history:\n *     Nov., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/utility/singleton.h>\n#include <vector>\n#include <cassert>\n\nnamespace dsn {\n#define DELAY_COUNT 6\nconst double s_default_delay_points[DELAY_COUNT] = {1.0, 1.2, 1.4, 1.6, 1.8, 2.0};\nconst int s_default_delay[DELAY_COUNT] = {0, 0, 1, 2, 5, 10}; // milliseconds\n\nclass exp_delay\n{\npublic:\n    exp_delay()\n    {\n        memcpy((void *)_delay, (const void *)s_default_delay, sizeof(_delay));\n        _threshold = 0x0fffffff;\n    }\n\n    void initialize(const std::vector<int> &delays, int threshold)\n    
{\n        assert((int)delays.size() == DELAY_COUNT);\n\n        int i = 0;\n        for (auto &d : delays) {\n            _delay[i++] = d;\n        }\n        _threshold = threshold;\n    }\n\n    void initialize(int threshold) { _threshold = threshold; }\n\n    inline int delay(int value)\n    {\n        if (value >= _threshold) {\n            double f = (double)value / (double)_threshold;\n            int delay_milliseconds;\n\n            if (f < s_default_delay_points[DELAY_COUNT - 1]) {\n                int idx = static_cast<int>((f - 1.0) / 0.2);\n                delay_milliseconds = _delay[idx];\n            } else {\n                delay_milliseconds = _delay[DELAY_COUNT - 1];\n            }\n\n            return delay_milliseconds;\n        } else {\n            return 0;\n        }\n    }\n\nprivate:\n    int _delay[DELAY_COUNT];\n    int _threshold;\n};\n\nclass shared_exp_delay\n{\npublic:\n    shared_exp_delay() { memcpy((void *)_delay, (const void *)s_default_delay, sizeof(_delay)); }\n\n    void initialize(const std::vector<int> &delays)\n    {\n        assert((int)delays.size() == DELAY_COUNT);\n\n        int i = 0;\n        for (auto &d : delays) {\n            _delay[i++] = d;\n        }\n    }\n\n    inline int delay(int value, int threshold)\n    {\n        if (value >= threshold) {\n            double f = (double)value / (double)threshold;\n            int delay_milliseconds;\n\n            if (f < s_default_delay_points[DELAY_COUNT - 1]) {\n                int idx = static_cast<int>((f - 1.0) / 0.2);\n                delay_milliseconds = _delay[idx];\n            } else {\n                delay_milliseconds = _delay[DELAY_COUNT - 1];\n            }\n\n            return delay_milliseconds;\n        } else {\n            return 0;\n        }\n    }\n\nprivate:\n    int _delay[DELAY_COUNT];\n};\n}\n"
  },
  {
    "path": "include/dsn/utility/extensible_object.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     state extension for cpp objects\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/utility/utils.h>\n#include <vector>\n#include <atomic>\n#include <cstring>\n#include <cassert>\n\nnamespace dsn {\n/*!\n@addtogroup tool-api-providers\n@{\n*/\ntypedef void (*extension_deletor)(void *);\ntypedef void *(*extension_creator)(void *);\n\nclass extensible\n{\npublic:\n    extensible(uint64_t *ptr, uint32_t count)\n    {\n        _ptr = ptr;\n        _count = count;\n    }\n\n    void set_extension(uint32_t id, uint64_t data)\n    {\n        assert(id < _count);\n        _ptr[id] = data;\n    }\n\n  
  uint64_t &get_extension(uint32_t id)\n    {\n        assert(id < _count);\n        return _ptr[id];\n    }\n\nprivate:\n    uint64_t *_ptr;\n    uint32_t _count;\n};\n\ntemplate <typename T, const int MAX_EXTENSION_COUNT>\nclass extensible_object : public extensible\n{\npublic:\n    static const uint32_t INVALID_SLOT = 0xffffffff;\n    static const uint64_t INVALID_VALUE = 0x0ULL;\n\npublic:\n    extensible_object() : extensible(_extensions, MAX_EXTENSION_COUNT)\n    {\n        memset((void *)_extensions, 0, sizeof(_extensions));\n    }\n\n    ~extensible_object()\n    {\n        int maxId = static_cast<int>(get_extension_count());\n\n        for (int i = 0; i < maxId; i++) {\n            if (_extensions[i] != extensible_object::INVALID_VALUE &&\n                s_extensionDeletors[i] != nullptr) {\n                s_extensionDeletors[i]((void *)_extensions[i]);\n            }\n        }\n    }\n\n    void copy_to(extensible_object<T, MAX_EXTENSION_COUNT> &r)\n    {\n        int maxId = static_cast<int>(get_extension_count());\n\n        for (int i = 0; i < maxId; i++) {\n            if (s_extensionDeletors[i] == nullptr) {\n                r._extensions[i] = _extensions[i];\n            }\n        }\n    }\n\n    static uint32_t register_extension(extension_deletor deletor = nullptr)\n    {\n        int idx = s_nextExtensionIndex++;\n        if (idx < MAX_EXTENSION_COUNT) {\n            s_extensionDeletors[idx] = deletor;\n        } else {\n            idx = INVALID_SLOT;\n            assert(!\"allocate extension failed, not enough slots available\");\n        }\n        return idx;\n    }\n\n    static uint32_t get_extension_count() { return s_nextExtensionIndex.load(); }\n\nprivate:\n    uint64_t _extensions[MAX_EXTENSION_COUNT];\n    static extension_deletor s_extensionDeletors[MAX_EXTENSION_COUNT];\n    static std::atomic<uint32_t> s_nextExtensionIndex;\n};\n\n/*!\nExtensionHelper\n\nsteps to use an ExtensionHelper\n- implement an ExtensionHelper class, 
e.g.\n    class F : public ExtensionHelper<F, T>, make sure T is an extensible_object.\n- add extra information as member fields of class F.\n- invoke F::register_ext() at system initialization\n- use F::get(host_object) to retrieve the F object where host_object is of T type.\n- once F object is here, you can access your extra information freely.\n */\n\ntemplate <typename TPlaceholder, typename TExtensibleObject>\nclass uint64_extension_helper\n{\npublic:\n    static uint32_t register_ext()\n    {\n        s_slotIdx = TExtensibleObject::register_extension();\n        return s_slotIdx;\n    }\n\n    static uint64_t &get(TExtensibleObject *ctx) { return ctx->get_extension(s_slotIdx); }\n\n    static void set(TExtensibleObject *ctx, uint64_t ext) { ctx->set_extension(s_slotIdx, ext); }\n\nprivate:\n    static uint32_t s_slotIdx;\n};\n\ntemplate <typename TExtension, typename TExtensibleObject>\nclass object_extension_helper\n{\npublic:\n    static uint32_t register_ext(extension_deletor deletor = nullptr)\n    {\n        s_slotIdx = TExtensibleObject::register_extension(deletor);\n        s_deletor = deletor;\n        return s_slotIdx;\n    }\n\n    static uint32_t register_ext(extension_creator creator, extension_deletor deletor)\n    {\n        s_slotIdx = TExtensibleObject::register_extension(deletor);\n        s_creator = creator;\n        s_deletor = deletor;\n        return s_slotIdx;\n    }\n\n    static TExtension *get(TExtensibleObject *ctx)\n    {\n        uint64_t &val = ctx->get_extension(s_slotIdx);\n        return (TExtension *)val;\n    }\n\n    static void set(TExtensibleObject *ctx, TExtension *ext)\n    {\n        ctx->set_extension(s_slotIdx, (uint64_t)ext);\n    }\n\n    static TExtension *get_inited(TExtensibleObject *ctx)\n    {\n        uint64_t &val = ctx->get_extension(s_slotIdx);\n        if (val != TExtensibleObject::INVALID_VALUE)\n            return (TExtension *)val;\n\n        if (s_creator == nullptr) {\n            TExtension *obj = new 
TExtension();\n            val = (uint64_t)obj;\n        } else {\n            val = (uint64_t)s_creator(ctx);\n        }\n\n        return (TExtension *)val;\n    }\n\n    static void clear(TExtensibleObject *ctx)\n    {\n        uint64_t &val = ctx->get_extension(s_slotIdx);\n        if (val != TExtensibleObject::INVALID_VALUE) {\n            s_deletor((TExtension *)val);\n            val = TExtensibleObject::INVALID_VALUE;\n        }\n    }\n\nprivate:\n    static uint32_t s_slotIdx;\n    static extension_deletor s_deletor;\n    static extension_creator s_creator;\n};\n\n//--- inline implementation -----------\ntemplate <typename T, const int MAX_EXTENSION_COUNT>\nextension_deletor\n    extensible_object<T, MAX_EXTENSION_COUNT>::s_extensionDeletors[MAX_EXTENSION_COUNT];\ntemplate <typename T, const int MAX_EXTENSION_COUNT>\nstd::atomic<uint32_t> extensible_object<T, MAX_EXTENSION_COUNT>::s_nextExtensionIndex(0);\n\ntemplate <typename TPlaceholder, typename TExtensibleObject>\nuint32_t uint64_extension_helper<TPlaceholder, TExtensibleObject>::s_slotIdx = 0;\n\ntemplate <typename TExtension, typename TExtensibleObject>\nuint32_t object_extension_helper<TExtension, TExtensibleObject>::s_slotIdx = 0;\ntemplate <typename TExtension, typename TExtensibleObject>\nextension_deletor object_extension_helper<TExtension, TExtensibleObject>::s_deletor = nullptr;\ntemplate <typename TExtension, typename TExtensibleObject>\nextension_creator object_extension_helper<TExtension, TExtensibleObject>::s_creator = nullptr;\n/*@}*/\n} // end namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/factory_store.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     factory registry for object creation\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/utility/singleton_store.h>\n\nnamespace dsn {\n\nenum provider_type\n{\n    PROVIDER_TYPE_MAIN = 0,\n    PROVIDER_TYPE_ASPECT = 1\n};\n\nnamespace utils {\n\ntemplate <typename TResult>\nclass factory_store\n{\npublic:\n    template <typename TFactory>\n    static bool register_factory(const char *name, TFactory factory, ::dsn::provider_type type)\n    {\n        factory_entry entry;\n        entry.dummy = nullptr;\n        entry.factory = (void *)factory;\n        entry.type = type;\n        return 
singleton_store<std::string, factory_entry>::instance().put(std::string(name),\n                                                                           entry);\n    }\n\n    template <typename TFactory>\n    static TFactory get_factory(const char *name, ::dsn::provider_type type)\n    {\n        factory_entry entry;\n        if (singleton_store<std::string, factory_entry>::instance().get(std::string(name), entry)) {\n            if (entry.type != type) {\n                report_error(name, type);\n                return nullptr;\n            } else {\n                TFactory f;\n                f = *(TFactory *)&entry.factory;\n                return f;\n            }\n        } else {\n            report_error(name, type);\n            return nullptr;\n        }\n    }\n\n    template <typename T1, typename T2, typename T3, typename T4, typename T5>\n    static TResult *\n    create(const char *name, ::dsn::provider_type type, T1 t1, T2 t2, T3 t3, T4 t4, T5 t5)\n    {\n        typedef TResult *(*TFactory)(T1, T2, T3, T4, T5);\n        TFactory f = get_factory<TFactory>(name, type);\n        return f ? f(t1, t2, t3, t4, t5) : nullptr;\n    }\n\n    template <typename T1, typename T2, typename T3, typename T4>\n    static TResult *create(const char *name, ::dsn::provider_type type, T1 t1, T2 t2, T3 t3, T4 t4)\n    {\n        typedef TResult *(*TFactory)(T1, T2, T3, T4);\n        TFactory f = get_factory<TFactory>(name, type);\n        return f ? f(t1, t2, t3, t4) : nullptr;\n    }\n\n    template <typename T1, typename T2, typename T3>\n    static TResult *create(const char *name, ::dsn::provider_type type, T1 t1, T2 t2, T3 t3)\n    {\n        typedef TResult *(*TFactory)(T1, T2, T3);\n        TFactory f = get_factory<TFactory>(name, type);\n        return f ? 
f(t1, t2, t3) : nullptr;\n    }\n\n    template <typename T1, typename T2>\n    static TResult *create(const char *name, ::dsn::provider_type type, T1 t1, T2 t2)\n    {\n        typedef TResult *(*TFactory)(T1, T2);\n        TFactory f = get_factory<TFactory>(name, type);\n        return f ? f(t1, t2) : nullptr;\n    }\n\n    template <typename T1>\n    static TResult *create(const char *name, ::dsn::provider_type type, T1 t1)\n    {\n        typedef TResult *(*TFactory)(T1);\n        TFactory f = get_factory<TFactory>(name, type);\n        return f ? f(t1) : nullptr;\n    }\n\n    static TResult *create(const char *name, ::dsn::provider_type type)\n    {\n        typedef TResult *(*TFactory)();\n        TFactory f = get_factory<TFactory>(name, type);\n        return f ? f() : nullptr;\n    }\n\nprivate:\n    static void report_error(const char *name, ::dsn::provider_type type)\n    {\n        printf(\"cannot find factory '%s' with factory type %s\\n\",\n               name,\n               type == PROVIDER_TYPE_MAIN ? \"provider\" : \"aspect\");\n\n        std::vector<std::string> keys;\n        singleton_store<std::string, factory_entry>::instance().get_all_keys(keys);\n        printf(\"\\tthe following %u factories are registered:\\n\", static_cast<int>(keys.size()));\n        for (auto it = keys.begin(); it != keys.end(); ++it) {\n            factory_entry entry;\n            singleton_store<std::string, factory_entry>::instance().get(*it, entry);\n            printf(\"\\t\\t%s (type: %s)\\n\",\n                   it->c_str(),\n                   entry.type == PROVIDER_TYPE_MAIN ? 
\"provider\" : \"aspect\");\n        }\n        printf(\"\\tPlease specify the correct factory name in your tool_app or in configuration \"\n               \"file\\n\");\n    }\n\nprivate:\n    struct factory_entry\n    {\n        TResult *dummy;\n        void *factory;\n        ::dsn::provider_type type;\n\n        factory_entry()\n        {\n            dummy = nullptr;\n            factory = nullptr;\n            type = PROVIDER_TYPE_MAIN;\n        }\n    };\n};\n}\n} // end namespace dsn::utils\n"
  },
  {
    "path": "include/dsn/utility/fail_point.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n// Copyright 2017 PingCAP, Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n/// A fail point implementation in C++.\n/// This lib is ported from https://github.com/pingcap/fail-rs.\n\n#include <dsn/utility/string_view.h>\n\n/// The only entry to define a fail point with `return` function: the lambda function must\n/// return a non-void type. 
When a fail point is defined, it's referenced via the name.\n#define FAIL_POINT_INJECT_F(name, lambda)                                                          \\\n    do {                                                                                           \\\n        if (dsn_likely(!::dsn::fail::_S_FAIL_POINT_ENABLED))                                       \\\n            break;                                                                                 \\\n        auto __Func = lambda;                                                                      \\\n        auto __Res = ::dsn::fail::eval(name);                                                      \\\n        if (__Res != nullptr) {                                                                    \\\n            return __Func(*__Res);                                                                 \\\n        }                                                                                          \\\n    } while (0)\n\n/// The only entry to define a fail point with `not return` function: lambda function usually\n/// return void type. 
When a fail point is defined, it's referenced via the name.\n#define FAIL_POINT_INJECT_NOT_RETURN_F(name, lambda)                                               \\\n    do {                                                                                           \\\n        if (dsn_likely(!::dsn::fail::_S_FAIL_POINT_ENABLED))                                       \\\n            break;                                                                                 \\\n        auto __Func = lambda;                                                                      \\\n        auto __Res = ::dsn::fail::eval(name);                                                      \\\n        if (__Res != nullptr) {                                                                    \\\n            __Func(*__Res);                                                                        \\\n        }                                                                                          \\\n    } while (0)\n\nnamespace dsn {\nnamespace fail {\n\nextern const std::string *eval(dsn::string_view name);\n\n/// Set new actions to a fail point at runtime.\n/// The format of an action is `[p%][cnt*]task[(arg)]`. `p%` is the expected probability that\n/// the action is triggered, and `cnt*` is the max times the action can be triggered.\n/// For example, `20%3*print(still alive!)` means the fail point has 20% chance to print a\n/// message \"still alive!\". And the message will be printed at most 3 times.\nextern void cfg(dsn::string_view name, dsn::string_view action);\n\nextern void setup();\n\n/// Tear down the fail point system.\nextern void teardown();\n\nextern bool _S_FAIL_POINT_ENABLED;\n\n} // namespace fail\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/filesystem.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <string>\n#include <dsn/utility/error_code.h>\n\n#ifndef _XOPEN_SOURCE\n#define _XOPEN_SOURCE 500\n#endif\n\n#include <ftw.h>\n\n#ifndef FTW_CONTINUE\n#define FTW_CONTINUE 0\n#endif\n\n#ifndef FTW_STOP\n#define FTW_STOP 1\n#endif\n\n#ifndef FTW_SKIP_SUBTREE\n#define FTW_SKIP_SUBTREE 2\n#endif\n\n#ifndef FTW_SKIP_SIBLINGS\n#define FTW_SKIP_SIBLINGS 3\n#endif\n\nnamespace dsn {\nnamespace utils {\nnamespace filesystem {\n\nint get_normalized_path(const std::string &path, std::string &npath);\n\nbool get_absolute_path(const std::string &path1, std::string &path2);\n\nstd::string remove_file_name(const std::string &path);\n\nstd::string get_file_name(const std::string &path);\n\nstd::string path_combine(const 
std::string &path1, const std::string &path2);\n\ntypedef std::function<int(const char *, int, struct FTW *)> ftw_handler;\nbool file_tree_walk(const std::string &dirpath, ftw_handler handler, bool recursive = true);\n\nbool path_exists(const std::string &path);\n\nbool directory_exists(const std::string &path);\n\nbool file_exists(const std::string &path);\n\nbool get_subfiles(const std::string &path, std::vector<std::string> &sub_list, bool recursive);\n\nbool get_subdirectories(const std::string &path,\n                        std::vector<std::string> &sub_list,\n                        bool recursive);\n\nbool get_subpaths(const std::string &path, std::vector<std::string> &sub_list, bool recursive);\n\n// Returns true if no error.\nbool remove_path(const std::string &path);\n\n// this will always remove target path if exist\nbool rename_path(const std::string &path1, const std::string &path2);\n\nbool file_size(const std::string &path, int64_t &sz);\n\nbool create_directory(const std::string &path);\n\nbool create_file(const std::string &path);\n\nbool get_current_directory(std::string &path);\n\nbool last_write_time(const std::string &path, time_t &tm);\n\nerror_code get_process_image_path(int pid, std::string &path);\n\ninline error_code get_current_process_image_path(std::string &path)\n{\n    auto err = dsn::utils::filesystem::get_process_image_path(-1, path);\n    assert(err == ERR_OK);\n    return err;\n}\n\nstruct disk_space_info\n{\n    // all values are byte counts\n    uint64_t capacity;\n    uint64_t available;\n};\nbool get_disk_space_info(const std::string &path, disk_space_info &info);\n\nbool link_file(const std::string &src, const std::string &target);\n\nerror_code md5sum(const std::string &file_path, /*out*/ std::string &result);\n\n// return value:\n//  - <A, B>:\n//          A is represent whether operation encounter some local error\n//          B is represent wheter the directory is empty, true means empty, otherwise 
false\nstd::pair<error_code, bool> is_directory_empty(const std::string &dirname);\n\nerror_code read_file(const std::string &fname, /*out*/ std::string &buf);\n\n// compare file metadata calculated by fname with expected md5 and file_size\nbool verify_file(const std::string &fname,\n                 const std::string &expected_md5,\n                 const int64_t &expected_fsize);\n\nbool verify_file_size(const std::string &fname, const int64_t &expected_fsize);\n\nbool verify_data_md5(const std::string &fname,\n                     const char *data,\n                     const size_t data_size,\n                     const std::string &expected_md5);\n\n// create directory and get absolute path\nbool create_directory(const std::string &path,\n                      /*out*/ std::string &absolute_path,\n                      /*out*/ std::string &err_msg);\n\nbool write_file(const std::string &fname, std::string &buf);\n\n// check if directory is readable and writable\n// call `create_directory` before to make `path` exist\nbool check_dir_rw(const std::string &path, /*out*/ std::string &err_msg);\n\n} // namespace filesystem\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/fixed_size_buffer_pool.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#pragma once\n\n///\n/// A simple buffer pool designed for efficiently formatting\n/// frequently used types (like gpid, rpc_address) into string,\n/// without dynamic memory allocation.\n///\n/// It's not suitable to be used in multi-threaded environment,\n/// unless when it's declared as thread local.\n///\n/// \\see dsn_address_to_string\n/// \\see dsn::gpid::to_string\n///\ntemplate <unsigned int PoolCapacity, unsigned int ChunkSize>\nclass fixed_size_buffer_pool\n{\nprivate:\n    char buffer[PoolCapacity][ChunkSize];\n    unsigned int index;\n\npublic:\n    constexpr unsigned int get_chunk_size() const { return ChunkSize; }\n    char *next()\n    {\n        // we must update index first, coz the index may be uninitialized\n        
// the reason we don't initialize the buffer/index in the constructor\n        // is that the buffer pool may be declared as a thread_local variable\n        index = (index + 1) % PoolCapacity;\n        return buffer[index];\n    }\n};\n"
  },
  {
    "path": "include/dsn/utility/flags.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <string>\n#include <cstdint>\n#include <functional>\n#include \"errors.h\"\n#include \"enum_helper.h\"\n#include \"utils.h\"\n\nenum class flag_tag\n{\n    FT_MUTABLE = 0, /** flag data is mutable */\n    FV_MAX_INDEX = 0,\n};\n\nENUM_BEGIN(flag_tag, flag_tag::FV_MAX_INDEX)\nENUM_REG(flag_tag::FT_MUTABLE)\nENUM_END(flag_tag)\n\n// support std::hash with enum types is implemented since gcc 6.1\n// so we should define hash for flag_tag to compatible with gcc < 6.1\nnamespace std {\ntemplate <>\nstruct hash<flag_tag>\n{\n    size_t operator()(const flag_tag &t) const { return size_t(t); }\n};\n} // namespace std\n\n// Example:\n//    DSN_DEFINE_string(\"core\", filename, \"my_file.txt\", \"The file to read\");\n//    DSN_DEFINE_validator(filename, [](const char *fname){ return is_file(fname); });\n//    auto fptr = file::open(FLAGS_filename, O_RDONLY | O_BINARY, 0);\n\n#define DSN_DECLARE_VARIABLE(type, name) extern type FLAGS_##name\n\n#define DSN_DECLARE_int32(name) DSN_DECLARE_VARIABLE(int32_t, name)\n#define DSN_DECLARE_uint32(name) DSN_DECLARE_VARIABLE(uint32_t, name)\n#define DSN_DECLARE_int64(name) 
DSN_DECLARE_VARIABLE(int64_t, name)\n#define DSN_DECLARE_uint64(name) DSN_DECLARE_VARIABLE(uint64_t, name)\n#define DSN_DECLARE_double(name) DSN_DECLARE_VARIABLE(double, name)\n#define DSN_DECLARE_bool(name) DSN_DECLARE_VARIABLE(bool, name)\n#define DSN_DECLARE_string(name) DSN_DECLARE_VARIABLE(const char *, name)\n\n#define DSN_DEFINE_VARIABLE(type, section, name, default_value, desc)                              \\\n    type FLAGS_##name = default_value;                                                             \\\n    static dsn::flag_registerer FLAGS_REG_##name(section, #name, desc, &FLAGS_##name)\n\n#define DSN_DEFINE_int32(section, name, val, desc)                                                 \\\n    DSN_DEFINE_VARIABLE(int32_t, section, name, val, desc)\n#define DSN_DEFINE_uint32(section, name, val, desc)                                                \\\n    DSN_DEFINE_VARIABLE(uint32_t, section, name, val, desc)\n#define DSN_DEFINE_int64(section, name, val, desc)                                                 \\\n    DSN_DEFINE_VARIABLE(int64_t, section, name, val, desc)\n#define DSN_DEFINE_uint64(section, name, val, desc)                                                \\\n    DSN_DEFINE_VARIABLE(uint64_t, section, name, val, desc)\n#define DSN_DEFINE_double(section, name, val, desc)                                                \\\n    DSN_DEFINE_VARIABLE(double, section, name, val, desc)\n#define DSN_DEFINE_bool(section, name, val, desc)                                                  \\\n    DSN_DEFINE_VARIABLE(bool, section, name, val, desc)\n#define DSN_DEFINE_string(section, name, val, desc)                                                \\\n    DSN_DEFINE_VARIABLE(const char *, section, name, val, desc)\n\n// Convenience macro for the registration of a flag validator.\n// `validator` must be a std::function<bool(FLAG_TYPE)> and receives the flag value as argument,\n// returns true if validation passed.\n// The program corrupts if the 
validation failed.\n#define DSN_DEFINE_validator(name, validator)                                                      \\\n    static auto FLAGS_VALIDATOR_FN_##name = validator;                                             \\\n    static const dsn::flag_validator FLAGS_VALIDATOR_##name(                                       \\\n        #name, []() -> bool { return FLAGS_VALIDATOR_FN_##name(FLAGS_##name); })\n\n// There are scenarios where inconsistency should be detected and avoided between 2 or more flags.\n//\n// For example, FLAGS_a and FLAGS_b are mutually exclusive: they cannot both be true.\n// Therefore, a validator may be something like:\n// bool validate() {\n//     return !FLAGS_a || !FLAGS_b;\n// }\n//\n// Another example is that FLAGS_c must be less than FLAGS_d. As for this example,\n// a validator can be implemented as:\n// bool validate() {\n//     return FLAGS_c < FLAGS_d;\n// }\n//\n// Unfortunately, `flag_validator` is used to validate the value of individual\n// flag without involving others. Once another flag is used in `flag_validator`,\n// perhaps the validation is ineffective since that flag may not have been loaded\n// from the configuration file.\n//\n// We use grouped flag validator to detect the inconsistency between 2 or more flags.\n// In contrast with `flag_validator` for individual flag, `group_flag_validator` has a guarantee\n// that it will be run after all flags have been loaded from the configuration file.\n//\n// This is the convenient macro for the registration of a grouped flag validator.\n// `validator` must be a std::function<bool(std::string &)>. 
It does not receive any input\n// argument, but return true if the validation passed otherwise false, with a hint message\n// set as the output argument `std::string &`, if any.\n#define DSN_DEFINE_group_validator(name, validator)                                                \\\n    static const dsn::group_flag_validator FLAGS_GROUP_VALIDATOR_##name(#name, validator)\n\n#define DSN_TAG_VARIABLE(name, tag)                                                                \\\n    COMPILE_ASSERT(sizeof(decltype(FLAGS_##name)), exist_##name##_##tag);                          \\\n    static dsn::flag_tagger FLAGS_TAGGER_##name##_##tag(#name, flag_tag::tag)\n\nnamespace dsn {\n\n// An utility class that registers a flag upon initialization.\nclass flag_registerer\n{\npublic:\n    flag_registerer(const char *section, const char *name, const char *desc, int32_t *val);\n    flag_registerer(const char *section, const char *name, const char *desc, uint32_t *val);\n    flag_registerer(const char *section, const char *name, const char *desc, int64_t *val);\n    flag_registerer(const char *section, const char *name, const char *desc, uint64_t *val);\n    flag_registerer(const char *section, const char *name, const char *desc, double *val);\n    flag_registerer(const char *section, const char *name, const char *desc, bool *val);\n    flag_registerer(const char *section, const char *name, const char *desc, const char **val);\n};\n\n// An utility class that registers a validator upon initialization.\nusing validator_fn = std::function<bool()>;\nclass flag_validator\n{\npublic:\n    flag_validator(const char *name, validator_fn);\n};\n\n// An utility class that registers a grouped validator upon initialization.\nusing group_validator_fn = std::function<bool(std::string &)>;\nclass group_flag_validator\n{\npublic:\n    group_flag_validator(const char *name, group_validator_fn);\n};\n\nclass flag_tagger\n{\npublic:\n    flag_tagger(const char *name, const flag_tag &tag);\n};\n\n// 
Loads all the flags from configuration.\nextern void flags_initialize();\n\n// update the specified flag to val\nextern error_s update_flag(const std::string &name, const std::string &val);\n\n// determine if the tag exists for the specified flag\nextern bool has_tag(const std::string &name, const flag_tag &tag);\n\n// list all the flags\nextern std::string list_all_flags();\n\n// get the json string of a specified flag\nextern error_with<std::string> get_flag_str(const std::string &flag_name);\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/function_traits.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Function traits to help extract the type of various callbacks\n *\n * Revision history:\n *     2016-01-15, Tianyi Wang, first version\n */\n\n#pragma once\n\n#include <type_traits>\n\nnamespace dsn {\ntemplate <typename T>\nstruct function_traits : public function_traits<decltype(&T::operator())>\n{\n};\ntemplate <typename ReturnType, typename... Args>\nstruct function_traits<ReturnType(Args...)>\n{\n    using return_t = ReturnType;\n    static constexpr size_t const arity = sizeof...(Args);\n    template <size_t i>\n    using arg_t = typename std::tuple_element<i, std::tuple<Args...>>::type;\n};\n\ntemplate <typename ReturnType, typename... 
Args>\nstruct function_traits<ReturnType (*)(Args...)> : public function_traits<ReturnType(Args...)>\n{\n};\n\ntemplate <typename ClassType, typename ReturnType, typename... Args>\nstruct function_traits<ReturnType (ClassType::*)(Args...)>\n    : public function_traits<ReturnType(Args...)>\n{\n    typedef ClassType &owner_type;\n};\n\ntemplate <typename ClassType, typename ReturnType, typename... Args>\nstruct function_traits<ReturnType (ClassType::*)(Args...) const>\n    : public function_traits<ReturnType(Args...)>\n{\n    typedef const ClassType &owner_type;\n};\n\ntemplate <typename ClassType, typename ReturnType, typename... Args>\nstruct function_traits<ReturnType (ClassType::*)(Args...) volatile>\n    : public function_traits<ReturnType(Args...)>\n{\n    typedef volatile ClassType &owner_type;\n};\n\ntemplate <typename ClassType, typename ReturnType, typename... Args>\nstruct function_traits<ReturnType (ClassType::*)(Args...) const volatile>\n    : public function_traits<ReturnType(Args...)>\n{\n    typedef const volatile ClassType &owner_type;\n};\n\ntemplate <typename FunctionType>\nstruct function_traits<std::function<FunctionType>> : public function_traits<FunctionType>\n{\n};\ntemplate <typename T>\nstruct function_traits<T &> : public function_traits<T>\n{\n};\ntemplate <typename T>\nstruct function_traits<const T &> : public function_traits<T>\n{\n};\ntemplate <typename T>\nstruct function_traits<volatile T &> : public function_traits<T>\n{\n};\ntemplate <typename T>\nstruct function_traits<const volatile T &> : public function_traits<T>\n{\n};\ntemplate <typename T>\nstruct function_traits<T &&> : public function_traits<T>\n{\n};\ntemplate <typename T>\nstruct function_traits<const T &&> : public function_traits<T>\n{\n};\ntemplate <typename T>\nstruct function_traits<volatile T &&> : public function_traits<T>\n{\n};\ntemplate <typename T>\nstruct function_traits<const volatile T &&> : public function_traits<T>\n{\n};\n}\n"
  },
  {
    "path": "include/dsn/utility/hpc_locks/autoresetevent.h",
    "content": "//---------------------------------------------------------\n// For conditions of distribution and use, see\n// https://github.com/preshing/cpp11-on-multicore/blob/master/LICENSE\n//---------------------------------------------------------\n\n#ifndef __CPP11OM_AUTO_RESET_EVENT_H__\n#define __CPP11OM_AUTO_RESET_EVENT_H__\n\n#include <cassert>\n#include <thread>\n#include <dsn/utility/hpc_locks/sema.h>\n\n//---------------------------------------------------------\n// AutoResetEvent\n//---------------------------------------------------------\nclass AutoResetEvent\n{\nprivate:\n    // m_status == 1: Event object is signaled.\n    // m_status == 0: Event object is reset and no threads are waiting.\n    // m_status == -N: Event object is reset and N threads are waiting.\n    std::atomic<int> m_status;\n    DefaultSemaphoreType m_sema;\n\npublic:\n    AutoResetEvent(int initialStatus = 0) : m_status(initialStatus)\n    {\n        assert(initialStatus >= 0 && initialStatus <= 1);\n    }\n\n    void signal()\n    {\n        int oldStatus = m_status.load(std::memory_order_relaxed);\n        for (;;) // Increment m_status atomically via CAS loop.\n        {\n            assert(oldStatus <= 1);\n            int newStatus = oldStatus < 1 ? oldStatus + 1 : 1;\n            if (m_status.compare_exchange_weak(\n                    oldStatus, newStatus, std::memory_order_release, std::memory_order_relaxed))\n                break;\n            // The compare-exchange failed, likely because another thread changed m_status.\n            // oldStatus has been updated. 
Retry the CAS loop.\n        }\n        if (oldStatus < 0)\n            m_sema.signal(); // Release one waiting thread.\n    }\n\n    void wait()\n    {\n        int oldStatus = m_status.fetch_sub(1, std::memory_order_acquire);\n        assert(oldStatus <= 1);\n        if (oldStatus < 1) {\n            m_sema.wait();\n        }\n    }\n\n    bool wait(int timeout_milliseconds)\n    {\n        int oldStatus = m_status.fetch_sub(1, std::memory_order_acquire);\n        assert(oldStatus <= 1);\n        if (oldStatus < 1) {\n            return m_sema.wait(timeout_milliseconds);\n        } else\n            return true;\n    }\n};\n\n#endif // __CPP11OM_AUTO_RESET_EVENT_H__\n"
  },
  {
    "path": "include/dsn/utility/hpc_locks/autoreseteventcondvar.h",
    "content": "//---------------------------------------------------------\n// For conditions of distribution and use, see\n// https://github.com/preshing/modern-cpp-threading/blob/master/LICENSE\n//---------------------------------------------------------\n\n#ifndef __MCPPT_AUTO_RESET_EVENT_COND_VAR_H__\n#define __MCPPT_AUTO_RESET_EVENT_COND_VAR_H__\n\n#include <cassert>\n#include <mutex>\n#include <condition_variable>\n\n//---------------------------------------------------------\n// AutoResetEventCondVar\n//---------------------------------------------------------\nclass AutoResetEventCondVar\n{\nprivate:\n    // m_status == 1: Event object is signaled.\n    // m_status == 0: Event object is reset and no threads are waiting.\n    // m_status == -N: Event object is reset and N threads are waiting.\n    std::mutex m_mutex;\n    int m_status;\n    std::condition_variable m_condition;\n\npublic:\n    AutoResetEventCondVar(int initialStatus = 0) : m_status(initialStatus)\n    {\n        assert(initialStatus >= 0 && initialStatus <= 1);\n    }\n\n    void signal()\n    {\n        // Increment m_status atomically via critical section.\n        std::lock_guard<std::mutex> lock(m_mutex);\n        int oldStatus = m_status;\n        if (oldStatus == 1)\n            return; // Event object is already signaled.\n        m_status++;\n        if (oldStatus < 0)\n            m_condition.notify_one(); // Release one waiting thread.\n    }\n\n    void wait()\n    {\n        std::unique_lock<std::mutex> lock(m_mutex);\n        int oldStatus = m_status;\n        m_status--;\n        assert(oldStatus <= 1);\n        if (oldStatus < 1) {\n            m_condition.wait(lock);\n        }\n    }\n};\n\n#endif // __MCPPT_AUTO_RESET_EVENT_COND_VAR_H__\n"
  },
  {
    "path": "include/dsn/utility/hpc_locks/benaphore.h",
    "content": "//---------------------------------------------------------\n// For conditions of distribution and use, see\n// https://github.com/preshing/cpp11-on-multicore/blob/master/LICENSE\n//---------------------------------------------------------\n\n#ifndef __CPP11OM_BENAPHORE_H__\n#define __CPP11OM_BENAPHORE_H__\n\n#include <cassert>\n#include <thread>\n#include <atomic>\n#include <dsn/utility/process_utils.h>\n#include <dsn/utility/hpc_locks/sema.h>\n\n//---------------------------------------------------------\n// NonRecursiveBenaphore\n//---------------------------------------------------------\nclass NonRecursiveBenaphore\n{\nprivate:\n    std::atomic<int> m_contentionCount;\n    DefaultSemaphoreType m_sema;\n\npublic:\n    NonRecursiveBenaphore() : m_contentionCount(0) {}\n\n    void lock()\n    {\n        if (m_contentionCount.fetch_add(1, std::memory_order_acquire) > 0) {\n            m_sema.wait();\n        }\n    }\n\n    bool tryLock()\n    {\n        if (m_contentionCount.load(std::memory_order_relaxed) != 0)\n            return false;\n        int expected = 0;\n        return m_contentionCount.compare_exchange_strong(expected, 1, std::memory_order_acquire);\n    }\n\n    void unlock()\n    {\n        int oldCount = m_contentionCount.fetch_sub(1, std::memory_order_release);\n        assert(oldCount > 0);\n        if (oldCount > 1) {\n            m_sema.signal();\n        }\n    }\n};\n\n//---------------------------------------------------------\n// RecursiveBenaphore\n//---------------------------------------------------------\nclass RecursiveBenaphore\n{\nprivate:\n    std::atomic<int> m_contentionCount;\n    std::atomic<int> m_owner;\n    int m_recursion;\n    DefaultSemaphoreType m_sema;\n\npublic:\n    RecursiveBenaphore() : m_contentionCount(0), m_recursion(0)\n    {\n        m_owner = ::dsn::utils::INVALID_TID;\n    }\n\n    void lock()\n    {\n        auto tid = ::dsn::utils::get_current_tid();\n        if 
(m_contentionCount.fetch_add(1, std::memory_order_acquire) > 0) {\n            if (tid != m_owner.load(std::memory_order_relaxed))\n                m_sema.wait();\n        }\n        //--- We are now inside the lock ---\n        m_owner.store(tid, std::memory_order_relaxed);\n        m_recursion++;\n    }\n\n    bool tryLock()\n    {\n        auto tid = ::dsn::utils::get_current_tid();\n        if (m_owner.load(std::memory_order_relaxed) == tid) {\n            // Already inside the lock\n            m_contentionCount.fetch_add(1, std::memory_order_relaxed);\n        } else {\n            if (m_contentionCount.load(std::memory_order_relaxed) != 0)\n                return false;\n            int expected = 0;\n            if (!m_contentionCount.compare_exchange_strong(expected, 1, std::memory_order_acquire))\n                return false;\n            //--- We are now inside the lock ---\n            m_owner.store(tid, std::memory_order_relaxed);\n        }\n        m_recursion++;\n        return true;\n    }\n\n    void unlock()\n    {\n#ifndef NDEBUG\n        auto tid = ::dsn::utils::get_current_tid();\n        assert(tid == m_owner.load(std::memory_order_relaxed));\n#endif\n        int recur = --m_recursion;\n        if (recur == 0)\n            m_owner.store(::dsn::utils::INVALID_TID, std::memory_order_relaxed);\n        if (m_contentionCount.fetch_sub(1, std::memory_order_release) > 1) {\n            if (recur == 0)\n                m_sema.signal();\n        }\n        //--- We are now outside the lock ---\n    }\n};\n\n#endif // __CPP11OM_BENAPHORE_H__\n"
  },
  {
    "path": "include/dsn/utility/hpc_locks/bitfield.h",
    "content": "//---------------------------------------------------------\n// For conditions of distribution and use, see\n// https://github.com/preshing/cpp11-on-multicore/blob/master/LICENSE\n//---------------------------------------------------------\n\n#ifndef __CPP11OM_BITFIELD_H__\n#define __CPP11OM_BITFIELD_H__\n\n#include <cassert>\n\n//---------------------------------------------------------\n// BitFieldMember<>: Used internally by ADD_BITFIELD_MEMBER macro.\n// All members are public to simplify compliance with sections 9.0.7 and\n// 9.5.1 of the C++11 standard, thereby avoiding undefined behavior.\n//---------------------------------------------------------\ntemplate <typename T, int Offset, int Bits>\nstruct BitFieldMember\n{\n    T value;\n\n    static_assert(Offset + Bits <= (int)sizeof(T) * 8, \"Member exceeds bitfield boundaries\");\n    static_assert(Bits < (int)sizeof(T) * 8, \"Can't fill entire bitfield with one member\");\n\n    static const T Maximum = (T(1) << Bits) - 1;\n    static const T Mask = Maximum << Offset;\n    T maximum() const { return Maximum; }\n    T one() const { return T(1) << Offset; }\n\n    operator T() const { return (value >> Offset) & Maximum; }\n\n    BitFieldMember &operator=(T v)\n    {\n        assert(v <= Maximum); // v must fit inside the bitfield member\n        value = (value & ~Mask) | (v << Offset);\n        return *this;\n    }\n\n    BitFieldMember &operator+=(T v)\n    {\n        assert(T(*this) + v <= Maximum); // result must fit inside the bitfield member\n        value += v << Offset;\n        return *this;\n    }\n\n    BitFieldMember &operator-=(T v)\n    {\n        assert(T(*this) >= v); // result must not underflow\n        value -= v << Offset;\n        return *this;\n    }\n\n    BitFieldMember &operator++() { return *this += 1; }\n    BitFieldMember &operator++(int) { return *this += 1; } // postfix form\n    BitFieldMember &operator--() { return *this -= 1; }\n    BitFieldMember 
&operator--(int) { return *this -= 1; } // postfix form\n};\n\n//---------------------------------------------------------\n// BitFieldArray<>: Used internally by ADD_BITFIELD_ARRAY macro.\n// All members are public to simplify compliance with sections 9.0.7 and\n// 9.5.1 of the C++11 standard, thereby avoiding undefined behavior.\n//---------------------------------------------------------\ntemplate <typename T, int BaseOffset, int BitsPerItem, int NumItems>\nstruct BitFieldArray\n{\n    T value;\n\n    static_assert(BaseOffset + BitsPerItem * NumItems <= (int)sizeof(T) * 8,\n                  \"Array exceeds bitfield boundaries\");\n    static_assert(BitsPerItem < (int)sizeof(T) * 8,\n                  \"Can't fill entire bitfield with one array element\");\n\n    static const T Maximum = (T(1) << BitsPerItem) - 1;\n    T maximum() const { return Maximum; }\n    int numItems() const { return NumItems; }\n\n    class Element\n    {\n    private:\n        T &value;\n        int offset;\n\n    public:\n        Element(T &value, int offset) : value(value), offset(offset) {}\n        T mask() const { return Maximum << offset; }\n\n        operator T() const { return (value >> offset) & Maximum; }\n\n        Element &operator=(T v)\n        {\n            assert(v <= Maximum); // v must fit inside the bitfield member\n            value = (value & ~mask()) | (v << offset);\n            return *this;\n        }\n\n        Element &operator+=(T v)\n        {\n            assert(T(*this) + v <= Maximum); // result must fit inside the bitfield member\n            value += v << offset;\n            return *this;\n        }\n\n        Element &operator-=(T v)\n        {\n            assert(T(*this) >= v); // result must not underflow\n            value -= v << offset;\n            return *this;\n        }\n\n        Element &operator++() { return *this += 1; }\n        Element &operator++(int) { return *this += 1; } // postfix form\n        Element &operator--() { return 
*this -= 1; }\n        Element &operator--(int) { return *this -= 1; } // postfix form\n    };\n\n    Element operator[](int i)\n    {\n        assert(i >= 0 && i < NumItems); // array index must be in range\n        return Element(value, BaseOffset + BitsPerItem * i);\n    }\n\n    const Element operator[](int i) const\n    {\n        assert(i >= 0 && i < NumItems); // array index must be in range\n        return Element(value, BaseOffset + BitsPerItem * i);\n    }\n};\n\n//---------------------------------------------------------\n// Bitfield definition macros.\n// For usage examples, see RWLock and LockReducedDiningPhilosophers.\n// All members are public to simplify compliance with sections 9.0.7 and\n// 9.5.1 of the C++11 standard, thereby avoiding undefined behavior.\n//---------------------------------------------------------\n#define BEGIN_BITFIELD_TYPE(typeName, T)                                                           \\\n    union typeName                                                                                 \\\n    {                                                                                              \\\n        struct Wrapper                                                                             \\\n        {                                                                                          \\\n            T value;                                                                               \\\n        };                                                                                         \\\n        Wrapper wrapper;                                                                           \\\n        typeName(T v = 0) { wrapper.value = v; }                                                   \\\n        typeName &operator=(T v)                                                                   \\\n        {                                                                                          \\\n            
wrapper.value = v;                                                                     \\\n            return *this;                                                                          \\\n        }                                                                                          \\\n        operator T &() { return wrapper.value; }                                                   \\\n        operator T() const { return wrapper.value; }                                               \\\n        typedef T StorageType;\n\n#define ADD_BITFIELD_MEMBER(memberName, offset, bits)                                              \\\n    BitFieldMember<StorageType, offset, bits> memberName;\n\n#define ADD_BITFIELD_ARRAY(memberName, offset, bits, numItems)                                     \\\n    BitFieldArray<StorageType, offset, bits, numItems> memberName;\n\n#define END_BITFIELD_TYPE()                                                                        \\\n    }                                                                                              \\\n    ;\n\n#endif // __CPP11OM_BITFIELD_H__\n"
  },
  {
    "path": "include/dsn/utility/hpc_locks/readme.txt",
    "content": "from: https://github.com/imzhenyu/cpp11-on-multicore\noriginally from: https://github.com/preshing/cpp11-on-multicore with certain changes\n"
  },
  {
    "path": "include/dsn/utility/hpc_locks/rwlock.h",
    "content": "//---------------------------------------------------------\n// For conditions of distribution and use, see\n// https://github.com/preshing/cpp11-on-multicore/blob/master/LICENSE\n//---------------------------------------------------------\n\n#ifndef __CPP11OM_RWLOCK_H__\n#define __CPP11OM_RWLOCK_H__\n\n#include <cassert>\n#include <atomic>\n#include <random>\n#include <dsn/utility/hpc_locks/sema.h>\n#include <dsn/utility/hpc_locks/bitfield.h>\n\n//---------------------------------------------------------\n// NonRecursiveRWLock\n//---------------------------------------------------------\nclass NonRecursiveRWLock\n{\nprivate:\n    BEGIN_BITFIELD_TYPE(Status, uint32_t)\n    ADD_BITFIELD_MEMBER(readers, 0, 10)\n    ADD_BITFIELD_MEMBER(waitToRead, 10, 10)\n    ADD_BITFIELD_MEMBER(writers, 20, 10)\n    END_BITFIELD_TYPE()\n\n    std::atomic<uint32_t> m_status;\n    DefaultSemaphoreType m_readSema;\n    DefaultSemaphoreType m_writeSema;\n\npublic:\n    NonRecursiveRWLock() : m_status(0) {}\n\n    void lockReader()\n    {\n        Status oldStatus = m_status.load(std::memory_order_relaxed);\n        Status newStatus;\n        do {\n            newStatus = oldStatus;\n            if (oldStatus.writers > 0) {\n                newStatus.waitToRead++;\n            } else {\n                newStatus.readers++;\n            }\n            // CAS until successful. 
On failure, oldStatus will be updated with the latest value.\n        } while (!m_status.compare_exchange_weak(\n            oldStatus, newStatus, std::memory_order_acquire, std::memory_order_relaxed));\n\n        if (oldStatus.writers > 0) {\n            m_readSema.wait();\n        }\n    }\n\n    void unlockReader()\n    {\n        Status oldStatus = m_status.fetch_sub(Status().readers.one(), std::memory_order_release);\n        assert(oldStatus.readers > 0);\n        if (oldStatus.readers == 1 && oldStatus.writers > 0) {\n            m_writeSema.signal();\n        }\n    }\n\n    bool tryLockReader()\n    {\n        Status oldStatus = m_status.load(std::memory_order_relaxed);\n        Status newStatus;\n\n        newStatus = oldStatus;\n        if (oldStatus.writers > 0) {\n            return false;\n        } else {\n            newStatus.readers++;\n        }\n\n        if (m_status.compare_exchange_weak(\n                oldStatus, newStatus, std::memory_order_acquire, std::memory_order_relaxed)) {\n            return true;\n        }\n\n        else {\n            return false;\n        }\n    }\n\n    void lockWriter()\n    {\n        Status oldStatus = m_status.fetch_add(Status().writers.one(), std::memory_order_acquire);\n        assert(oldStatus.writers + 1 <= Status().writers.maximum());\n        if (oldStatus.readers > 0 || oldStatus.writers > 0) {\n            m_writeSema.wait();\n        }\n    }\n\n    void unlockWriter()\n    {\n        Status oldStatus = m_status.load(std::memory_order_relaxed);\n        Status newStatus;\n        uint32_t waitToRead = 0;\n        do {\n            assert(oldStatus.readers == 0);\n            newStatus = oldStatus;\n            newStatus.writers--;\n            waitToRead = oldStatus.waitToRead;\n            if (waitToRead > 0) {\n                newStatus.waitToRead = 0;\n                newStatus.readers = waitToRead;\n            }\n            // CAS until successful. 
On failure, oldStatus will be updated with the latest value.\n        } while (!m_status.compare_exchange_weak(\n            oldStatus, newStatus, std::memory_order_release, std::memory_order_relaxed));\n\n        if (waitToRead > 0) {\n            m_readSema.signal(waitToRead);\n        } else if (oldStatus.writers > 1) {\n            m_writeSema.signal();\n        }\n    }\n\n    bool tryLockWriter()\n    {\n        Status oldStatus = m_status.load(std::memory_order_relaxed);\n        Status newStatus;\n\n        newStatus = oldStatus;\n        if (oldStatus.readers > 0 || oldStatus.writers > 0) {\n            return false;\n        } else {\n            newStatus.writers++;\n        }\n\n        if (m_status.compare_exchange_weak(\n                oldStatus, newStatus, std::memory_order_acquire, std::memory_order_relaxed)) {\n            return true;\n        }\n\n        else {\n            return false;\n        }\n    }\n};\n\n//---------------------------------------------------------\n// ReadLockGuard\n//---------------------------------------------------------\ntemplate <class LockType>\nclass ReadLockGuard\n{\nprivate:\n    LockType &m_lock;\n\npublic:\n    ReadLockGuard(LockType &lock) : m_lock(lock) { m_lock.lockReader(); }\n\n    ~ReadLockGuard() { m_lock.unlockReader(); }\n};\n\n//---------------------------------------------------------\n// WriteLockGuard\n//---------------------------------------------------------\ntemplate <class LockType>\nclass WriteLockGuard\n{\nprivate:\n    LockType &m_lock;\n\npublic:\n    WriteLockGuard(LockType &lock) : m_lock(lock) { m_lock.lockWriter(); }\n\n    ~WriteLockGuard() { m_lock.unlockWriter(); }\n};\n\n#endif // __CPP11OM_RWLOCK_H__\n"
  },
  {
    "path": "include/dsn/utility/hpc_locks/sema.h",
    "content": "//---------------------------------------------------------\n// For conditions of distribution and use, see\n// https://github.com/preshing/cpp11-on-multicore/blob/master/LICENSE\n//---------------------------------------------------------\n\n#ifndef __CPP11OM_SEMAPHORE_H__\n#define __CPP11OM_SEMAPHORE_H__\n\n#include <atomic>\n#include <cassert>\n#include <cerrno>\n\n#if defined(_WIN32)\n//---------------------------------------------------------\n// Semaphore (Windows)\n//---------------------------------------------------------\n\n#include <windows.h>\n#undef min\n#undef max\n\nclass Semaphore\n{\nprivate:\n    HANDLE m_hSema;\n\n    Semaphore(const Semaphore &other) = delete;\n    Semaphore &operator=(const Semaphore &other) = delete;\n\npublic:\n    Semaphore(int initial_count = 0)\n    {\n        assert(initial_count >= 0);\n        m_hSema = CreateSemaphore(NULL, initial_count, MAXLONG, NULL);\n    }\n\n    ~Semaphore() { CloseHandle(m_hSema); }\n\n    void wait() { WaitForSingleObject(m_hSema, INFINITE); }\n\n    bool wait(int timeout_milliseconds)\n    {\n        return WAIT_OBJECT_0 == WaitForSingleObject(m_hSema, timeout_milliseconds);\n    }\n\n    void signal(int count = 1) { ReleaseSemaphore(m_hSema, count, NULL); }\n};\n\n#elif defined(__MACH__)\n//---------------------------------------------------------\n// Semaphore (Apple iOS and OSX)\n// Can't use POSIX semaphores due to\n// http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html\n//---------------------------------------------------------\n\n#include <mach/mach.h>\n\nclass Semaphore\n{\nprivate:\n    semaphore_t m_sema;\n\n    Semaphore(const Semaphore &other) = delete;\n    Semaphore &operator=(const Semaphore &other) = delete;\n\npublic:\n    Semaphore(int initial_count = 0)\n    {\n        assert(initial_count >= 0);\n        semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initial_count);\n    }\n\n    ~Semaphore() { 
semaphore_destroy(mach_task_self(), m_sema); }\n\n    void wait() { semaphore_wait(m_sema); }\n\n    bool wait(int timeout_milliseconds)\n    {\n        // TODO: timeout\n        wait();\n        return true;\n    }\n\n    void signal() { semaphore_signal(m_sema); }\n\n    void signal(int count)\n    {\n        while (count-- > 0) {\n            semaphore_signal(m_sema);\n        }\n    }\n};\n\n#elif defined(__unix__)\n//---------------------------------------------------------\n// Semaphore (POSIX, Linux)\n//---------------------------------------------------------\n\n#include <semaphore.h>\n#include <time.h>\n\nclass Semaphore\n{\nprivate:\n    sem_t m_sema;\n\n    Semaphore(const Semaphore &other) = delete;\n    Semaphore &operator=(const Semaphore &other) = delete;\n\npublic:\n    Semaphore(int initial_count = 0)\n    {\n        assert(initial_count >= 0);\n        sem_init(&m_sema, 0, initial_count);\n    }\n\n    ~Semaphore() { sem_destroy(&m_sema); }\n\n    void wait()\n    {\n        // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error\n        int rc;\n        do {\n            rc = sem_wait(&m_sema);\n        } while (rc == -1 && errno == EINTR);\n    }\n\n    bool wait(int timeout_milliseconds)\n    {\n        assert(timeout_milliseconds >= 0);\n        struct timespec ts;\n        clock_gettime(CLOCK_REALTIME, &ts);\n        ts.tv_sec += timeout_milliseconds / 1000;\n        ts.tv_nsec += timeout_milliseconds % 1000 * 1000000;\n        if (ts.tv_nsec >= 1000000000) {\n            ++ts.tv_sec;\n            ts.tv_nsec -= 1000000000;\n        }\n        assert(ts.tv_nsec >= 0);\n        assert(ts.tv_nsec < 1000000000);\n\n        return sem_timedwait(&m_sema, &ts) == 0;\n    }\n\n    void signal() { sem_post(&m_sema); }\n\n    void signal(int count)\n    {\n        while (count-- > 0) {\n            sem_post(&m_sema);\n        }\n    }\n};\n\n#else\n\n#error Unsupported 
platform!\n\n#endif\n\n//---------------------------------------------------------\n// LightweightSemaphore\n//---------------------------------------------------------\nclass LightweightSemaphore\n{\nprivate:\n    std::atomic<int> m_count;\n    Semaphore m_sema;\n    int m_spin_count;\n\n    void waitWithPartialSpinning()\n    {\n        int oldCount;\n        // Is there a better way to set the initial spin count?\n        // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,\n        // as threads start hitting the kernel semaphore.\n        int spin = m_spin_count;\n        while (spin--) {\n            oldCount = m_count.load(std::memory_order_relaxed);\n            if ((oldCount > 0) &&\n                m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire))\n                return;\n            std::atomic_signal_fence(\n                std::memory_order_acquire); // Prevent the compiler from collapsing the loop.\n        }\n        oldCount = m_count.fetch_sub(1, std::memory_order_acquire);\n        if (oldCount <= 0) {\n            m_sema.wait();\n        }\n    }\n\npublic:\n    LightweightSemaphore(int initial_count = 0, int spin_count = 128)\n        : m_count(initial_count), m_spin_count(spin_count)\n    {\n        assert(initial_count >= 0);\n    }\n\n    bool tryWait()\n    {\n        int oldCount = m_count.load(std::memory_order_relaxed);\n        return (oldCount > 0 &&\n                m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire));\n    }\n\n    void wait()\n    {\n        if (!tryWait())\n            waitWithPartialSpinning();\n    }\n\n    // Be careful! 
Should check the return value, and can consume iff the return value is true.\n    bool wait(int timeout_milliseconds)\n    {\n        int oldCount = m_count.fetch_sub(1, std::memory_order_acquire);\n        if (oldCount > 0)\n            return true;\n        if (m_sema.wait(timeout_milliseconds))\n            return true;\n        m_count.fetch_add(1, std::memory_order_release); // restore the subtracted count\n        return false;\n    }\n\n    void signal(int count = 1)\n    {\n        assert(count >= 1);\n        int oldCount = m_count.fetch_add(count, std::memory_order_release);\n        int toRelease = -oldCount < count ? -oldCount : count;\n        if (toRelease > 0) {\n            m_sema.signal(toRelease);\n        }\n    }\n};\n\ntypedef LightweightSemaphore DefaultSemaphoreType;\n\n#endif // __CPP11OM_SEMAPHORE_H__\n"
  },
  {
    "path": "include/dsn/utility/join_point.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <list>\n#include <functional>\n#include <string>\n#include <tuple>\n#include <dsn/utility/apply.h>\n\nnamespace dsn {\n\n// A join_point instance is a set of lambdas with the identical function signature.\n// It's typically used for creating hooks at the specific execution points,\n// for example:\n//   - on rpc session establishes\n//   - on process begins to exits\n//   - ...\n// Using join_point, we can inject the behavior in these cases in non-intrusive way.\n//\n// NOTE: \"Join point\" is a concept in Aspect-Oriented-Programming. Each \"advice\" is\n// an extension on the join-point. 
It's similar with the \"Interceptor Pattern\".\n//   - https://en.wikipedia.org/wiki/Advice_(programming)\n\ntemplate <typename R, typename... Args>\nclass join_point_base\n{\npublic:\n    explicit join_point_base(const char *name) : _name(name) {}\n\n    virtual ~join_point_base()\n    {\n        _advice_entries.clear();\n        _ret_advice_entries.clear();\n    }\n\n    using ReturnedAdviceT = R(Args...);\n    using AdviceT = void(Args...);\n\n    // TODO(wutao): call it add_returned_advice()\n    void put_native(std::function<ReturnedAdviceT> fn) { _ret_advice_entries.push_front(fn); }\n\n    // TODO(wutao): call it add_advice()\n    void put_back(std::function<AdviceT> fn, const char * /*unused*/)\n    {\n        _advice_entries.push_back(std::move(fn));\n    }\n\n    void put_front(std::function<AdviceT> fn, const char * /*unused*/)\n    {\n        _advice_entries.push_front(std::move(fn));\n    }\n\n    const char *name() const { return _name.c_str(); }\n\nprotected:\n    std::list<std::function<ReturnedAdviceT>> _ret_advice_entries;\n    std::list<std::function<AdviceT>> _advice_entries;\n    const std::string _name;\n\nprivate:\n    friend class join_point_test;\n};\n\ntemplate <typename R, typename... Args>\nclass join_point final : public join_point_base<R, Args...>\n{\npublic:\n    using BaseType = join_point_base<R, Args...>;\n    static_assert(!std::is_void<R>::value, \"type R must not be a void\");\n\n    explicit join_point(const char *name) : BaseType(name) {}\n\n    // Execute the hooks sequentially.\n    R execute(Args... args, R default_return_value)\n    {\n        R ret = default_return_value;\n        for (auto &func : BaseType::_ret_advice_entries) {\n            ret = dsn::apply(func, std::make_tuple(std::forward<Args>(args)...));\n        }\n        for (auto &func : BaseType::_advice_entries) {\n            dsn::apply(func, std::make_tuple(std::forward<Args>(args)...));\n        }\n        return ret;\n    }\n};\n\ntemplate <typename... 
Args>\nclass join_point<void, Args...> final : public join_point_base<void, Args...>\n{\npublic:\n    using BaseType = join_point_base<void, Args...>;\n\n    explicit join_point(const char *name) : BaseType(name) {}\n\n    // Execute the hooks sequentially.\n    void execute(Args... args)\n    {\n        for (auto &func : BaseType::_advice_entries) {\n            dsn::apply(func, std::make_tuple(std::forward<Args>(args)...));\n        }\n    }\n};\n\n} // end namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/link.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     single and double linked list\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <cassert>\n\n//\n// assuming public T::T* next; exists and inited to nullptr in T::T(...)\n//\ntemplate <typename T>\nclass slist\n{\npublic:\n    slist() { _first = _last = nullptr; }\n\n    void add(T *obj)\n    {\n        if (_last) {\n            _last->next = obj;\n            _last = obj;\n        } else {\n            _first = _last = obj;\n        }\n    }\n\n    T *pop_all()\n    {\n        T *ret = _first;\n        _first = _last = nullptr;\n        return ret;\n    }\n\n    T *pop_batch(/*inout*/ int 
&batch_size)\n    {\n        int c = 0;\n        T *next = _first;\n        while (next) {\n            if (++c >= batch_size)\n                break;\n\n            next = next->next;\n        }\n\n        // all returned\n        batch_size = c;\n\n        if (next == nullptr || next == _last) {\n            T *ret = _first;\n            _first = _last = nullptr;\n            return ret;\n        }\n\n        // partially returned\n        else {\n            // next is included\n            T *ret = _first;\n            _first = next->next;\n            next->next = nullptr;\n            return ret;\n        }\n    }\n\n    T *pop_one()\n    {\n        if (_first) {\n            T *ret = _first;\n\n            if (_first == _last)\n                _first = _last = nullptr;\n            else\n                _first = static_cast<T *>(_first->next);\n\n            ret->next = nullptr;\n            return ret;\n        } else\n            return nullptr;\n    }\n\n    bool is_empty() const { return _first == nullptr; }\n\npublic:\n    T *_first;\n    T *_last;\n};\n\nclass dlink\n{\npublic:\n    dlink() { _next = _prev = (this); }\n    dlink *next() const { return _next; }\n    dlink *prev() const { return _prev; }\n    bool is_alone() const { return _next == this; }\n\n    // insert me before existing link node o [p (this) o]\n    void insert_before(dlink *o)\n    {\n        assert(is_alone()); //, \"must not be linked to other list before insert\");\n\n        auto p = o->_prev;\n\n        this->_next = o;\n        o->_prev = (this);\n\n        p->_next = this;\n        this->_prev = p;\n    }\n\n    // insert me after existing link node o [o (this) n]\n    void insert_after(dlink *o)\n    {\n        assert(is_alone()); //, \"must not be linked to other list before insert\");\n\n        auto n = o->_next;\n\n        this->_prev = o;\n        o->_next = this;\n\n        this->_next = n;\n        n->_prev = this;\n    }\n\n    dlink *remove()\n    {\n        if 
(!is_alone()) {\n            this->_next->_prev = this->_prev;\n            this->_prev->_next = this->_next;\n            _next = _prev = this;\n        }\n        return (this);\n    }\n\n    dlink *remove_and_get_next()\n    {\n        if (!is_alone()) {\n            auto next = this->_next;\n            this->_next->_prev = this->_prev;\n            this->_prev->_next = this->_next;\n            _next = _prev = this;\n            return next;\n        } else\n            return nullptr;\n    }\n\n    /*\n     *   BEFORE range_remove:\n     *    this <=> [from <=> ... <=> to] <=> x ...\n     *\n     *   AFTER range_remove:\n     *    this <=> x ...\n     *    from <=> ... <=> to <=> from\n     *\n     *    return from;\n     *\n     *   caller must ensure *to* is valid\n     */\n    dlink *range_remove(dlink *to)\n    {\n        auto from = this->next();\n        auto x = to->next();\n\n        this->_next = x;\n        x->_prev = this;\n\n        to->_next = from;\n        from->_prev = to;\n\n        return from;\n    }\n\nprivate:\n    dlink *_next;\n    dlink *_prev;\n};\n"
  },
  {
    "path": "include/dsn/utility/long_adder.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <atomic>\n#include <cstdint>\n#include <functional>\n#include <memory>\n\n#include <dsn/utility/ports.h>\n\n// Refer to https://github.com/apache/kudu/blob/master/src/kudu/util/striped64.h\n\nnamespace dsn {\n\n// Padded POD container for std::atomic<int64_t>. This prevents false sharing of cache lines.\n// Notice that in older versions of GCC `std::is_pod<std::atomic<int64_t>>::value` will return\n// false, thus cacheline_aligned_int64 is not considered to be a POD. However it doesn't matter.\nclass cacheline_aligned_int64\n{\npublic:\n    static constexpr int kAtomicInt64Size = sizeof(std::atomic<int64_t>);\n\n    cacheline_aligned_int64() = default;\n\n    inline bool compare_and_set(int64_t cmp, int64_t value)\n    {\n        return _value.compare_exchange_weak(cmp, value);\n    }\n\n    // Padding advice from Herb Sutter:\n    // http://www.drdobbs.com/parallel/eliminate-false-sharing/217500206?pgno=4\n    std::atomic<int64_t> _value;\n    char pad[CACHELINE_SIZE > kAtomicInt64Size ? 
CACHELINE_SIZE - kAtomicInt64Size : 1];\n\n    DISALLOW_COPY_AND_ASSIGN(cacheline_aligned_int64);\n} CACHELINE_ALIGNED;\n\nusing cacheline_aligned_int64_ptr =\n    std::unique_ptr<cacheline_aligned_int64, std::function<void(cacheline_aligned_int64 *)>>;\nextern cacheline_aligned_int64_ptr new_cacheline_aligned_int64();\nextern cacheline_aligned_int64_ptr new_cacheline_aligned_int64_array(uint32_t size);\n\n// This set of classes is heavily derived from JSR166e, released into the public domain\n// by Doug Lea and the other authors.\n//\n// See: http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/src/jsr166e/Striped64.java?view=co\n// See: http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/src/jsr166e/LongAdder.java?view=co\n//\n// The striped64 and striped_long_adder implementations here are simplified versions of what's\n// present in JSR166e. However, the core ideas remain the same.\n//\n// Updating a single AtomicInteger in a multi-threaded environment can be quite slow:\n//\n//   1. False sharing of cache lines with other counters.\n//   2. Cache line bouncing from high update rates, especially with many cores.\n//\n// These two problems are addressed by striped64. When there is no contention, it uses CAS on a\n// single base counter to store updates. However, when striped64 detects contention\n// (via a failed CAS operation), it will allocate a small, fixed size hashtable of Cells.\n// A cacheline_aligned_int64 is a simple POD that pads out an atomic<int64_t> to 64 bytes to prevent\n// sharing a cache line.\n//\n// Reading the value of a striped64 requires traversing the hashtable to calculate the true sum.\n//\n// Each updating thread uses a thread-local hashcode to determine its cacheline_aligned_int64 in the\n// hashtable. If a thread fails to CAS its hashed cacheline_aligned_int64, it will do a lightweight\n// rehash operation to try and find an uncontended bucket. 
Because the hashcode is thread-local,\n// this rehash affects all striped64's accessed by the thread. This is good, since contention on one\n// striped64 is indicative of contention elsewhere too.\n//\n// The hashtable is statically sized to the nearest power of 2 greater than or equal to the\n// number of CPUs. This is sufficient, since this guarantees the existence of a perfect hash\n// function. Due to the random rehashing, the threads should eventually converge to this function.\n// In practice, this scheme has shown to be sufficient.\n//\n// The biggest simplification of this implementation compared to JSR166e is that we do not\n// dynamically grow the table, instead immediately allocating it to the full size.\n// We also do not lazily allocate each cacheline_aligned_int64, instead allocating the entire array\n// at once. This means we waste some additional memory in low contention scenarios, and initial\n// allocation will also be slower. Some of the micro-optimizations were also elided for readability.\nclass striped64\n{\npublic:\n    striped64() = default;\n\nprotected:\n    // NOTE: the destructor is not virtual so that we can ensure that striped64\n    // has no vtable, thus reducing its size. We make it protected to ensure that\n    // no one attempts to delete a striped64* and invokes the wrong destructor.\n    ~striped64() = default;\n\n    enum rehash\n    {\n        kRehash,\n        kNoRehash\n    };\n\n    // CAS the base field.\n    inline bool cas_base(int64_t cmp, int64_t val) { return _base.compare_exchange_weak(cmp, val); }\n\n    // Handles cases of updates involving initialization, resizing, creating new Cells, and/or\n    // contention. 
See above for further explanation.\n    //\n    // 'Updater' should be a function which takes the current value and returns\n    // the new value.\n    template <class Updater>\n    void retry_update(rehash to_rehash, Updater updater);\n\n    // Sets base and all cells to the given value.\n    void internal_reset(int64_t initial_value);\n\n    // Base value, used mainly when there is no contention, but also as a fallback during\n    // table initialization races. Updated via CAS.\n    std::atomic<int64_t> _base{0};\n\n    // Memory manager of cells. Once the destructor is called, cells will be freed.\n    cacheline_aligned_int64_ptr _cells_holder;\n\n    // Table of cells. When non-null, size is the nearest power of 2 >= NCPU.\n    // If this is set to -1, the pointer is 'locked' and some thread is in the\n    // process of allocating the array.\n    std::atomic<cacheline_aligned_int64 *> _cells{nullptr};\n\n    static uint64_t get_tls_hashcode();\n\nprivate:\n    DISALLOW_COPY_AND_ASSIGN(striped64);\n\n    // Static hash code per-thread. 
Shared across all instances to limit thread-local pollution.\n    // Also, if a thread hits a collision on one striped64, it's also likely to collide on\n    // other striped64s too.\n    static __thread uint64_t _tls_hashcode;\n};\n\n// A 64-bit number optimized for high-volume concurrent updates.\n// See striped64 for a longer explanation of the inner workings.\nclass striped_long_adder : striped64\n{\npublic:\n    striped_long_adder() = default;\n\n    ~striped_long_adder() = default;\n\n    void increment_by(int64_t x);\n\n    // Returns the current value.\n    // Note this is not an atomic snapshot in the presence of concurrent updates.\n    int64_t value() const;\n\n    // Call reset() ONLY when necessary.\n    inline void reset() { set(0); }\n\n    // Return the value immediately before it's reset.\n    int64_t fetch_and_reset();\n\nprivate:\n    // `set` is not exposed since it's not an efficient operation\n    void set(int64_t val) { internal_reset(val); }\n\n    DISALLOW_COPY_AND_ASSIGN(striped_long_adder);\n};\n\nclass concurrent_long_adder\n{\npublic:\n    concurrent_long_adder();\n    ~concurrent_long_adder() = default;\n\n    void increment_by(int64_t x);\n\n    // Returns the current value.\n    // Note this is not an atomic snapshot in the presence of concurrent updates.\n    int64_t value() const;\n\n    // Call reset() ONLY when necessary.\n    inline void reset() { set(0); }\n\n    // Return the value immediately before it's reset.\n    int64_t fetch_and_reset();\n\nprivate:\n    // `set` is not exposed since it's not an efficient operation\n    void set(int64_t val);\n\n    cacheline_aligned_int64_ptr _cells_holder;\n    cacheline_aligned_int64 *_cells;\n\n    DISALLOW_COPY_AND_ASSIGN(concurrent_long_adder);\n};\n\n// Use template to wrap a long_adder implementation rather than inherit from a base class for\n// the reason that virtual function will increase the class size and slow down the execution.\ntemplate <typename Adder>\nclass 
long_adder_wrapper\n{\npublic:\n    long_adder_wrapper() = default;\n\n    ~long_adder_wrapper() = default;\n\n    inline void increment_by(int64_t x) { adder.increment_by(x); }\n    inline void increment() { increment_by(1); }\n    inline void decrement() { increment_by(-1); }\n\n    // Returns the current value.\n    // Note this is not an atomic snapshot in the presence of concurrent updates.\n    inline int64_t value() const { return adder.value(); }\n\n    // Resets the counter state to zero. Call it ONLY when necessary.\n    inline void reset() { adder.reset(); }\n\n    // Return the value immediately before it's reset.\n    inline int64_t fetch_and_reset() { return adder.fetch_and_reset(); }\n\nprivate:\n    Adder adder;\n\n    DISALLOW_COPY_AND_ASSIGN(long_adder_wrapper);\n};\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/math.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <vector>\n#include <cstdint>\n\nnamespace dsn {\nnamespace utils {\n\ndouble mean_stddev(const std::vector<uint32_t> &result_set, bool partial_sample);\n\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/metrics.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <algorithm>\n#include <atomic>\n#include <bitset>\n#include <functional>\n#include <memory>\n#include <mutex>\n#include <set>\n#include <string>\n#include <type_traits>\n#include <unordered_map>\n#include <utility>\n#include <vector>\n\n#include <boost/asio/deadline_timer.hpp>\n\n#include <dsn/c/api_utilities.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/alloc.h>\n#include <dsn/utility/autoref_ptr.h>\n#include <dsn/utility/casts.h>\n#include <dsn/utility/enum_helper.h>\n#include <dsn/utility/long_adder.h>\n#include <dsn/utility/nth_element.h>\n#include <dsn/utility/ports.h>\n#include <dsn/utility/singleton.h>\n#include <dsn/utility/string_view.h>\n\n// A metric library (for details pls see https://github.com/apache/incubator-pegasus/issues/922)\n// inspired by Kudu metrics (https://github.com/apache/kudu/blob/master/src/kudu/util/metrics.h).\n//\n//\n// Example of defining and instantiating a metric entity\n// -----------------------------------------------------\n// Define an entity type at the top of your .cpp file (not within any namespace):\n// METRIC_DEFINE_entity(my_entity);\n//\n// To use the 
entity type, declare it at the top of any .h/.cpp file (not within any namespace):\n// METRIC_DECLARE_entity(my_entity);\n//\n// Instantiating the entity in whatever class represents it:\n// entity_instance = METRIC_ENTITY_my_entity.instantiate(my_entity_id, ...);\n//\n//\n// Example of defining and instantiating a metric\n// -----------------------------------------------------\n// Define an entity type at the top of your .cpp file (not within any namespace):\n// METRIC_DEFINE_gauge_int64(my_entity,\n//                           my_gauge_name,\n//                           dsn::metric_unit::kMilliSeconds,\n//                           \"the description for my gauge\");\n//\n// To use the metric prototype, declare it at the top of any .h/.cpp file (not within any\n// namespace):\n// METRIC_DECLARE_gauge_int64(my_gauge_name);\n//\n// Instantiating the metric in whatever class represents it with some initial arguments, if any:\n// metric_instance = METRIC_my_gauge_name.instantiate(entity_instance, ...);\n\n// Convenient macros are provided to define entity types and metric prototypes.\n#define METRIC_DEFINE_entity(name) ::dsn::metric_entity_prototype METRIC_ENTITY_##name(#name)\n#define METRIC_DEFINE_gauge_int64(entity_type, name, unit, desc, ...)                              \\\n    ::dsn::gauge_prototype<int64_t> METRIC_##name({#entity_type, #name, unit, desc, ##__VA_ARGS__})\n#define METRIC_DEFINE_gauge_double(entity_type, name, unit, desc, ...)                             \\\n    ::dsn::gauge_prototype<double> METRIC_##name({#entity_type, #name, unit, desc, ##__VA_ARGS__})\n// There are 2 kinds of counters:\n// - `counter` is the general type of counter that is implemented by striped_long_adder, which can\n//   achieve high performance while consuming less memory if it's not updated very frequently.\n// - `concurrent_counter` uses concurrent_long_adder as the underlying implementation. 
It has\n//   higher performance while consuming more memory if it's updated very frequently.\n// See also include/dsn/utility/long_adder.h for details.\n#define METRIC_DEFINE_counter(entity_type, name, unit, desc, ...)                                  \\\n    dsn::counter_prototype<dsn::striped_long_adder, false> METRIC_##name(                          \\\n        {#entity_type, #name, unit, desc, ##__VA_ARGS__})\n#define METRIC_DEFINE_concurrent_counter(entity_type, name, unit, desc, ...)                       \\\n    dsn::counter_prototype<dsn::concurrent_long_adder, false> METRIC_##name(                       \\\n        {#entity_type, #name, unit, desc, ##__VA_ARGS__})\n#define METRIC_DEFINE_volatile_counter(entity_type, name, unit, desc, ...)                         \\\n    dsn::counter_prototype<dsn::striped_long_adder, true> METRIC_##name(                           \\\n        {#entity_type, #name, unit, desc, ##__VA_ARGS__})\n#define METRIC_DEFINE_concurrent_volatile_counter(entity_type, name, unit, desc, ...)              \\\n    dsn::counter_prototype<dsn::concurrent_long_adder, true> METRIC_##name(                        \\\n        {#entity_type, #name, unit, desc, ##__VA_ARGS__})\n\n// The percentile supports both integral and floating types.\n#define METRIC_DEFINE_percentile_int64(entity_type, name, unit, desc, ...)                         \\\n    dsn::percentile_prototype<int64_t> METRIC_##name(                                              \\\n        {#entity_type, #name, unit, desc, ##__VA_ARGS__})\n#define METRIC_DEFINE_percentile_double(entity_type, name, unit, desc, ...)                        
\\\n    dsn::floating_percentile_prototype<double> METRIC_##name(                                      \\\n        {#entity_type, #name, unit, desc, ##__VA_ARGS__})\n\n// The following macros act as forward declarations for entity types and metric prototypes.\n#define METRIC_DECLARE_entity(name) extern ::dsn::metric_entity_prototype METRIC_ENTITY_##name\n#define METRIC_DECLARE_gauge_int64(name) extern ::dsn::gauge_prototype<int64_t> METRIC_##name\n#define METRIC_DECLARE_gauge_double(name) extern ::dsn::gauge_prototype<double> METRIC_##name\n#define METRIC_DECLARE_counter(name)                                                               \\\n    extern dsn::counter_prototype<dsn::striped_long_adder, false> METRIC_##name\n#define METRIC_DECLARE_concurrent_counter(name)                                                    \\\n    extern dsn::counter_prototype<dsn::concurrent_long_adder, false> METRIC_##name\n#define METRIC_DECLARE_volatile_counter(name)                                                      \\\n    extern dsn::counter_prototype<dsn::striped_long_adder, true> METRIC_##name\n#define METRIC_DECLARE_concurrent_volatile_counter(name)                                           \\\n    extern dsn::counter_prototype<dsn::concurrent_long_adder, true> METRIC_##name\n#define METRIC_DECLARE_percentile_int64(name)                                                      \\\n    extern dsn::percentile_prototype<int64_t> METRIC_##name\n#define METRIC_DECLARE_percentile_double(name)                                                     \\\n    extern dsn::floating_percentile_prototype<double> METRIC_##name\n\nnamespace dsn {\n\nclass metric_prototype;\nclass metric;\nusing metric_ptr = ref_ptr<metric>;\n\nclass metric_entity : public ref_counter\n{\npublic:\n    using attr_map = std::unordered_map<std::string, std::string>;\n    using metric_map = std::unordered_map<const metric_prototype *, metric_ptr>;\n\n    const std::string &id() const { return _id; }\n\n    attr_map 
attributes() const;\n\n    metric_map metrics() const;\n\n    // args are the parameters that are used to construct the object of MetricType\n    template <typename MetricType, typename... Args>\n    ref_ptr<MetricType> find_or_create(const metric_prototype *prototype, Args &&... args)\n    {\n        std::lock_guard<std::mutex> guard(_mtx);\n\n        metric_map::const_iterator iter = _metrics.find(prototype);\n        if (iter != _metrics.end()) {\n            auto raw_ptr = down_cast<MetricType *>(iter->second.get());\n            return raw_ptr;\n        }\n\n        ref_ptr<MetricType> ptr(new MetricType(prototype, std::forward<Args>(args)...));\n        _metrics[prototype] = ptr;\n        return ptr;\n    }\n\nprivate:\n    friend class metric_registry;\n    friend class ref_ptr<metric_entity>;\n\n    metric_entity(const std::string &id, attr_map &&attrs);\n\n    ~metric_entity();\n\n    void set_attributes(attr_map &&attrs);\n\n    const std::string _id;\n\n    mutable std::mutex _mtx;\n    attr_map _attrs;\n    metric_map _metrics;\n\n    DISALLOW_COPY_AND_ASSIGN(metric_entity);\n};\n\nusing metric_entity_ptr = ref_ptr<metric_entity>;\n\nclass metric_entity_prototype\n{\npublic:\n    explicit metric_entity_prototype(const char *name);\n    ~metric_entity_prototype();\n\n    const char *name() const { return _name; }\n\n    // Create an entity with the given ID and attributes, if any.\n    metric_entity_ptr instantiate(const std::string &id, metric_entity::attr_map attrs) const;\n    metric_entity_ptr instantiate(const std::string &id) const;\n\nprivate:\n    const char *const _name;\n\n    DISALLOW_COPY_AND_ASSIGN(metric_entity_prototype);\n};\n\nclass metric_registry : public utils::singleton<metric_registry>\n{\npublic:\n    using entity_map = std::unordered_map<std::string, metric_entity_ptr>;\n\n    entity_map entities() const;\n\nprivate:\n    friend class metric_entity_prototype;\n    friend class utils::singleton<metric_registry>;\n\n    
metric_registry();\n    ~metric_registry();\n\n    metric_entity_ptr find_or_create_entity(const std::string &id, metric_entity::attr_map &&attrs);\n\n    mutable std::mutex _mtx;\n    entity_map _entities;\n\n    DISALLOW_COPY_AND_ASSIGN(metric_registry);\n};\n\nenum class metric_unit\n{\n    kNanoSeconds,\n    kMicroSeconds,\n    kMilliSeconds,\n    kSeconds,\n    kRequests,\n    kInvalidUnit,\n};\n\nENUM_BEGIN(metric_unit, metric_unit::kInvalidUnit)\nENUM_REG(metric_unit::kNanoSeconds)\nENUM_REG(metric_unit::kMicroSeconds)\nENUM_REG(metric_unit::kMilliSeconds)\nENUM_REG(metric_unit::kSeconds)\nENUM_END(metric_unit)\n\nclass metric_prototype\n{\npublic:\n    struct ctor_args\n    {\n        const string_view entity_type;\n        const string_view name;\n        const metric_unit unit;\n        const string_view desc;\n    };\n\n    string_view entity_type() const { return _args.entity_type; }\n\n    string_view name() const { return _args.name; }\n\n    metric_unit unit() const { return _args.unit; }\n\n    string_view description() const { return _args.desc; }\n\nprotected:\n    explicit metric_prototype(const ctor_args &args);\n    virtual ~metric_prototype();\n\nprivate:\n    const ctor_args _args;\n\n    DISALLOW_COPY_AND_ASSIGN(metric_prototype);\n};\n\n// metric_prototype_with<MetricType> can help to implement the prototype of each type of metric\n// to construct a metric object conveniently.\ntemplate <typename MetricType>\nclass metric_prototype_with : public metric_prototype\n{\npublic:\n    explicit metric_prototype_with(const ctor_args &args) : metric_prototype(args) {}\n    virtual ~metric_prototype_with() = default;\n\n    // Construct a metric object based on the instance of metric_entity.\n    template <typename... Args>\n    ref_ptr<MetricType> instantiate(const metric_entity_ptr &entity, Args &&... 
args) const\n    {\n        return entity->find_or_create<MetricType>(this, std::forward<Args>(args)...);\n    }\n\nprivate:\n    DISALLOW_COPY_AND_ASSIGN(metric_prototype_with);\n};\n\n// Base class for each type of metric.\n// Every metric class should inherit from this class.\n//\n// User object should hold a ref_ptr of a metric, while the entity will hold another ref_ptr.\n// The ref count of a metric may become 1, which means the metric is only held by the entity:\n// After a period of configurable time, if the ref count is still 1, the metric will be dropped\n// in that it's considered to be useless. During the period when the metric is retained, once\n// the same one is instantiated again, it will not be removed; whether the metric is instantiated,\n// however, its latest value is visible.\nclass metric : public ref_counter\n{\npublic:\n    const metric_prototype *prototype() const { return _prototype; }\n\nprotected:\n    explicit metric(const metric_prototype *prototype);\n    virtual ~metric() = default;\n\n    const metric_prototype *const _prototype;\n\nprivate:\n    DISALLOW_COPY_AND_ASSIGN(metric);\n};\n\n// A gauge is a metric that represents a single numerical value that can arbitrarily go up and\n// down. Usually there are 2 scenarios for a gauge.\n//\n// Firstly, a gauge can be used as an instantaneous measurement of a discrete value. Typical\n// usages in this scenario are current memory usage, the total capacity and available ratio of\n// a disk, etc.\n//\n// Secondly, a gauge can be used as a counter that increases and decreases. 
In this scenario only\n// integral types are supported, and its typical usages are the number of tasks in queues, current\n// number of running manual compacts, etc.\ntemplate <typename T, typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>\nclass gauge : public metric\n{\npublic:\n    T value() const { return _value.load(std::memory_order_relaxed); }\n\n    void set(const T &val) { _value.store(val, std::memory_order_relaxed); }\n\n    template <typename Int = T,\n              typename = typename std::enable_if<std::is_integral<Int>::value>::type>\n    void increment_by(Int x)\n    {\n        _value.fetch_add(x, std::memory_order_relaxed);\n    }\n\n    template <typename Int = T,\n              typename = typename std::enable_if<std::is_integral<Int>::value>::type>\n    void decrement_by(Int x)\n    {\n        increment_by(-x);\n    }\n\n    template <typename Int = T,\n              typename = typename std::enable_if<std::is_integral<Int>::value>::type>\n    void increment()\n    {\n        increment_by(1);\n    }\n\n    template <typename Int = T,\n              typename = typename std::enable_if<std::is_integral<Int>::value>::type>\n    void decrement()\n    {\n        increment_by(-1);\n    }\n\nprotected:\n    gauge(const metric_prototype *prototype, const T &initial_val)\n        : metric(prototype), _value(initial_val)\n    {\n    }\n\n    gauge(const metric_prototype *prototype);\n\n    virtual ~gauge() = default;\n\nprivate:\n    friend class metric_entity;\n    friend class ref_ptr<gauge<T>>;\n\n    std::atomic<T> _value;\n\n    DISALLOW_COPY_AND_ASSIGN(gauge);\n};\n\ntemplate <>\ngauge<int64_t>::gauge(const metric_prototype *prototype) : gauge(prototype, 0)\n{\n}\n\ntemplate <>\ngauge<double>::gauge(const metric_prototype *prototype) : gauge(prototype, 0.0)\n{\n}\n\ntemplate <typename T>\nusing gauge_ptr = ref_ptr<gauge<T>>;\n\ntemplate <typename T>\nusing gauge_prototype = metric_prototype_with<gauge<T>>;\n\n// A counter in 
essence is a 64-bit integer that increases monotonically. It should be noted that\n// the counter does not support decreasing. If decreasing is needed, please consider using the\n// gauge instead.\n//\n// The counter can be typically used to measure the number of processed requests, which in the\n// future can help to compute the QPS. All counters start out at 0, and are non-negative\n// since they are monotonic.\n//\n// `IsVolatile` is false by default. Once it's specified as true, the counter will be volatile.\n// The value() function of a volatile counter will reset the counter atomically after its value\n// is fetched. A volatile counter can also be called a \"recent\" counter.\n//\n// Sometimes \"recent\" counters are needed, such as the number of recent failed beacons sent from\n// replica server, the count of updating configurations of partitions recently, etc. The \"recent\"\n// count can be considered to be the accumulated count since it has been fetched last by value().\n//\n// In most cases, a general (i.e. non-volatile) counter is enough, which means it can also work\n// for \"recent\" counters. For example, in Prometheus, delta() can be used to compute \"recent\"\n// count for a general counter. 
Therefore, declare a counter as volatile only when necessary.\ntemplate <typename Adder = striped_long_adder, bool IsVolatile = false>\nclass counter : public metric\n{\npublic:\n    // To decide which member function should be called by template parameter, the parameter\n    // should be one of the class template parameters in case that the parameter is needed to\n    // be written each time the member function is called.\n    //\n    // Using class template parameter to decide which member function should be called, another\n    // function template parameter with the same meaning should be introduced, since the class\n    // template parameter cannot be used as a function template parameter again and will lead\n    // to compilation error.\n    template <bool Volatile = IsVolatile,\n              typename = typename std::enable_if<!Volatile && !IsVolatile>::type>\n    int64_t value() const\n    {\n        return _adder.value();\n    }\n\n    template <bool Volatile = IsVolatile,\n              typename = typename std::enable_if<Volatile && IsVolatile>::type>\n    int64_t value()\n    {\n        return _adder.fetch_and_reset();\n    }\n\n    // NOTICE: x MUST be a non-negative integer.\n    void increment_by(int64_t x)\n    {\n        dassert_f(x >= 0, \"delta({}) by increment for counter must be a non-negative integer\", x);\n        _adder.increment_by(x);\n    }\n\n    void increment() { _adder.increment(); }\n\n    void reset() { _adder.reset(); }\n\nprotected:\n    counter(const metric_prototype *prototype) : metric(prototype) {}\n\n    virtual ~counter() = default;\n\nprivate:\n    friend class metric_entity;\n    friend class ref_ptr<counter<Adder, IsVolatile>>;\n\n    long_adder_wrapper<Adder> _adder;\n\n    DISALLOW_COPY_AND_ASSIGN(counter);\n};\n\ntemplate <typename Adder = striped_long_adder, bool IsVolatile = false>\nusing counter_ptr = ref_ptr<counter<Adder, IsVolatile>>;\n\ntemplate <bool IsVolatile = false>\nusing concurrent_counter_ptr = 
counter_ptr<concurrent_long_adder, IsVolatile>;\n\ntemplate <typename Adder = striped_long_adder, bool IsVolatile = false>\nusing counter_prototype = metric_prototype_with<counter<Adder, IsVolatile>>;\n\ntemplate <typename Adder = striped_long_adder>\nusing volatile_counter_ptr = ref_ptr<counter<Adder, true>>;\n\nusing concurrent_volatile_counter_ptr = counter_ptr<concurrent_long_adder, true>;\n\ntemplate <typename Adder = striped_long_adder>\nusing volatile_counter_prototype = metric_prototype_with<counter<Adder, true>>;\n\n// All supported kinds of kth percentiles. User can configure required kth percentiles for\n// each percentile. Only configured kth percentiles will be computed. This can reduce CPU\n// consumption.\nenum class kth_percentile_type : size_t\n{\n    P50,\n    P90,\n    P95,\n    P99,\n    P999,\n    COUNT,\n    INVALID\n};\n\n// Support to load from configuration files for percentiles.\nENUM_BEGIN(kth_percentile_type, kth_percentile_type::INVALID)\nENUM_REG(kth_percentile_type::P50)\nENUM_REG(kth_percentile_type::P90)\nENUM_REG(kth_percentile_type::P95)\nENUM_REG(kth_percentile_type::P99)\nENUM_REG(kth_percentile_type::P999)\nENUM_END(kth_percentile_type)\n\nconst std::vector<double> kKthDecimals = {0.5, 0.9, 0.95, 0.99, 0.999};\n\ninline size_t kth_percentile_to_nth_index(size_t size, size_t kth_index)\n{\n    auto decimal = kKthDecimals[kth_index];\n    // Since the kth percentile is the value that is greater than k percent of the data values after\n    // ranking them (https://people.richland.edu/james/ictcm/2001/descriptive/helpposition.html),\n    // compute the nth index by size * decimal rather than size * decimal - 1.\n    return static_cast<size_t>(size * decimal);\n}\n\ninline size_t kth_percentile_to_nth_index(size_t size, kth_percentile_type type)\n{\n    return kth_percentile_to_nth_index(size, static_cast<size_t>(type));\n}\n\nstd::set<kth_percentile_type> get_all_kth_percentile_types()\n{\n    std::set<kth_percentile_type> 
all_types;\n    for (size_t i = 0; i < static_cast<size_t>(kth_percentile_type::COUNT); ++i) {\n        all_types.insert(static_cast<kth_percentile_type>(i));\n    }\n    return all_types;\n}\nconst std::set<kth_percentile_type> kAllKthPercentileTypes = get_all_kth_percentile_types();\n\n// `percentile_timer` is a timer class that encapsulates the details how each percentile is\n// computed periodically.\n//\n// To be instantiated, it requires `interval_ms` at which a percentile is computed and `exec`\n// which is used to compute percentile.\n//\n// In case that all percentiles are computed at the same time and lead to very high load,\n// first computation for percentile will be delayed at a random interval.\nclass percentile_timer\n{\npublic:\n    using exec_fn = std::function<void()>;\n\n    percentile_timer(uint64_t interval_ms, exec_fn exec);\n    ~percentile_timer() = default;\n\n    // Get the initial delay that is randomly generated by `generate_initial_delay_ms()`.\n    uint64_t get_initial_delay_ms() const { return _initial_delay_ms; }\n\nprivate:\n    // Generate an initial delay randomly in case that all percentiles are computed at the\n    // same time.\n    static uint64_t generate_initial_delay_ms(uint64_t interval_ms);\n\n    void on_timer(const boost::system::error_code &ec);\n\n    const uint64_t _initial_delay_ms;\n    const uint64_t _interval_ms;\n    const exec_fn _exec;\n    std::unique_ptr<boost::asio::deadline_timer> _timer;\n};\n\n// The percentile is a metric type that samples observations. The size of samples has an upper\n// bound. Once the maximum size is reached, the earliest observations will be overwritten.\n//\n// On the other hand, kth percentiles, such as P50, P90, P95, P99, P999, will be calculated\n// periodically over all samples. The kth percentiles which are calculated are configurable\n// provided that they are of valid kth_percentile_type (i.e. 
in kAllKthPercentileTypes).\n//\n// The most common usage of percentile is latency, such as server-level and replica-level\n// latencies. For example, if P99 latency is 10 ms, it means the latencies of 99% requests\n// are less than 10 ms.\n//\n// The percentile is implemented by the finder for nth elements. Each kth percentile is firstly\n// converted to nth index; then, find the element corresponding to the nth index.\ntemplate <typename T,\n          typename NthElementFinder = stl_nth_element_finder<T>,\n          typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>\nclass percentile : public metric\n{\npublic:\n    using value_type = T;\n    using size_type = typename NthElementFinder::size_type;\n\n    void set(const value_type &val)\n    {\n        const auto index = _tail.fetch_add(1, std::memory_order_relaxed);\n        _samples.get()[index & (_sample_size - 1)] = val;\n    }\n\n    // If `type` is not configured, it will return false with zero value stored in `val`;\n    // otherwise, it will always return true with the value corresponding to `type`.\n    bool get(kth_percentile_type type, value_type &val) const\n    {\n        const auto index = static_cast<size_t>(type);\n        dcheck_lt(index, static_cast<size_t>(kth_percentile_type::COUNT));\n\n        val = _full_nth_elements[index].load(std::memory_order_relaxed);\n        return _kth_percentile_bitset.test(index);\n    }\n\n    bool timer_enabled() const { return !!_timer; }\n\n    uint64_t get_initial_delay_ms() const\n    {\n        return timer_enabled() ? _timer->get_initial_delay_ms() : 0;\n    }\n\n    static const size_type kDefaultSampleSize = 4096;\n\nprotected:\n    // interval_ms is the interval between the computations for percentiles. Its unit is\n    // milliseconds. 
It's suggested that interval_ms should be near the period between pulls\n    // from or pushes to the monitoring system.\n    percentile(const metric_prototype *prototype,\n               uint64_t interval_ms = 10000,\n               const std::set<kth_percentile_type> &kth_percentiles = kAllKthPercentileTypes,\n               size_type sample_size = kDefaultSampleSize)\n        : metric(prototype),\n          _sample_size(sample_size),\n          _last_real_sample_size(0),\n          _samples(cacheline_aligned_alloc_array<value_type>(sample_size, value_type{})),\n          _tail(0),\n          _kth_percentile_bitset(),\n          _full_nth_elements(static_cast<size_t>(kth_percentile_type::COUNT)),\n          _nth_element_finder(),\n          _timer()\n    {\n        dassert(_sample_size > 0 && (_sample_size & (_sample_size - 1)) == 0,\n                \"sample_sizes should be > 0 and power of 2\");\n\n        dassert(_samples, \"_samples should be valid pointer\");\n\n        for (const auto &kth : kth_percentiles) {\n            _kth_percentile_bitset.set(static_cast<size_t>(kth));\n        }\n\n        for (size_type i = 0; i < _full_nth_elements.size(); ++i) {\n            _full_nth_elements[i].store(value_type{}, std::memory_order_relaxed);\n        }\n\n#ifdef DSN_MOCK_TEST\n        if (interval_ms == 0) {\n            // Timer is disabled.\n            return;\n        }\n#else\n        dcheck_gt(interval_ms, 0);\n#endif\n\n        _timer.reset(new percentile_timer(\n            interval_ms,\n            std::bind(&percentile<value_type, NthElementFinder>::find_nth_elements, this)));\n    }\n\n    virtual ~percentile() = default;\n\nprivate:\n    using nth_container_type = typename NthElementFinder::nth_container_type;\n\n    friend class metric_entity;\n    friend class ref_ptr<percentile<value_type, NthElementFinder>>;\n\n    void find_nth_elements()\n    {\n        size_type real_sample_size = std::min(static_cast<size_type>(_tail.load()), 
_sample_size);\n        if (real_sample_size == 0) {\n            // No need to find since there has not been any sample yet.\n            return;\n        }\n\n        // If the size of samples changes, the nth indexs should be updated.\n        if (real_sample_size != _last_real_sample_size) {\n            set_real_nths(real_sample_size);\n            _last_real_sample_size = real_sample_size;\n        }\n\n        // Find nth elements.\n        std::vector<T> array(real_sample_size);\n        std::copy(_samples.get(), _samples.get() + real_sample_size, array.begin());\n        _nth_element_finder(array.begin(), array.begin(), array.end());\n\n        // Store nth elements.\n        const auto &elements = _nth_element_finder.elements();\n        for (size_t i = 0, next = 0; i < static_cast<size_t>(kth_percentile_type::COUNT); ++i) {\n            if (!_kth_percentile_bitset.test(i)) {\n                continue;\n            }\n            _full_nth_elements[i].store(elements[next++], std::memory_order_relaxed);\n        }\n    }\n\n    void set_real_nths(size_type real_sample_size)\n    {\n        nth_container_type nths;\n        for (size_t i = 0; i < static_cast<size_t>(kth_percentile_type::COUNT); ++i) {\n            if (!_kth_percentile_bitset.test(i)) {\n                continue;\n            }\n\n            auto size = static_cast<size_t>(real_sample_size);\n            auto nth = static_cast<size_type>(kth_percentile_to_nth_index(size, i));\n            nths.push_back(nth);\n        }\n\n        _nth_element_finder.set_nths(nths);\n    }\n\n    const size_type _sample_size;\n    size_type _last_real_sample_size;\n    cacheline_aligned_ptr<value_type> _samples;\n    std::atomic<uint64_t> _tail; // use unsigned int to avoid running out of bound\n    std::bitset<static_cast<size_t>(kth_percentile_type::COUNT)> _kth_percentile_bitset;\n    std::vector<std::atomic<value_type>> _full_nth_elements;\n    NthElementFinder _nth_element_finder;\n\n    
std::unique_ptr<percentile_timer> _timer;\n};\n\ntemplate <typename T,\n          typename NthElementFinder = stl_nth_element_finder<T>,\n          typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>\nusing percentile_ptr = ref_ptr<percentile<T, NthElementFinder>>;\n\ntemplate <typename T,\n          typename NthElementFinder = stl_nth_element_finder<T>,\n          typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>\nusing percentile_prototype = metric_prototype_with<percentile<T, NthElementFinder>>;\n\ntemplate <typename T,\n          typename NthElementFinder = floating_stl_nth_element_finder<T>,\n          typename = typename std::enable_if<std::is_floating_point<T>::value>::type>\nusing floating_percentile = percentile<T, NthElementFinder>;\n\ntemplate <typename T,\n          typename NthElementFinder = floating_stl_nth_element_finder<T>,\n          typename = typename std::enable_if<std::is_floating_point<T>::value>::type>\nusing floating_percentile_ptr = ref_ptr<floating_percentile<T, NthElementFinder>>;\n\ntemplate <typename T,\n          typename NthElementFinder = floating_stl_nth_element_finder<T>,\n          typename = typename std::enable_if<std::is_floating_point<T>::value>::type>\nusing floating_percentile_prototype =\n    metric_prototype_with<floating_percentile<T, NthElementFinder>>;\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/nth_element.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <algorithm>\n#include <cstdint>\n#include <functional>\n#include <limits>\n#include <type_traits>\n#include <utility>\n#include <vector>\n\n#include <fmt/format.h>\n\n#include <dsn/c/api_utilities.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/ports.h>\n\nnamespace dsn {\n\n// The finder helps to find multiple nth elements of a sequence container (e.g. std::vector)\n// at a time, based on nth_element() of STL.\ntemplate <typename T, typename Compare = std::less<T>>\nclass stl_nth_element_finder\n{\npublic:\n    using value_type = T;\n    using container_type = std::vector<value_type>;\n    using size_type = typename container_type::size_type;\n    using nth_container_type = std::vector<size_type>;\n\n    stl_nth_element_finder(const Compare &comp = Compare()) : _nths(), _elements(), _comp(comp) {}\n\n    // Set with specified nth indexes. An nth index is typically an index of the sequence\n    // container (e.g. std::vector). 
This method allows nth indexes to be updated dynamically.\n    //\n    // There are 2 reasons why both `_nths` and `_elements` are put into the sequence container:\n    //\n    // (1) The users of stl_nth_element_finder, such as the metric of percentile, may pass\n    // duplicate nth indexes to `_nths`. For example, suppose that the sampled window size is\n    // 100, both P99 and P999 will have the same nth element -- namely 99th element. Thus it's\n    // will be convenient for users if `nths` can contain duplicate elements.\n    //\n    // The sequence container can contain duplicate elements, even if all elements in the container\n    // are sorted. Therefore, there may be identical indexes in `nths`.\n    //\n    // (2) The sequence container is more cache-friendly. While an nth element is selected, it's\n    // cache-friendly to write it into `_elements`. After all nth elements are collected into\n    // `_elements`, scanning them (`elements()`) is also cache-friendly, even if there are many\n    // nth indexes in `_nths`. In contrast to this, access directly to the nth element in array\n    // will not be cache-friendly especially when the array is large.\n    //\n    // Notice that the indexes in `nths` list must be ordered. After `operator()` is executed,\n    // the elements returned by `elements()` will be in the order of the sorted nth indexes.\n    void set_nths(const nth_container_type &nths)\n    {\n        _nths = nths;\n        dassert_f(std::is_sorted(_nths.begin(), _nths.end()),\n                  \"nth indexes({}) is not sorted\",\n                  fmt::join(_nths, \" \"));\n\n        _elements.assign(_nths.size(), value_type{});\n    }\n\n    // Find the multiple nth elements.\n    //\n    // Typically `begin` is the beginning iterator of the sequence container. 
`begin` plus each\n    // member of `_nths` will be the real nth element of the sequence container.\n    //\n    // [first, last) is the real range for finding the multiple nth elements.\n    template <typename RandomAccessIterator>\n    void\n    operator()(RandomAccessIterator begin, RandomAccessIterator first, RandomAccessIterator last)\n    {\n        for (size_type i = 0; i < _nths.size();) {\n            auto nth_iter = begin + _nths[i];\n            dassert_f(nth_iter >= first && nth_iter < last, \"Invalid iterators for nth_element()\");\n            std::nth_element(first, nth_iter, last, _comp);\n            _elements[i] = *nth_iter;\n\n            // Identical nth indexes should be processed. See `set_nths()` for details.\n            for (++i; i < _nths.size() && _nths[i] == _nths[i - 1]; ++i) {\n                _elements[i] = *nth_iter;\n            }\n\n            first = nth_iter + 1;\n        }\n    }\n\n    const container_type &elements() const { return _elements; }\n\nprivate:\n    nth_container_type _nths;\n    container_type _elements;\n    Compare _comp;\n\n    DISALLOW_COPY_AND_ASSIGN(stl_nth_element_finder);\n};\n\ntemplate <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>\nclass floating_comparator\n{\npublic:\n    bool operator()(const T &lhs, const T &rhs) const\n    {\n        return rhs - lhs >= std::numeric_limits<T>::epsilon();\n    }\n};\n\ntemplate <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>\nusing floating_stl_nth_element_finder = stl_nth_element_finder<T, floating_comparator<T>>;\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/optional.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     A naive implementation of optional type. 
Mainly for avoiding boost dependency.\n *\n * Revision history:\n *     2016-01-15, Tianyi Wang, first version\n *     2016-01-25, Tianyi Wang, add none placeholder\n */\n\n#pragma once\n#include <utility>\n\nnamespace dsn {\nstruct none_placeholder_t\n{\n};\nconstexpr none_placeholder_t none{};\n\ntemplate <typename T>\nclass optional\n{\n    bool _is_some;\n    char _data_placeholder[sizeof(T)];\n\npublic:\n    optional() : _is_some(false) {}\n    /*implicit*/ optional(none_placeholder_t) : optional() {}\n    /*implicit*/ optional(const optional &that) : _is_some(true)\n    {\n        new (_data_placeholder) T{reinterpret_cast<const T &>(that._data_placeholder)};\n    }\n\n    /*implicit*/ optional(optional &&that) : _is_some(true)\n    {\n        new (_data_placeholder) T{std::move(reinterpret_cast<T &&>(that._data_placeholder))};\n        that.reset();\n    }\n    template <typename... Args>\n    /*implicit*/ optional(Args &&... args) : _is_some(true)\n    {\n        new (_data_placeholder) T{std::forward<Args>(args)...};\n    }\n\n    // please use explicit reset\n    optional &operator=(const optional &that) = delete;\n\n    bool is_some() const { return _is_some; }\n    bool is_none() const { return !_is_some; }\n    const T &unwrap_or(const T &def) const\n    {\n        if (_is_some) {\n            return unwrap();\n        } else {\n            return def;\n        }\n    }\n    T &unwrap() { return reinterpret_cast<T &>(_data_placeholder); }\n    const T &unwrap() const { return reinterpret_cast<const T &>(_data_placeholder); }\n    void reset()\n    {\n        if (_is_some) {\n            reinterpret_cast<T *>(_data_placeholder)->~T();\n            _is_some = false;\n        }\n    }\n    template <typename... Args>\n    void reset(Args &&... 
args)\n    {\n        if (_is_some) {\n            reinterpret_cast<T *>(_data_placeholder)->~T();\n        } else {\n            _is_some = true;\n        }\n        new (_data_placeholder) T{std::forward<Args>(args)...};\n    }\n    ~optional() { reset(); }\n};\n}\n"
  },
  {
    "path": "include/dsn/utility/output_utils.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <cmath>\n#include <iomanip>\n#include <iostream>\n#include <sstream>\n#include <string>\n#include <vector>\n\n#include <dsn/cpp/json_helper.h>\n\nnamespace dsn {\nnamespace utils {\n\nclass table_printer;\nclass multi_table_printer;\n\n// Keep the same code style with dsn/cpp/json_helper.h\ntemplate <typename Writer>\nvoid json_encode(Writer &out, const table_printer &tp);\n\n/// A tool used to print data in a table form.\n///\n/// Example usage 1:\n///    table_printer tp(\"sample_data\");\n///    tp.add_title(\"table_title\");\n///    tp.add_column(\"column_name1\");\n///    tp.add_column(\"column_name2\");\n///    for (...) 
{\n///        tp.add_row(\"row_name_i\");\n///        tp.append_data(int_data);\n///        tp.append_data(double_data);\n///    }\n///\n///    std::ostream out(...);\n///    tp.output(out, output_format::kTabular);\n///\n/// Output looks like:\n///    [sample_data]\n///    table_title  column_name1  column_name2\n///    row_name_1   123           45.67\n///    row_name_2   456           45.68\n///\n/// Example usage 2:\n///    table_printer tp(\"sample_data_2\");\n///    tp.add_row_name_and_data(\"row_name_1\", int_value);\n///    tp.add_row_name_and_data(\"row_name_2\", string_value);\n///\n///    std::ostream out(...);\n///    tp.output(out);\n///\n/// Output looks like:\n///    [sample_data_2]\n///    row_name_1 :  4567\n///    row_name_2 :  hello\n///\nclass table_printer\n{\nprivate:\n    enum class data_mode\n    {\n        kUninitialized = 0,\n        kSingleColumn = 1,\n        kMultiColumns = 2\n    };\n\npublic:\n    enum class alignment\n    {\n        kLeft = 0,\n        kRight = 1,\n    };\n\n    enum class output_format\n    {\n        kTabular = 0,\n        kJsonCompact = 1,\n        kJsonPretty = 2,\n    };\n\n    explicit table_printer(std::string name = \"\", int tabular_width = 2, int precision = 2)\n        : _name(std::move(name)),\n          _mode(data_mode::kUninitialized),\n          _tabular_width(tabular_width),\n          _precision(precision)\n    {\n    }\n\n    // kMultiColumns mode.\n    void add_title(const std::string &title, alignment align = alignment::kLeft);\n    void add_column(const std::string &col_name, alignment align = alignment::kLeft);\n    template <typename T>\n    void add_row(const T &row_name)\n    {\n        check_mode(data_mode::kMultiColumns);\n        _matrix_data.emplace_back(std::vector<std::string>());\n        append_data(row_name);\n    }\n    template <typename T>\n    void append_data(const T &data)\n    {\n        check_mode(data_mode::kMultiColumns);\n        append_string_data(to_string(data));\n    
}\n\n    // kSingleColumn mode.\n    template <typename T>\n    void add_row_name_and_data(const std::string &row_name, const T &data)\n    {\n        check_mode(data_mode::kSingleColumn);\n        add_row_name_and_string_data(row_name, to_string(data));\n    }\n\n    // Output result.\n    void output(std::ostream &out, output_format format = output_format::kTabular) const;\n\nprivate:\n    template <typename T>\n    std::string to_string(T data)\n    {\n        return std::to_string(data);\n    }\n\n    void check_mode(data_mode mode);\n\n    void append_string_data(const std::string &data);\n    void add_row_name_and_string_data(const std::string &row_name, const std::string &data);\n\n    void output_in_tabular(std::ostream &out) const;\n    template <typename Writer>\n    void output_in_json(std::ostream &out) const\n    {\n        rapidjson::OStreamWrapper wrapper(out);\n        Writer writer(wrapper);\n        writer.StartObject();\n        json_encode(writer, *this);\n        writer.EndObject();\n        out << std::endl;\n    }\n\nprivate:\n    friend class multi_table_printer;\n    template <typename Writer>\n    friend void json_encode(Writer &out, const table_printer &tp);\n\n    std::string _name;\n    data_mode _mode;\n    int _tabular_width;\n    int _precision;\n    std::vector<bool> _align_left;\n    std::vector<int> _max_col_width;\n    std::vector<std::vector<std::string>> _matrix_data;\n};\n\ntemplate <>\ninline std::string table_printer::to_string<bool>(bool data)\n{\n    return data ? 
\"true\" : \"false\";\n}\n\ntemplate <>\ninline std::string table_printer::to_string<double>(double data)\n{\n    if (std::abs(data) < 1e-6) {\n        return \"0.00\";\n    } else {\n        std::stringstream s;\n        s << std::fixed << std::setprecision(_precision) << data;\n        return s.str();\n    }\n}\n\ntemplate <>\ninline std::string table_printer::to_string<std::string>(std::string data)\n{\n    return data;\n}\n\ntemplate <>\ninline std::string table_printer::to_string<const char *>(const char *data)\n{\n    return std::string(data);\n}\n\n// Helper to output multiple tables into one large table.\nclass multi_table_printer\n{\npublic:\n    void add(table_printer &&tp);\n    void output(std::ostream &out, table_printer::output_format format) const;\n\nprivate:\n    void output_in_tabular(std::ostream &out) const;\n    template <typename Writer>\n    void output_in_json(std::ostream &out) const\n    {\n        rapidjson::OStreamWrapper wrapper(out);\n        Writer writer(wrapper);\n        writer.StartObject();\n        for (const auto &tp : _tps) {\n            json_encode(writer, tp);\n        }\n        writer.EndObject();\n        out << std::endl;\n    }\n\nprivate:\n    std::vector<table_printer> _tps;\n};\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/ports.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#if defined(__linux__) || defined(__APPLE__)\n\n#include <unistd.h>\n\n#define __selectany __attribute__((weak)) extern\n\n#ifndef O_BINARY\n#define O_BINARY 0\n#endif\n\n#else\n\n#error \"unsupported platform\"\n#endif\n\n// common macros and data structures\n#ifndef FIELD_OFFSET\n#define FIELD_OFFSET(s, field) (((size_t) & ((s *)(10))->field) - 10)\n#endif\n\n#ifndef CONTAINING_RECORD\n#define CONTAINING_RECORD(address, type, field)                                                    \\\n    ((type *)((char *)(address)-FIELD_OFFSET(type, field)))\n#endif\n\n#ifndef MAX_COMPUTERNAME_LENGTH\n#define MAX_COMPUTERNAME_LENGTH 32\n#endif\n\n#ifndef ARRAYSIZE\n#define ARRAYSIZE(a) ((sizeof(a) / sizeof(*(a))) / 
static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))\n#endif\n\n#define snprintf_p std::snprintf\n#define dsn_likely(pred) (__builtin_expect((pred), 1))\n#define dsn_unlikely(pred) (__builtin_expect((pred), 0))\n\n#define DISALLOW_COPY_AND_ASSIGN(TypeName)                                                         \\\n    TypeName(const TypeName &) = delete;                                                           \\\n    void operator=(const TypeName &) = delete\n\n#if defined OS_LINUX || defined OS_CYGWIN\n\n// _BIG_ENDIAN\n#include <endian.h>\n\n#elif defined __APPLE__\n\n// BIG_ENDIAN\n#include <machine/endian.h> // NOLINT(build/include)\n\n#endif\n\n// Cache line alignment\n#if defined(__i386__) || defined(__x86_64__)\n#define CACHELINE_SIZE 64\n#elif defined(__powerpc64__)\n// TODO(user) This is the L1 D-cache line size of our Power7 machines.\n// Need to check if this is appropriate for other PowerPC64 systems.\n#define CACHELINE_SIZE 128\n#elif defined(__aarch64__)\n#define CACHELINE_SIZE 64\n#elif defined(__arm__)\n// Cache line sizes for ARM: These values are not strictly correct since\n// cache line sizes depend on implementations, not architectures.  There\n// are even implementations with cache line sizes configurable at boot\n// time.\n#if defined(__ARM_ARCH_5T__)\n#define CACHELINE_SIZE 32\n#elif defined(__ARM_ARCH_7A__)\n#define CACHELINE_SIZE 64\n#endif\n#endif\n\n// This is a NOP if CACHELINE_SIZE is not defined.\n#ifdef CACHELINE_SIZE\nstatic_assert((CACHELINE_SIZE & (CACHELINE_SIZE - 1)) == 0 &&\n                  (CACHELINE_SIZE & (sizeof(void *) - 1)) == 0,\n              \"CACHELINE_SIZE must be a power of 2 and a multiple of sizeof(void *)\");\n#define CACHELINE_ALIGNED __attribute__((aligned(CACHELINE_SIZE)))\n#else\n#define CACHELINE_ALIGNED\n#endif\n"
  },
  {
    "path": "include/dsn/utility/preloadable.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/utility/ports.h>\n\nnamespace dsn {\nnamespace utils {\n\ntemplate <typename T>\nclass preloadable\n{\nprotected:\n    preloadable() {}\n    DISALLOW_COPY_AND_ASSIGN(preloadable);\n\npublic:\n    static T s_instance;\n};\n\ntemplate <typename T>\nT preloadable<T>::s_instance;\n}\n}\n"
  },
  {
    "path": "include/dsn/utility/priority_queue.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <queue>\n#include <cassert>\n#include <dsn/utility/synchronize.h>\n\nnamespace dsn {\nnamespace utils {\n\ntemplate <typename T, int priority_count, typename TQueue = std::queue<T>>\nclass priority_queue\n{\npublic:\n    priority_queue(const std::string &name)\n    {\n        _name = name;\n        _count = 0;\n    }\n\n    virtual long enqueue(T obj, uint32_t priority)\n    {\n        assert(priority >= 0 && priority < priority_count); // \"wrong priority\");\n\n        auto_lock<::dsn::utils::ex_lock_nr_spin> l(_lock);\n        {\n     
       _items[priority].push(obj);\n            return ++_count;\n        }\n    }\n\n    virtual T dequeue()\n    {\n        auto_lock<::dsn::utils::ex_lock_nr_spin> l(_lock);\n        long ct = 0;\n        return dequeue_impl(ct);\n    }\n\n    virtual T dequeue(/*out*/ long &ct)\n    {\n        auto_lock<::dsn::utils::ex_lock_nr_spin> l(_lock);\n        return dequeue_impl(ct);\n    }\n\n    const std::string &get_name() const { return _name; }\n\n    long count() const\n    {\n        auto_lock<::dsn::utils::ex_lock_nr_spin> l(_lock);\n        return _count;\n    }\n\nprotected:\n    T dequeue_impl(/*out*/ long &ct, bool pop = true)\n    {\n        if (_count == 0) {\n            ct = 0;\n            return nullptr;\n        }\n\n        ct = --_count;\n\n        int index = priority_count - 1;\n        for (; index >= 0; index--) {\n            if (_items[index].size() > 0) {\n                break;\n            }\n        }\n\n        assert(index >= 0); // \"must find something\");\n        auto c = _items[index].front();\n        _items[index].pop();\n        return c;\n    }\n\nprotected:\n    std::string _name;\n    TQueue _items[priority_count];\n    long _count;\n    mutable utils::ex_lock_nr_spin _lock;\n};\n\ntemplate <typename T, int priority_count, typename TQueue = std::queue<T>>\nclass blocking_priority_queue : public priority_queue<T, priority_count, TQueue>\n{\npublic:\n    blocking_priority_queue(const std::string &name)\n        : priority_queue<T, priority_count, TQueue>(name)\n    {\n    }\n\n    virtual long enqueue(T obj, uint32_t priority) override\n    {\n        auto r = priority_queue<T, priority_count, TQueue>::enqueue(obj, priority);\n        _sema.signal();\n        return r;\n    }\n\n    T dequeue_with_timeout(/*out*/ long &ct, int milliseconds)\n    {\n        if (!_sema.wait(milliseconds)) {\n            ct = 0;\n            return nullptr;\n        }\n        return priority_queue<T, priority_count, TQueue>::dequeue(ct);\n    
}\n\n    virtual T dequeue(/*out*/ long &ct) override { return dequeue_with_timeout(ct, 0xffffffff); }\n\nprivate:\n    semaphore _sema;\n};\n}\n} // end namespace\n"
  },
  {
    "path": "include/dsn/utility/process_utils.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <iostream>\n\n#include <dsn/utility/ports.h>\n\nnamespace dsn {\nnamespace utils {\n///\n/// execute command in a seperate process,\n/// read it's stdout to output\n/// and return the retcode of command\n///\nint pipe_execute(const char *command, std::ostream &output);\n\n///\n/// process_mem_usage(double &, double &) - takes two doubles by reference,\n/// attempts to read the system-dependent data for a process' virtual memory\n/// size and resident set size, and return the results in KB.\n///\n/// On failure, returns 0.0, 0.0\n///\nvoid process_mem_usage(double &vm_usage, double &resident_set);\n\n///\n/// get the thread id.\n/// for best performance, we cache the tid value\n/// in the thread local variable\n///\nconst int INVALID_TID = -1;\n\nstruct tls_tid\n{\n    unsigned int magic;\n    int local_tid;\n};\nextern __thread tls_tid s_tid;\n\nint get_current_tid_internal();\n\ninline int get_current_tid()\n{\n    if (dsn_likely(s_tid.magic == 0xdeadbeef)) {\n        return s_tid.local_tid;\n    } else {\n        s_tid.magic = 0xdeadbeef;\n        s_tid.local_tid = get_current_tid_internal();\n        return 
s_tid.local_tid;\n    }\n}\n\n///\n/// get the process start time.\n/// please call these functions after the \"main\" function,\n/// otherwise the return values are undefined.\n///\nuint64_t process_start_millis();\nconst char *process_start_date_time_mills();\n}\n}\n"
  },
  {
    "path": "include/dsn/utility/rand.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <cstdint>\n#include <cstddef>\n#include <limits>\n\nnamespace dsn {\nnamespace rand {\n\n/// This package offers several functions for random number generation.\n/// It is guaranteed to be thread-safe by using a PRNG with one instance per thread.\n/// By default, the RNG is seeded from std::random_device.\n\n/// \\returns, as an uint64_t, a non-negative pseudo-random number in [min, max].\nextern uint64_t next_u64(uint64_t min, uint64_t max);\n\n/// \\returns, as an uint64_t, a non-negative pseudo-random number in [0, n).\n/// If n == 0, it returns 0.\ninline uint64_t next_u64(uint64_t n)\n{\n    if (n == 0)\n        return 0;\n    return next_u64(0, n - 1);\n}\n\n/// \\returns a pseudo-random 64-bit value as a uint64_t.\ninline uint64_t next_u64() { return next_u64(0, std::numeric_limits<uint64_t>::max()); }\n\n/// \\returns, as an uint32_t, a non-negative pseudo-random number in [min, max].\ninline uint32_t next_u32(uint32_t min, uint32_t max)\n{\n    return static_cast<uint32_t>(next_u64(min, max));\n}\n\n/// \\returns, as an uint32_t, a non-negative pseudo-random number in [0, n).\n/// If n == 0, it returns 
0.\ninline uint32_t next_u32(uint32_t n) { return static_cast<uint32_t>(next_u64(n)); }\n\n/// \\returns a pseudo-random 32-bit value as a uint32_t.\ninline uint32_t next_u32() { return next_u32(0, std::numeric_limits<uint32_t>::max()); }\n\n/// \\returns, as a double, a pseudo-random number in [0.0,1.0].\ninline double next_double01() { return next_u64(0, 1000000000) / 1000000000.0; }\n\n/// Reseeds the RNG of current thread.\nextern void reseed_thread_local_rng(uint64_t seed);\n\n} // namespace rand\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/safe_strerror_posix.h",
    "content": "// Copyright (c) 2011 The Chromium Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style license that can be\n// found in the LICENSE file.\n\n#pragma once\n\n#include <string>\n\nnamespace dsn {\nnamespace utils {\n\n// This file declares safe, portable alternatives to the POSIX strerror()\n// function. strerror() is inherently unsafe in multi-threaded apps and should\n// never be used. Doing so can cause crashes. Additionally, the thread-safe\n// alternative strerror_r varies in semantics across platforms. Use these\n// functions instead.\n\n// Thread-safe strerror function with dependable semantics that never fails.\n// It will write the string form of error \"err\" to buffer buf of length len.\n// If there is an error calling the OS's strerror_r() function then a message to\n// that effect will be printed into buf, truncating if necessary. The final\n// result is always null-terminated. The value of errno is never changed.\n//\n// Use this instead of strerror_r().\nvoid safe_strerror_r(int err, char *buf, size_t len);\n\n// Calls safe_strerror_r with a buffer of suitable size and returns the result\n// in a C++ string.\n//\n// Use this instead of strerror(). Note though that safe_strerror_r will be\n// more robust in the case of heap corruption errors, since it doesn't need to\n// allocate a string.\nstd::string safe_strerror(int err);\n}\n}\n"
  },
  {
    "path": "include/dsn/utility/singleton.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <boost/noncopyable.hpp>\n\nnamespace dsn {\nnamespace utils {\n\n/**\n * A class can derive from `singleton<T>` if it is meant to be a singleton.\n * It is strongly recommended to make its constructor and destructor private,\n * so that the lifecycle of the singleton instance is maintained by the base class `singleton<T>`.\n */\n\ntemplate <typename T>\nclass singleton : private boost::noncopyable\n{\npublic:\n    singleton() = default;\n\n    static T &instance()\n    {\n        static T _instance;\n        return _instance;\n    }\n};\n\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/singleton_store.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/utility/singleton.h>\n#include <dsn/utility/synchronize.h>\n#include <map>\n#include <vector>\n\nnamespace dsn {\nnamespace utils {\n\ntemplate <typename TKey, typename TValue, typename TCompare = std::less<TKey>>\nclass singleton_store : public dsn::utils::singleton<singleton_store<TKey, TValue, TCompare>>\n{\npublic:\n    bool put(TKey key, TValue val)\n    {\n        auto it = _store.find(key);\n        if (it != _store.end())\n            return false;\n        else {\n            _store.insert(std::make_pair(key, val));\n       
     return true;\n        }\n    }\n\n    bool get(TKey key, /*out*/ TValue &val) const\n    {\n        auto it = _store.find(key);\n        if (it != _store.end()) {\n            val = it->second;\n            return true;\n        } else\n            return false;\n    }\n\n    bool remove(TKey key) { return _store.erase(key) > 0; }\n\n    void get_all_keys(/*out*/ std::vector<TKey> &keys)\n    {\n        for (auto it = _store.begin(); it != _store.end(); ++it) {\n            keys.push_back(it->first);\n        }\n    }\n\nprivate:\n    std::map<TKey, TValue, TCompare> _store;\n};\n\ntemplate <typename TKey, typename TValue, typename TCompare = std::less<TKey>>\nclass safe_singleton_store\n    : public dsn::utils::singleton<safe_singleton_store<TKey, TValue, TCompare>>\n{\npublic:\n    bool put(TKey key, TValue val)\n    {\n        auto_write_lock l(_lock);\n        auto it = _store.find(key);\n        if (it != _store.end())\n            return false;\n        else {\n            _store.insert(std::make_pair(key, val));\n            return true;\n        }\n    }\n\n    bool get(TKey key, /*out*/ TValue &val) const\n    {\n        auto_read_lock l(_lock);\n        auto it = _store.find(key);\n        if (it != _store.end()) {\n            val = it->second;\n            return true;\n        } else\n            return false;\n    }\n\n    bool remove(TKey key)\n    {\n        auto_write_lock l(_lock);\n        return _store.erase(key) > 0;\n    }\n\n    void get_all_keys(/*out*/ std::vector<TKey> &keys)\n    {\n        auto_read_lock l(_lock);\n        for (auto it = _store.begin(); it != _store.end(); ++it) {\n            keys.push_back(it->first);\n        }\n    }\n\nprivate:\n    std::map<TKey, TValue, TCompare> _store;\n    mutable rw_lock_nr _lock;\n};\n\n//------------- inline implementation ----------\n}\n} // end namespace dsn::utils\n"
  },
  {
    "path": "include/dsn/utility/smart_pointers.h",
    "content": "// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//      http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <memory>\n\nnamespace dsn {\n\nnamespace memory_internal {\n\n// Traits to select proper overload and return type for `absl::make_unique<>`.\ntemplate <typename T>\nstruct MakeUniqueResult\n{\n    using scalar = std::unique_ptr<T>;\n};\ntemplate <typename T>\nstruct MakeUniqueResult<T[]>\n{\n    using array = std::unique_ptr<T[]>;\n};\ntemplate <typename T, size_t N>\nstruct MakeUniqueResult<T[N]>\n{\n    using invalid = void;\n};\n\n} // namespace memory_internal\n\n#if __cplusplus >= 201402L || defined(_MSC_VER)\nusing std::make_unique;\n#else\n// -----------------------------------------------------------------------------\n// Function Template: make_unique<T>()\n// -----------------------------------------------------------------------------\n//\n// Creates a `std::unique_ptr<>`, while avoiding issues creating temporaries\n// during the construction process. 
`absl::make_unique<>` also avoids redundant\n// type declarations, by avoiding the need to explicitly use the `new` operator.\n//\n// This implementation of `absl::make_unique<>` is designed for C++11 code and\n// will be replaced in C++14 by the equivalent `std::make_unique<>` abstraction.\n// `absl::make_unique<>` is designed to be 100% compatible with\n// `std::make_unique<>` so that the eventual migration will involve a simple\n// rename operation.\n//\n// For more background on why `std::unique_ptr<T>(new T(a,b))` is problematic,\n// see Herb Sutter's explanation on\n// (Exception-Safe Function Calls)[http://herbsutter.com/gotw/_102/].\n// (In general, reviewers should treat `new T(a,b)` with scrutiny.)\n//\n// Example usage:\n//\n//    auto p = make_unique<X>(args...);  // 'p'  is a std::unique_ptr<X>\n//    auto pa = make_unique<X[]>(5);     // 'pa' is a std::unique_ptr<X[]>\n//\n// Three overloads of `absl::make_unique` are required:\n//\n//   - For non-array T:\n//\n//       Allocates a T with `new T(std::forward<Args> args...)`,\n//       forwarding all `args` to T's constructor.\n//       Returns a `std::unique_ptr<T>` owning that object.\n//\n//   - For an array of unknown bounds T[]:\n//\n//       `absl::make_unique<>` will allocate an array T of type U[] with\n//       `new U[n]()` and return a `std::unique_ptr<U[]>` owning that array.\n//\n//       Note that 'U[n]()' is different from 'U[n]', and elements will be\n//       value-initialized. Note as well that `std::unique_ptr` will perform its\n//       own destruction of the array elements upon leaving scope, even though\n//       the array [] does not have a default destructor.\n//\n//       NOTE: an array of unknown bounds T[] may still be (and often will be)\n//       initialized to have a size, and will still use this overload. 
E.g:\n//\n//         auto my_array = absl::make_unique<int[]>(10);\n//\n//   - For an array of known bounds T[N]:\n//\n//       `absl::make_unique<>` is deleted (like with `std::make_unique<>`) as\n//       this overload is not useful.\n//\n//       NOTE: an array of known bounds T[N] is not considered a useful\n//       construction, and may cause undefined behavior in templates. E.g:\n//\n//         auto my_array = absl::make_unique<int[10]>();\n//\n//       In those cases, of course, you can still use the overload above and\n//       simply initialize it to its desired size:\n//\n//         auto my_array = absl::make_unique<int[]>(10);\n\n// `absl::make_unique` overload for non-array types.\ntemplate <typename T, typename... Args>\ntypename memory_internal::MakeUniqueResult<T>::scalar make_unique(Args &&... args)\n{\n    return std::unique_ptr<T>(new T(std::forward<Args>(args)...));\n}\n\n// `absl::make_unique` overload for an array T[] of unknown bounds.\n// The array allocation needs to use the `new T[size]` form and cannot take\n// element constructor arguments. The `std::unique_ptr` will manage destructing\n// these array elements.\ntemplate <typename T>\ntypename memory_internal::MakeUniqueResult<T>::array make_unique(size_t n)\n{\n    return std::unique_ptr<T>(new typename std::remove_extent<T>::type[n]);\n}\n\n// `absl::make_unique` overload for an array T[N] of known bounds.\n// This construction will be rejected.\ntemplate <typename T, typename... Args>\ntypename memory_internal::MakeUniqueResult<T>::invalid make_unique(Args &&... /* args */) = delete;\n#endif\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/string_conv.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <climits>\n#include <cmath>\n\n#include <dsn/utility/string_view.h>\n\nnamespace dsn {\n\nnamespace internal {\n\ntemplate <typename T>\nbool buf2signed(string_view buf, T &result)\n{\n    static_assert(std::is_signed<T>::value, \"buf2signed works only with signed integer\");\n\n    if (buf.empty()) {\n        return false;\n    }\n\n    std::string str(buf.data(), buf.length());\n    const int saved_errno = errno;\n    errno = 0;\n    char *p = nullptr;\n    long long v = std::strtoll(str.data(), &p, 0);\n\n    if (p - str.data() != str.length()) {\n        return false;\n    }\n\n    if (v > std::numeric_limits<T>::max() || v < std::numeric_limits<T>::min() || errno != 0) {\n        return false;\n    }\n\n    if (errno == 0) {\n        errno = saved_errno;\n    }\n\n    result = v;\n    return true;\n}\n\ntemplate <typename T>\nbool buf2unsigned(string_view buf, T &result)\n{\n    static_assert(std::is_unsigned<T>::value, \"buf2unsigned works only with unsigned integer\");\n\n    if (buf.empty()) {\n        return false;\n    }\n\n    std::string str(buf.data(), buf.length());\n    const int saved_errno = errno;\n  
  errno = 0;\n    char *p = nullptr;\n    unsigned long long v = std::strtoull(str.data(), &p, 0);\n\n    if (p - str.data() != str.length()) {\n        return false;\n    }\n\n    if (v > std::numeric_limits<T>::max() || v < std::numeric_limits<T>::min() || errno != 0) {\n        return false;\n    }\n\n    if (errno == 0) {\n        errno = saved_errno;\n    }\n\n    // strtoull() will convert a negative integer to an unsigned integer,\n    // return false in this condition. (but we consider \"-0\" is correct)\n    if (v != 0 && str.find('-') != std::string::npos) {\n        return false;\n    }\n\n    result = v;\n    return true;\n}\n} // namespace internal\n\n/// buf2*: `result` will keep unmodified if false is returned.\n\ninline bool buf2int32(string_view buf, int32_t &result)\n{\n    return internal::buf2signed(buf, result);\n}\n\ninline bool buf2int64(string_view buf, int64_t &result)\n{\n    return internal::buf2signed(buf, result);\n}\n\ninline bool buf2uint32(string_view buf, uint32_t &result)\n{\n    return internal::buf2unsigned(buf, result);\n}\n\ninline bool buf2uint64(string_view buf, uint64_t &result)\n{\n    return internal::buf2unsigned(buf, result);\n}\n\ninline bool buf2bool(string_view buf, bool &result, bool ignore_case = true)\n{\n    std::string data(buf.data(), buf.length());\n    if (ignore_case) {\n        std::transform(data.begin(), data.end(), data.begin(), ::tolower);\n    }\n    if (data == \"true\") {\n        result = true;\n        return true;\n    }\n    if (data == \"false\") {\n        result = false;\n        return true;\n    }\n    return false;\n}\n\ninline bool buf2double(string_view buf, double &result)\n{\n    if (buf.empty()) {\n        return false;\n    }\n\n    std::string str(buf.data(), buf.length());\n    const int saved_errno = errno;\n    errno = 0;\n    char *p = nullptr;\n    double v = std::strtod(str.data(), &p);\n\n    if (p - str.data() != str.length()) {\n        return false;\n    }\n\n    if (v == 
HUGE_VAL || v == -HUGE_VAL || std::isnan(v) || errno != 0) {\n        return false;\n    }\n\n    if (errno == 0) {\n        errno = saved_errno;\n    }\n\n    result = v;\n    return true;\n}\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/string_splitter.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <stdlib.h>\n#include <stdint.h>\n\nnamespace dsn {\n\nenum empty_field_action\n{\n    SKIP_EMPTY_FIELD,\n    ALLOW_EMPTY_FIELD\n};\n\n// Split a string with one character\nclass string_splitter\n{\npublic:\n    // Split `input' with `separator'. If `action' is SKIP_EMPTY_FIELD, zero-\n    // length() field() will be skipped.\n    inline string_splitter(const char *input,\n                           char separator,\n                           empty_field_action action = SKIP_EMPTY_FIELD);\n\n    // Allows containing embedded '\\0' characters and separator can be '\\0',\n    // if str_end is not NULL.\n    inline string_splitter(const char *str_begin,\n                           const char *str_end,\n                           char separator,\n                           empty_field_action = SKIP_EMPTY_FIELD);\n\n    // Move splitter forward.\n    inline string_splitter &operator++();\n\n    inline string_splitter operator++(int);\n\n    // True iff field() is valid.\n    inline operator const void *() const;\n\n    // Beginning address and length of the field. 
*(field() + length()) may\n    // not be '\\0' because we don't modify `input'.\n    inline const char *field() const;\n\n    inline size_t length() const;\n\n    // Cast field to specific type, and write the value into `pv'.\n    // Returns 0 on success, -1 otherwise.\n    // NOTE: If separator is a digit, casting functions always return -1.\n    inline int to_int8(int8_t *pv) const;\n\n    inline int to_uint8(uint8_t *pv) const;\n\n    inline int to_int(int *pv) const;\n\n    inline int to_uint(unsigned int *pv) const;\n\n    inline int to_long(long *pv) const;\n\n    inline int to_ulong(unsigned long *pv) const;\n\n    inline int to_longlong(long long *pv) const;\n\n    inline int to_ulonglong(unsigned long long *pv) const;\n\n    inline int to_float(float *pv) const;\n\n    inline int to_double(double *pv) const;\n\nprivate:\n    inline bool not_end(const char *p) const;\n\n    inline void init();\n\n    const char *_head;\n    const char *_tail;\n    const char *_str_tail;\n    const char _sep;\n    const empty_field_action _empty_field_action;\n};\n\nstring_splitter::string_splitter(const char *str, char sep, empty_field_action action)\n    : _head(str), _str_tail(NULL), _sep(sep), _empty_field_action(action)\n{\n    init();\n}\n\nstring_splitter::string_splitter(const char *str_begin,\n                                 const char *str_end,\n                                 const char sep,\n                                 empty_field_action action)\n    : _head(str_begin), _str_tail(str_end), _sep(sep), _empty_field_action(action)\n{\n    init();\n}\n\nvoid string_splitter::init()\n{\n    // Find the starting _head and _tail.\n    if (__builtin_expect(_head != NULL, 1)) {\n        if (_empty_field_action == SKIP_EMPTY_FIELD) {\n            for (; _sep == *_head && not_end(_head); ++_head) {\n            }\n        }\n        for (_tail = _head; *_tail != _sep && not_end(_tail); ++_tail) {\n        }\n    } else {\n        _tail = NULL;\n    
}\n}\n\nstring_splitter &string_splitter::operator++()\n{\n    if (__builtin_expect(_tail != NULL, 1)) {\n        if (not_end(_tail)) {\n            ++_tail;\n            if (_empty_field_action == SKIP_EMPTY_FIELD) {\n                for (; _sep == *_tail && not_end(_tail); ++_tail) {\n                }\n            }\n        }\n        _head = _tail;\n        for (; *_tail != _sep && not_end(_tail); ++_tail) {\n        }\n    }\n    return *this;\n}\n\nstring_splitter string_splitter::operator++(int)\n{\n    string_splitter tmp = *this;\n    operator++();\n    return tmp;\n}\n\nstring_splitter::operator const void *() const\n{\n    return (_head != NULL && not_end(_head)) ? _head : NULL;\n}\n\nconst char *string_splitter::field() const { return _head; }\n\nsize_t string_splitter::length() const { return static_cast<size_t>(_tail - _head); }\n\nbool string_splitter::not_end(const char *p) const\n{\n    return (_str_tail == NULL) ? *p : (p != _str_tail);\n}\n\nint string_splitter::to_int8(int8_t *pv) const\n{\n    long v = 0;\n    if (to_long(&v) == 0 && v >= -128 && v <= 127) {\n        *pv = (int8_t)v;\n        return 0;\n    }\n    return -1;\n}\n\nint string_splitter::to_uint8(uint8_t *pv) const\n{\n    unsigned long v = 0;\n    if (to_ulong(&v) == 0 && v <= 255) {\n        *pv = (uint8_t)v;\n        return 0;\n    }\n    return -1;\n}\n\nint string_splitter::to_int(int *pv) const\n{\n    long v = 0;\n    if (to_long(&v) == 0 && v >= INT_MIN && v <= INT_MAX) {\n        *pv = (int)v;\n        return 0;\n    }\n    return -1;\n}\n\nint string_splitter::to_uint(unsigned int *pv) const\n{\n    unsigned long v = 0;\n    if (to_ulong(&v) == 0 && v <= UINT_MAX) {\n        *pv = (unsigned int)v;\n        return 0;\n    }\n    return -1;\n}\n\nint string_splitter::to_long(long *pv) const\n{\n    char *endptr = NULL;\n    *pv = strtol(field(), &endptr, 10);\n    return (endptr == field() + length()) ? 
0 : -1;\n}\n\nint string_splitter::to_ulong(unsigned long *pv) const\n{\n    char *endptr = NULL;\n    *pv = strtoul(field(), &endptr, 10);\n    return (endptr == field() + length()) ? 0 : -1;\n}\n\nint string_splitter::to_longlong(long long *pv) const\n{\n    char *endptr = NULL;\n    *pv = strtoll(field(), &endptr, 10);\n    return (endptr == field() + length()) ? 0 : -1;\n}\n\nint string_splitter::to_ulonglong(unsigned long long *pv) const\n{\n    char *endptr = NULL;\n    *pv = strtoull(field(), &endptr, 10);\n    return (endptr == field() + length()) ? 0 : -1;\n}\n\nint string_splitter::to_float(float *pv) const\n{\n    char *endptr = NULL;\n    *pv = strtof(field(), &endptr);\n    return (endptr == field() + length()) ? 0 : -1;\n}\n\nint string_splitter::to_double(double *pv) const\n{\n    char *endptr = NULL;\n    *pv = strtod(field(), &endptr);\n    return (endptr == field() + length()) ? 0 : -1;\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/string_view.h",
    "content": "//\n// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//      http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n//\n// -----------------------------------------------------------------------------\n// File: string_view.h\n// -----------------------------------------------------------------------------\n//\n// This file contains the definition of the `dsn::string_view` class. A\n// `string_view` points to a contiguous span of characters, often part or all of\n// another `std::string`, double-quoted std::string literal, character array, or even\n// another `string_view`.\n//\n// This `dsn::string_view` abstraction is designed to be a drop-in\n// replacement for the C++17 `std::string_view` abstraction.\n//\n// --- Update(wutao1) ---\n//\n// This file is copied from abseil, though in order to maintain minimum\n// dependencies, abseil is not an requirement. The dsn::string_view consists of only\n// a subset of functions that std::string_view and absl::string_view provide, so that\n// we can keep this module lightweight, but reducing the generality.\n//\n// dsn::string_view also supports view of dsn::blob, which can also function as a constant\n// view. 
However, dsn::blob is not designed to be as lightweight as dsn::string_view\n// since it requires at least one atomic operation to copy the internal std::shared_ptr.\n// So in most cases where data is immutable, using dsn::string_view over dsn::blob will\n// be a more proper choice.\n\n#pragma once\n\n#include <cstring>\n#include <algorithm>\n#include <cassert>\n#include <dsn/utility/blob.h>\n#include <dsn/utility/ports.h>\n\nnamespace dsn {\n\n// dsn::string_view\n//\n// A `string_view` provides a lightweight view into the std::string data provided by\n// a `std::string`, double-quoted std::string literal, character array, or even\n// another `string_view`. A `string_view` does *not* own the std::string to which it\n// points, and that data cannot be modified through the view.\n//\n// You can use `string_view` as a function or method parameter anywhere a\n// parameter can receive a double-quoted std::string literal, `const char*`,\n// `std::string`, or another `absl::string_view` argument with no need to copy\n// the std::string data. Systematic use of `string_view` within function arguments\n// reduces data copies and `strlen()` calls.\n//\n// Because of its small size, prefer passing `string_view` by value:\n//\n//   void MyFunction(dsn::string_view arg);\n//\n// If circumstances require, you may also pass one by const reference:\n//\n//   void MyFunction(const dsn::string_view& arg);  // not preferred\n//\n// Passing by value generates slightly smaller code for many architectures.\n//\n// In either case, the source data of the `string_view` must outlive the\n// `string_view` itself.\n//\n// A `string_view` is also suitable for local variables if you know that the\n// lifetime of the underlying object is longer than the lifetime of your\n// `string_view` variable. 
However, beware of binding a `string_view` to a\n// temporary value:\n//\n//   // BAD use of string_view: lifetime problem\n//   dsn::string_view sv = obj.ReturnAString();\n//\n//   // GOOD use of string_view: str outlives sv\n//   std::string str = obj.ReturnAString();\n//   dsn::string_view sv = str;\n//\n// Due to lifetime issues, a `string_view` is sometimes a poor choice for a\n// return value and usually a poor choice for a data member. If you do use a\n// `string_view` this way, it is your responsibility to ensure that the object\n// pointed to by the `string_view` outlives the `string_view`.\n//\n// A `string_view` may represent a whole std::string or just part of a std::string. For\n// example, when splitting a std::string, `std::vector<dsn::string_view>` is a\n// natural data type for the output.\n//\n//\n// When constructed from a source which is nul-terminated, the `string_view`\n// itself will not include the nul-terminator unless a specific size (including\n// the nul) is passed to the constructor. As a result, common idioms that work\n// on nul-terminated strings do not work on `string_view` objects. If you write\n// code that scans a `string_view`, you must check its length rather than test\n// for nul, for example. Note, however, that nuls may still be embedded within\n// a `string_view` explicitly.\n//\n// You may create a null `string_view` in two ways:\n//\n//   dsn::string_view sv;\n//   dsn::string_view sv(nullptr, 0);\n//\n// For the above, `sv.data() == nullptr`, `sv.length() == 0`, and\n// `sv.empty() == true`. Also, if you create a `string_view` with a non-null\n// pointer then `sv.data() != nullptr`. Thus, you can use `string_view()` to\n// signal an undefined value that is different from other `string_view` values\n// in a similar fashion to how `const char* p1 = nullptr;` is different from\n// `const char* p2 = \"\";`. 
However, in practice, it is not recommended to rely\n// on this behavior.\n//\n// Be careful not to confuse a null `string_view` with an empty one. A null\n// `string_view` is an empty `string_view`, but some empty `string_view`s are\n// not null. Prefer checking for emptiness over checking for null.\n//\n// There are many ways to create an empty string_view:\n//\n//   const char* nullcp = nullptr;\n//   // string_view.size() will return 0 in all cases.\n//   dsn::string_view();\n//   dsn::string_view(nullcp, 0);\n//   dsn::string_view(\"\");\n//   dsn::string_view(\"\", 0);\n//   dsn::string_view(\"abcdef\", 0);\n//   dsn::string_view(\"abcdef\" + 6, 0);\n//\n// All empty `string_view` objects whether null or not, are equal:\n//\n//   dsn::string_view() == dsn::string_view(\"\", 0)\n//   dsn::string_view(nullptr, 0) == dsn::string_view(\"abcdef\"+6, 0)\nclass string_view\n{\npublic:\n    using traits_type = std::char_traits<char>;\n    using value_type = char;\n    using pointer = char *;\n    using const_pointer = const char *;\n    using reference = char &;\n    using const_reference = const char &;\n    using const_iterator = const char *;\n    using iterator = const_iterator;\n    using const_reverse_iterator = std::reverse_iterator<const_iterator>;\n    using reverse_iterator = const_reverse_iterator;\n    using size_type = size_t;\n    using difference_type = std::ptrdiff_t;\n\n    static constexpr size_type npos = static_cast<size_type>(-1);\n\n    // Null `string_view` constructor\n    constexpr string_view() noexcept : ptr_(nullptr), length_(0) {}\n\n    // Implicit constructors\n\n    template <typename Allocator>\n    string_view( // NOLINT(runtime/explicit)\n        const std::basic_string<char, std::char_traits<char>, Allocator> &str) noexcept\n        : ptr_(str.data()), length_(str.size())\n    {\n    }\n\n    string_view(const blob &buf) noexcept // NOLINT(runtime/explicit)\n        : ptr_(buf.data()),\n          length_(buf.length())\n    {\n    
}\n\n    constexpr string_view(const char *str) // NOLINT(runtime/explicit)\n        : ptr_(str),\n          length_(str == nullptr ? 0 : traits_type::length(str))\n    {\n    }\n\n    // Implicit constructor of a `string_view` from a `const char*` and length.\n    constexpr string_view(const char *data, size_type len) : ptr_(data), length_(len) {}\n\n    // NOTE: Harmlessly omitted to work around gdb bug.\n    //   constexpr string_view(const string_view&) noexcept = default;\n    //   string_view& operator=(const string_view&) noexcept = default;\n\n    // Iterators\n\n    // string_view::begin()\n    //\n    // Returns an iterator pointing to the first character at the beginning of the\n    // `string_view`, or `end()` if the `string_view` is empty.\n    constexpr const_iterator begin() const noexcept { return ptr_; }\n\n    // string_view::end()\n    //\n    // Returns an iterator pointing just beyond the last character at the end of\n    // the `string_view`. This iterator acts as a placeholder; attempting to\n    // access it results in undefined behavior.\n    constexpr const_iterator end() const noexcept { return ptr_ + length_; }\n\n    // string_view::cbegin()\n    //\n    // Returns a const iterator pointing to the first character at the beginning\n    // of the `string_view`, or `end()` if the `string_view` is empty.\n    constexpr const_iterator cbegin() const noexcept { return begin(); }\n\n    // string_view::cend()\n    //\n    // Returns a const iterator pointing just beyond the last character at the end\n    // of the `string_view`. 
This pointer acts as a placeholder; attempting to\n    // access its element results in undefined behavior.\n    constexpr const_iterator cend() const noexcept { return end(); }\n\n    // string_view::rbegin()\n    //\n    // Returns a reverse iterator pointing to the last character at the end of the\n    // `string_view`, or `rend()` if the `string_view` is empty.\n    const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); }\n\n    // string_view::rend()\n    //\n    // Returns a reverse iterator pointing just before the first character at the\n    // beginning of the `string_view`. This pointer acts as a placeholder;\n    // attempting to access its element results in undefined behavior.\n    const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); }\n\n    // string_view::crbegin()\n    //\n    // Returns a const reverse iterator pointing to the last character at the end\n    // of the `string_view`, or `crend()` if the `string_view` is empty.\n    const_reverse_iterator crbegin() const noexcept { return rbegin(); }\n\n    // string_view::crend()\n    //\n    // Returns a const reverse iterator pointing just before the first character\n    // at the beginning of the `string_view`. This pointer acts as a placeholder;\n    // attempting to access its element results in undefined behavior.\n    const_reverse_iterator crend() const noexcept { return rend(); }\n\n    // Capacity Utilities\n\n    // string_view::size()\n    //\n    // Returns the number of characters in the `string_view`.\n    constexpr size_type size() const noexcept { return length_; }\n\n    // string_view::length()\n    //\n    // Returns the number of characters in the `string_view`. 
Alias for `size()`.\n    constexpr size_type length() const noexcept { return size(); }\n\n    // string_view::empty()\n    //\n    // Checks if the `string_view` is empty (refers to no characters).\n    constexpr bool empty() const noexcept { return length_ == 0; }\n\n    // string_view::operator[]\n    //\n    // Returns the ith element of a `string_view` using the array operator.\n    // Note that this operator does not perform any bounds checking.\n    constexpr const_reference operator[](size_type i) const { return ptr_[i]; }\n\n    // string_view::front()\n    //\n    // Returns the first element of a `string_view`.\n    constexpr const_reference front() const { return ptr_[0]; }\n\n    // string_view::back()\n    //\n    // Returns the last element of a `string_view`.\n    constexpr const_reference back() const { return ptr_[size() - 1]; }\n\n    // string_view::data()\n    //\n    // Returns a pointer to the underlying character array (which is of course\n    // stored elsewhere). Note that `string_view::data()` may contain embedded nul\n    // characters, but the returned buffer may or may not be nul-terminated;\n    // therefore, do not pass `data()` to a routine that expects a nul-terminated\n    // string.\n    constexpr const_pointer data() const noexcept { return ptr_; }\n\n    // Modifiers\n\n    // string_view::remove_prefix()\n    //\n    // Removes the first `n` characters from the `string_view`. Note that the\n    // underlying std::string is not changed, only the view.\n    void remove_prefix(size_type n)\n    {\n        assert(n <= length_);\n        ptr_ += n;\n        length_ -= n;\n    }\n\n    // string_view::remove_suffix()\n    //\n    // Removes the last `n` characters from the `string_view`. 
Note that the\n    // underlying std::string is not changed, only the view.\n    void remove_suffix(size_type n)\n    {\n        assert(n <= length_);\n        length_ -= n;\n    }\n\n    // string_view::swap()\n    //\n    // Swaps this `string_view` with another `string_view`.\n    void swap(string_view &s) noexcept\n    {\n        auto t = *this;\n        *this = s;\n        s = t;\n    }\n\n    // Explicit conversion operators\n\n    // Converts to `std::basic_string`.\n    template <typename A>\n    explicit operator std::basic_string<char, traits_type, A>() const\n    {\n        if (!data())\n            return {};\n        return std::basic_string<char, traits_type, A>(data(), size());\n    }\n\n    // string_view::substr()\n    //\n    // Returns a \"substring\" of the `string_view` (at offset `pos` and length\n    // `n`) as another string_view. This function throws `std::out_of_range` if\n    // `pos > size()`.\n    string_view substr(size_type pos, size_type n = npos) const\n    {\n        if (dsn_unlikely(pos > length_))\n            throw std::out_of_range(\"absl::string_view::substr\");\n        n = std::min(n, length_ - pos);\n        return string_view(ptr_ + pos, n);\n    }\n\n    // string_view::compare()\n    //\n    // Performs a lexicographical comparison between the `string_view` and\n    // another `dsn::string_view`, returning -1 if `this` is less than, 0 if\n    // `this` is equal to, and 1 if `this` is greater than the passed string\n    // view. 
Note that in the case of data equality, a further comparison is made\n    // on the respective sizes of the two `string_view`s to determine which is\n    // smaller, equal, or greater.\n    int compare(string_view x) const noexcept\n    {\n        auto min_length = std::min(length_, x.length_);\n        if (min_length > 0) {\n            int r = memcmp(ptr_, x.ptr_, min_length);\n            if (r < 0)\n                return -1;\n            if (r > 0)\n                return 1;\n        }\n        if (length_ < x.length_)\n            return -1;\n        if (length_ > x.length_)\n            return 1;\n        return 0;\n    }\n\n    // Overload of `string_view::compare()` for comparing a substring of the\n    // 'string_view` and another `absl::string_view`.\n    int compare(size_type pos1, size_type count1, string_view v) const\n    {\n        return substr(pos1, count1).compare(v);\n    }\n\n    // Overload of `string_view::compare()` for comparing a substring of the\n    // `string_view` and a substring of another `absl::string_view`.\n    int\n    compare(size_type pos1, size_type count1, string_view v, size_type pos2, size_type count2) const\n    {\n        return substr(pos1, count1).compare(v.substr(pos2, count2));\n    }\n\n    // Overload of `string_view::compare()` for comparing a `string_view` and a\n    // a different  C-style std::string `s`.\n    int compare(const char *s) const { return compare(string_view(s)); }\n\n    // Overload of `string_view::compare()` for comparing a substring of the\n    // `string_view` and a different std::string C-style std::string `s`.\n    int compare(size_type pos1, size_type count1, const char *s) const\n    {\n        return substr(pos1, count1).compare(string_view(s));\n    }\n\n    // Overload of `string_view::compare()` for comparing a substring of the\n    // `string_view` and a substring of a different C-style std::string `s`.\n    int compare(size_type pos1, size_type count1, const char *s, size_type count2) 
const\n    {\n        return substr(pos1, count1).compare(string_view(s, count2));\n    }\n\n    // string_view::find()\n    //\n    // Finds the first occurrence of the substring `s` within the `string_view`,\n    // returning the position of the first character's match, or `npos` if no\n    // match was found.\n    size_type find(string_view s, size_type pos = 0) const noexcept;\n\nprivate:\n    const char *ptr_;\n    size_type length_;\n};\n\n// This large function is defined inline so that in a fairly common case where\n// one of the arguments is a literal, the compiler can elide a lot of the\n// following comparisons.\ninline bool operator==(string_view x, string_view y) noexcept\n{\n    auto len = x.size();\n    if (len != y.size()) {\n        return false;\n    }\n    return x.data() == y.data() || len <= 0 || memcmp(x.data(), y.data(), len) == 0;\n}\n\ninline bool operator!=(string_view x, string_view y) noexcept { return !(x == y); }\n\n// IO Insertion Operator\nstd::ostream &operator<<(std::ostream &o, string_view piece);\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/strings.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <string>\n#include <vector>\n#include <list>\n#include <map>\n#include <unordered_set>\n#include <iostream>\n\nnamespace dsn {\nnamespace utils {\n\nvoid split_args(const char *args,\n                /*out*/ std::vector<std::string> &sargs,\n                char splitter = ' ',\n                bool keep_place_holder = false);\n\nvoid split_args(const char *args,\n                /*out*/ std::unordered_set<std::string> &sargs,\n                char splitter = ' ',\n                bool keep_place_holder = false);\n\nvoid split_args(const char *args, /*out*/ std::list<std::string> &sargs, char splitter = ' ');\n\n// kv_map sample (when item_splitter = ',' and kv_splitter = ':'):\n//   k1:v1,k2:v2,k3:v3\n// we 
say that 'k1:v1' is an item.\n// return false if:\n//   - bad format: no kv_splitter found in any non-empty item\n//   - allow_dup_key = false and the same key appears for more than once\n// if allow_dup_key = true and the same key appears for more than once,\n// the last value will be returned.\nbool parse_kv_map(const char *args,\n                  /*out*/ std::map<std::string, std::string> &kv_map,\n                  char item_splitter,\n                  char kv_splitter,\n                  bool allow_dup_key = false);\n\n// format sample (when item_splitter = ',' and kv_splitter = ':'):\n//   k1:v1,k2:v2,k3:v3\nvoid kv_map_to_stream(const std::map<std::string, std::string> &kv_map,\n                      /*out*/ std::ostream &oss,\n                      char item_splitter,\n                      char kv_splitter);\nstd::string kv_map_to_string(const std::map<std::string, std::string> &kv_map,\n                             char item_splitter,\n                             char kv_splitter);\n\nstd::string\nreplace_string(std::string subject, const std::string &search, const std::string &replace);\n\nstd::string get_last_component(const std::string &input, const char splitters[]);\n\nchar *trim_string(char *s);\n\n// calculate the md5 checksum of buffer\nstd::string string_md5(const char *buffer, unsigned int length);\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/synchronize.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#pragma once\n\n#include <dsn/utility/ports.h>\n#include <dsn/utility/utils.h>\n#include <dsn/utility/hpc_locks/benaphore.h>\n#include <dsn/utility/hpc_locks/autoresetevent.h>\n#include <dsn/utility/hpc_locks/rwlock.h>\n\nnamespace dsn {\nnamespace utils {\n\nclass ex_lock\n{\npublic:\n    __inline void lock() { _lock.lock(); }\n    __inline bool try_lock() { return _lock.tryLock(); }\n    __inline void unlock() { _lock.unlock(); }\nprivate:\n    RecursiveBenaphore _lock;\n};\n\nclass ex_lock_nr\n{\npublic:\n    __inline void lock() { _lock.lock(); }\n    __inline bool try_lock() { return _lock.tryLock(); }\n    __inline void unlock() { _lock.unlock(); }\nprivate:\n    NonRecursiveBenaphore _lock;\n};\n\nclass 
ex_lock_nr_spin\n{\npublic:\n    __inline ex_lock_nr_spin() { _l = 0; }\n\n    __inline void lock()\n    {\n        while (!try_lock()) {\n            while (_l.load(std::memory_order_consume) == 1) {\n            }\n        }\n    }\n\n    __inline bool try_lock() { return 0 == _l.exchange(1, std::memory_order_acquire); }\n\n    __inline void unlock() { _l.store(0, std::memory_order_release); }\n\nprivate:\n    std::atomic<int> _l;\n};\n\nclass rw_lock_nr\n{\npublic:\n    rw_lock_nr() {}\n    ~rw_lock_nr() {}\n\n    __inline void lock_read() { _lock.lockReader(); }\n    __inline void unlock_read() { _lock.unlockReader(); }\n    __inline bool try_lock_read() { return _lock.tryLockReader(); }\n\n    __inline void lock_write() { _lock.lockWriter(); }\n    __inline void unlock_write() { _lock.unlockWriter(); }\n    __inline bool try_lock_write() { return _lock.tryLockWriter(); }\n\nprivate:\n    NonRecursiveRWLock _lock;\n};\n\nclass notify_event\n{\npublic:\n    __inline void notify() { _ready.signal(); }\n    __inline void wait() { _ready.wait(); }\n    __inline bool wait_for(int milliseconds)\n    {\n        if (TIME_MS_MAX == static_cast<unsigned int>(milliseconds)) {\n            _ready.wait();\n            return true;\n        } else\n            return _ready.wait(milliseconds);\n    }\n\nprivate:\n    AutoResetEvent _ready;\n};\n\nclass semaphore\n{\npublic:\n    semaphore(int initial_count = 0) : _sema(initial_count, 128) {}\n\n    ~semaphore() {}\n\npublic:\n    inline void signal() { signal(1); }\n\n    inline void signal(int count) { _sema.signal(count); }\n\n    inline void wait() { _sema.wait(); }\n\n    inline bool wait(int milliseconds)\n    {\n        if (TIME_MS_MAX == static_cast<unsigned int>(milliseconds)) {\n            _sema.wait();\n            return true;\n        } else\n            return _sema.wait(milliseconds);\n    }\n\n    inline bool release()\n    {\n        _sema.signal();\n        return true;\n    }\n\nprivate:\n    
LightweightSemaphore _sema;\n};\n\n//--------------------- helpers --------------------------------------\ntemplate <typename T>\nclass auto_lock\n{\npublic:\n    auto_lock(T &lock) : _lock(&lock) { _lock->lock(); }\n    ~auto_lock() { _lock->unlock(); }\n\nprivate:\n    T *_lock;\n\n    auto_lock(const auto_lock &);\n    auto_lock &operator=(const auto_lock &);\n};\n\nclass auto_read_lock\n{\npublic:\n    auto_read_lock(rw_lock_nr &lock) : _lock(&lock) { _lock->lock_read(); }\n    ~auto_read_lock() { _lock->unlock_read(); }\n\nprivate:\n    rw_lock_nr *_lock;\n};\n\nclass auto_write_lock\n{\npublic:\n    auto_write_lock(rw_lock_nr &lock) : _lock(&lock) { _lock->lock_write(); }\n    ~auto_write_lock() { _lock->unlock_write(); }\n\nprivate:\n    rw_lock_nr *_lock;\n};\n}\n}\n"
  },
  {
    "path": "include/dsn/utility/sys_exit_hook.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/utility/enum_helper.h>\n\nnamespace dsn {\n\nenum sys_exit_type\n{\n    SYS_EXIT_NORMAL,\n    SYS_EXIT_BREAK, // Ctrl-C/Break,Shutdown,LogOff, see SetConsoleCtrlHandler\n    SYS_EXIT_EXCEPTION,\n\n    SYS_EXIT_INVALID\n};\n\nENUM_BEGIN(sys_exit_type, SYS_EXIT_INVALID)\nENUM_REG(SYS_EXIT_NORMAL)\nENUM_REG(SYS_EXIT_BREAK)\nENUM_REG(SYS_EXIT_EXCEPTION)\nENUM_END(sys_exit_type)\n\nnamespace tools {\nDSN_API extern join_point<void, sys_exit_type> sys_exit;\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/timer.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/utility/chrono_literals.h>\n\nnamespace dsn {\n\nclass timer\n{\npublic:\n    timer() = default;\n\n    // Start this timer\n    void start()\n    {\n        _start = std::chrono::system_clock::now();\n        _stop = _start;\n    }\n\n    // Stop this timer\n    void stop() { _stop = std::chrono::system_clock::now(); }\n\n    // Get the elapse from start() to stop(), in various units.\n    std::chrono::nanoseconds n_elapsed() const\n    {\n        return std::chrono::duration_cast<std::chrono::nanoseconds>(_stop - _start);\n    }\n    std::chrono::microseconds u_elapsed() const\n    {\n        return std::chrono::duration_cast<std::chrono::microseconds>(_stop - _start);\n    }\n    std::chrono::milliseconds m_elapsed() const\n    {\n        return std::chrono::duration_cast<std::chrono::milliseconds>(_stop - _start);\n    }\n    std::chrono::seconds s_elapsed() const\n    {\n        return std::chrono::duration_cast<std::chrono::seconds>(_stop - _start);\n    }\n\nprivate:\n    std::chrono::time_point<std::chrono::system_clock> _stop;\n    std::chrono::time_point<std::chrono::system_clock> _start;\n};\n\n} // 
namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/utils.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <functional>\n#include <memory>\n\n#include <dsn/tool-api/rpc_address.h>\n#include <dsn/utility/string_view.h>\n\n#define TIME_MS_MAX 0xffffffff\n\n// The COMPILE_ASSERT macro can be used to verify that a compile time\n// expression is true. 
For example, you could use it to verify the\n// size of a static array:\n//\n//   COMPILE_ASSERT(ARRAYSIZE(content_type_names) == CONTENT_NUM_TYPES,\n//                  content_type_names_incorrect_size);\n//\n// or to make sure a struct is smaller than a certain size:\n//\n//   COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large);\n//\n// The second argument to the macro is the name of the variable. If\n// the expression is false, most compilers will issue a warning/error\n// containing the name of the variable.\nstruct CompileAssert\n{\n};\n\n#define COMPILE_ASSERT(expr, msg) static const CompileAssert msg[bool(expr) ? 1 : -1]\n\nnamespace dsn {\nnamespace utils {\n\ntemplate <typename T>\nstd::shared_ptr<T> make_shared_array(size_t size)\n{\n    return std::shared_ptr<T>(new T[size], std::default_delete<T[]>());\n}\n\n// get host name from ip series\n// if can't get a hostname from ip(maybe no hostname or other errors), return false, and\n// hostname_result will be invalid value\n// if multiple hostname got and all of them are resolvable return true, otherwise return false.\n// and the hostname_result will be \"hostname1,hostname2(or ip_address or )...\"\n// we only support ipv4 currently\n// check if a.b.c.d:port can be resolved to hostname:port. 
If it can be resolved, return true\n// and hostname_result\n// will be the hostname, or it will be ip address or error message\n\n// valid a.b.c.d -> return TRUE && hostname_result=hostname | invalid a.b.c.d:port1 -> return\n// FALSE\n// && hostname_result=a.b.c.d\nbool hostname_from_ip(const char *ip, std::string *hostname_result);\n\n// valid a.b.c.d：port -> return TRUE && hostname_result=hostname:port | invalid a.b.c.d:port1\n// ->\n// return FALSE  && hostname_result=a.b.c.d:port\nbool hostname_from_ip_port(const char *ip_port, std::string *hostname_result);\n\n// valid a.b.c.d,e.f.g.h -> return TRUE && hostname_result_list=hostname1,hostname2 | invalid\n// a.b.c.d,e.f.g.h -> return TRUE && hostname_result_list=a.b.c.d,e.f.g.h\nbool list_hostname_from_ip(const char *ip_port_list, std::string *hostname_result_list);\n\n// valid a.b.c.d:port1,e.f.g.h:port2 -> return TRUE &&\n// hostname_result_list=hostname1:port1,hostname2:port2 | invalid a.b.c.d:port1,e.f.g.h:port2 ->\n// return TRUE && hostname_result_list=a.b.c.d:port1,e.f.g.h:port2\nbool list_hostname_from_ip_port(const char *ip_port_list, std::string *hostname_result_list);\n\n// valid_ipv4_rpc_address return TRUE && hostname_result=hostname:port | invalid_ipv4 -> return\n// FALSE\nbool hostname(const dsn::rpc_address &address, std::string *hostname_result);\n\n// valid_ip_network_order -> return TRUE && hostname_result=hostname\t|\n// invalid_ip_network_order -> return FALSE\nbool hostname_from_ip(uint32_t ip, std::string *hostname_result);\n\ntemplate <typename A, typename B>\nstd::multimap<B, A> flip_map(const std::map<A, B> &source)\n{\n    std::multimap<B, A> target;\n    std::transform(source.begin(),\n                   source.end(),\n                   std::inserter(target, target.begin()),\n                   [](const std::pair<A, B> &p) { return std::pair<B, A>(p.second, p.first); });\n    return target;\n}\n\ntemplate <typename T>\nstd::set<T> get_intersection(const std::set<T> &set1, const 
std::set<T> &set2)\n{\n    std::set<T> intersection;\n    std::set_intersection(set1.begin(),\n                          set1.end(),\n                          set2.begin(),\n                          set2.end(),\n                          std::inserter(intersection, intersection.begin()));\n    return intersection;\n}\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utility/work_queue.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     the work queue abstraction\n *\n * Revision history:\n *     Oct., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/utility/synchronize.h>\n#include <dsn/utility/link.h>\n#include <cassert>\n\nnamespace dsn {\n\ntemplate <typename T>\nclass work_queue\n{\npublic:\n    work_queue(int max_concurrent_op = 1) : _max_concurrent_op(max_concurrent_op)\n    {\n        _current_op_count = 0;\n    }\n\n    ~work_queue()\n    {\n        scope_lk l(_lock);\n        assert(_hdr.is_empty());\n        //\"work queue is deleted when there are still %d running ops or pending work items in queue\"\n    }\n\n    // return not-null for what's to be 
run next\n    T *add_work(T *dl, void *ctx)\n    {\n        scope_lk l(_lock);\n        _hdr.add(dl);\n\n        // allocate slot and run\n        if (_current_op_count == _max_concurrent_op)\n            return nullptr;\n        else {\n            _current_op_count++;\n            return unlink_next_workload(ctx);\n        }\n    }\n\n    // called when the current operation is completed,\n    // which triggers further round of operations as returned\n    T *on_work_completed(T *running, void *ctx)\n    {\n        scope_lk l(_lock);\n        _current_op_count--;\n\n        // no further workload\n        if (_hdr.is_empty()) {\n            return nullptr;\n        }\n\n        // run further workload\n        else {\n            _current_op_count++;\n            return unlink_next_workload(ctx);\n        }\n    }\n\nprotected:\n    // lock is already hold\n    virtual T *unlink_next_workload(void *ctx) { return _hdr.pop_one(); }\n\n    void reset_max_concurrent_ops(int max_c) { _max_concurrent_op = max_c; }\n\nprivate:\n    typedef utils::auto_lock<utils::ex_lock_nr_spin> scope_lk;\n    utils::ex_lock_nr_spin _lock;\n    int _current_op_count;\n    int _max_concurrent_op;\n\nprotected:\n    slist<T> _hdr;\n};\n}"
  },
  {
    "path": "include/dsn/utils/latency_tracer.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n#include <dsn/utility/synchronize.h>\n#include <dsn/utility/flags.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/tool-api/task_code.h>\n#include <dsn/dist/replication/replication.codes.h>\n\nnamespace dsn {\nnamespace utils {\n\n#define ADD_POINT(tracer)                                                                          \\\n    do {                                                                                           \\\n        if (dsn_unlikely(tracer != nullptr && (tracer)->enabled()))                                \\\n            (tracer)->add_point(fmt::format(\"{}:{}:{}\", __FILENAME__, __LINE__, __FUNCTION__));    \\\n    } while (0)\n\n#define ADD_CUSTOM_POINT(tracer, message)                                                          \\\n    do {                                                                                           \\\n        if (dsn_unlikely(tracer != nullptr && (tracer)->enabled()))                                \\\n            (tracer)->add_point(                                                                   \\\n                fmt::format(\"{}:{}:{}_{}\", __FILENAME__, 
__LINE__, __FUNCTION__, (message)));      \\\n    } while (0)\n\n#define APPEND_EXTERN_POINT(tracer, ts, message)                                                   \\\n    do {                                                                                           \\\n        if (dsn_unlikely(tracer != nullptr && (tracer)->enabled()))                                \\\n            (tracer)->append_point(                                                                \\\n                fmt::format(\"{}:{}:{}_{}\", __FILENAME__, __LINE__, __FUNCTION__, (message)),       \\\n                (ts));                                                                             \\\n    } while (0)\n\n/**\n * latency_tracer is a tool for tracking the time spent in each of the stages during request\n * execution. It can help users to figure out where the latency bottleneck is located. User needs to\n * use `add_point` before entering one stage, which will record the name of this stage and its start\n * time. 
When the request is finished, the formatted result can be dumped automatically in\n * deconstructer\n *\n * For example, given a request with a 4-stage pipeline (the `latency_tracer` need to\n * be held by this request throughout the execution):\n *\n * ```\n * class request {\n *      latency_tracer tracer;\n * }\n * void start(request req){\n *      req.tracer.add_point(\"start\");\n * }\n * void stageA(request req){\n *      req.tracer.add_point(\"stageA\");\n * }\n * void stageB(request req){\n *      req.tracer.add_point(\"stageB\");\n * }\n * void end(request req){\n *      req.tracer.add_point(\"end\");\n * }\n * ```\n *\n *  point1     point2     point3    point4\n *    |         |           |         |\n *    |         |           |         |\n *  start---->stageA----->stageB---->end\n *\n * \"request.tracer\" will record the time duration among all trace points.\n **/\nDSN_DECLARE_bool(enable_latency_tracer);\nDSN_DECLARE_bool(enable_latency_tracer_report);\n\nclass latency_tracer\n{\npublic:\n    //-is_sub:\n    //  if `is_sub`=true means its points will be dumped by parent tracer and won't be dumped\n    //  repeatedly in destructor\n    //-threshold:\n    //  threshold < 0: don't dump any trace points\n    //  threshold = 0: dump all trace points\n    //  threshold > 0: dump the trace point when time_used > threshold\n    //-task_code:\n    //  (1) use task code to judge if the task need trace, LPC_LATENCY_TRACE passed by default\n    //  means _enable_trace = true, for other code, it will get config value(see the implement of\n    //  the constructor) to judge the code whether to enable trace.\n    //  (2) the variable is used to trace the common low task work, for example, `aio task` is used\n    //  for nfs/private log/shared log, it will trace all type task if we want trace the `aio task`,\n    //  support the variable, the `aio task tracer` will filter out some unnecessary task base on\n    //  the code type.\n    latency_tracer(bool is_sub,\n     
              std::string name,\n                   uint64_t threshold,\n                   const dsn::task_code &code = LPC_LATENCY_TRACE);\n\n    ~latency_tracer();\n\n    // add a trace point to the tracer, it will record the timestamp of point\n    //\n    // -name: user specified name of the trace point\n    void add_point(const std::string &stage_name);\n\n    // append a trace point, the timestamp is passed. it will always append at last position\n    //\n    // NOTE: The method is used for custom stage duration which must make sure the point is\n    // sequential, for example, in the trace link of cross node, receive side timestamp must after\n    // the send side timestamp, you need use the method to make sure the rule to avoid the clock\n    // asynchronization problem. the detail resolution see the method implement\n    //\n    // -name: user specified name of the trace point\n    // -timestamp: user specified timestamp of the trace point\n    void append_point(const std::string &stage_name, uint64_t timestamp);\n\n    // sub_tracer is used for tracking the request which may transfer the other thread, for example:\n    // rdsn \"mutataion\" will async to execute send \"mutation\" to remote rpc node and execute io\n    // task, the \"tracking  responsibility\" is also passed on the async task:\n    //\n    // stageA[mutation]--stageB[mutation]--|-->stageC0[mutation]-->....\n    //                                     |-->stageC1[io]-->....\n    //                                     |-->stageC2[rpc]-->....\n    void add_sub_tracer(const std::shared_ptr<latency_tracer> &tracer);\n\n    void add_sub_tracer(const std::string &name);\n\n    std::shared_ptr<latency_tracer> sub_tracer(const std::string &name);\n\n    void set_name(const std::string &name) { _name = name; }\n\n    void set_description(const std::string &description) { _description = description; }\n\n    void set_parent_point_name(const std::string &name) { _parent_point_name = name; }\n\n    
void set_start_time(uint64_t start_time) { _start_time = start_time; }\n\n    const std::string &name() const { return _name; }\n\n    const std::string &description() const { return _description; }\n\n    uint64_t start_time() const { return _start_time; }\n\n    uint64_t last_time() const { return _last_time; }\n\n    const std::string &last_stage_name() const { return _last_stage; }\n\n    bool enabled() const { return _enable_trace; }\n\nprivate:\n    // report the trace point duration to monitor system\n    static void report_trace_point(const std::string &name, uint64_t span);\n\n    // dump and print the trace point into log file\n    void dump_trace_points(/*out*/ std::string &traces);\n\n    bool _is_sub;\n    std::string _name;\n    std::string _description;\n    uint64_t _threshold;\n    uint64_t _start_time;\n    uint64_t _last_time;\n    std::string _last_stage;\n\n    dsn::task_code _task_code;\n    bool _enable_trace;\n\n    utils::rw_lock_nr _point_lock; //{\n    std::map<int64_t, std::string> _points;\n    // }\n\n    std::string _parent_point_name;\n    utils::rw_lock_nr _sub_lock; //{\n    std::unordered_map<std::string, std::shared_ptr<latency_tracer>> _sub_tracers;\n    // }\n\n    friend class latency_tracer_test;\n};\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utils/time_utils.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#pragma once\n\n#include <chrono>\n#include <cstdio>\n#include <dsn/utility/string_view.h>\n\nnamespace dsn {\nnamespace utils {\n\nstatic struct tm *get_localtime(uint64_t ts_ms, struct tm *tm_buf)\n{\n    auto t = (time_t)(ts_ms / 1000);\n    return localtime_r(&t, tm_buf);\n}\n\n// get time string, which format is yyyy-MM-dd hh:mm:ss.SSS\n// NOTE: using char* as output is usually unsafe. 
Please use std::string as the output argument\n// as long as it's possible.\nextern void time_ms_to_string(uint64_t ts_ms, char *str);\nextern void time_ms_to_string(uint64_t ts_ms, std::string &str);\n\n// get date string with format of 'yyyy-MM-dd' from given timestamp\ninline void time_ms_to_date(uint64_t ts_ms, char *str, int len)\n{\n    struct tm tmp;\n    strftime(str, len, \"%Y-%m-%d\", get_localtime(ts_ms, &tmp));\n}\n\n// get date string with format of 'yyyy-MM-dd hh:mm:ss' from given timestamp(ms)\ninline void time_ms_to_date_time(uint64_t ts_ms, char *str, int len)\n{\n    struct tm tmp;\n    strftime(str, len, \"%Y-%m-%d %H:%M:%S\", get_localtime(ts_ms, &tmp));\n}\n\n// get date string with format of 'yyyy-MM-dd hh:mm:ss' from given timestamp(s)\ninline std::string time_s_to_date_time(uint64_t unix_seconds)\n{\n    char buffer[128];\n    utils::time_ms_to_date_time(unix_seconds * 1000, buffer, 128);\n    return std::string(buffer);\n}\n\n// parse hour/min/sec from the given timestamp\ninline void time_ms_to_date_time(uint64_t ts_ms, int32_t &hour, int32_t &min, int32_t &sec)\n{\n    struct tm tmp;\n    auto ret = get_localtime(ts_ms, &tmp);\n    hour = ret->tm_hour;\n    min = ret->tm_min;\n    sec = ret->tm_sec;\n}\n\n// get current physical timestamp in ns\ninline uint64_t get_current_physical_time_ns()\n{\n    auto now = std::chrono::high_resolution_clock::now();\n    return std::chrono::duration_cast<std::chrono::nanoseconds>(now.time_since_epoch()).count();\n}\n\n// get current physical timestamp in s\ninline uint64_t get_current_physical_time_s() { return get_current_physical_time_ns() * 1e-9; }\n\n// get unix timestamp of today's zero o'clock.\n// eg. 
`1525881600` returned when called on May 10, 2018, CST\ninline int64_t get_unix_sec_today_midnight()\n{\n    time_t t = time(nullptr);\n    struct tm tmp;\n    auto ret = localtime_r(&t, &tmp);\n    ret->tm_hour = 0;\n    ret->tm_min = 0;\n    ret->tm_sec = 0;\n    return static_cast<int64_t>(mktime(ret));\n}\n\n// `hh:mm` (range in [00:00, 23:59]) to seconds since 00:00:00\n// eg. `01:00` => `3600`\n// Return: -1 when invalid\ninline int hh_mm_to_seconds(dsn::string_view hhmm)\n{\n    int hour = 0, min = 0, sec = -1;\n    if (::sscanf(hhmm.data(), \"%d:%d\", &hour, &min) == 2 && (0 <= hour && hour <= 23) &&\n        (0 <= min && min <= 59)) {\n        sec = 3600 * hour + 60 * min;\n    }\n    return sec;\n}\n\n// local time `hh:mm` to unix timestamp.\n// eg. `18:10` => `1525947000` when called on May 10, 2018, CST\n// Return: -1 when invalid\ninline int64_t hh_mm_today_to_unix_sec(string_view hhmm_of_day)\n{\n    int sec_of_day = hh_mm_to_seconds(hhmm_of_day);\n    if (sec_of_day == -1) {\n        return -1;\n    }\n\n    return get_unix_sec_today_midnight() + sec_of_day;\n}\n\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utils/token_bucket_throttling_controller.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n#pragma once\n\n#include <chrono>\n#include <memory>\n#include <dsn/utility/TokenBucket.h>\n\nnamespace dsn {\nnamespace utils {\n\nusing DynamicTokenBucket = folly::BasicDynamicTokenBucket<std::chrono::steady_clock>;\n\n// token_bucket_throttling_controller ignores `delay` parameter\nclass token_bucket_throttling_controller\n{\nprivate:\n    friend class token_bucket_throttling_controller_test;\n\n    std::unique_ptr<DynamicTokenBucket> _token_bucket;\n\n    bool _enabled;\n    std::string _env_value;\n    int32_t _partition_count = 0;\n    double _rate;\n    double _burstsize;\n\npublic:\n    token_bucket_throttling_controller();\n\n    // return ture means you can get token\n    // return false means the bucket is already empty, but the token is borrowed from future.\n    // non-blocking\n    bool consume_token(int32_t request_units);\n\n    // if the bucket has no tokens, return false\n    bool available() const;\n\n    // reset to no throttling.\n    void reset(bool &changed, std::string &old_env_value);\n\n    // return the current env value.\n    const std::string &env_value() const;\n\n    // Configures throttling strategy dynamically 
from app-envs.\n    //\n    // Two formats are supported:\n    // 1. style: \"20000*delay*100,20000*reject*100\"\n    //      example: 20000*delay*100,20000*reject*100\n    //      result: reject 20000 request_units, but never delay\n    //      example: 20000*delay*100\n    //      result: never reject or delay\n    //      example: 20000*reject*100\n    //      result: reject 20000 request_units\n    // 2. style: 20/\"20K\"/\"20M\"\n    //      example: 20K\n    //      result: reject 20000 request_units\n    //\n    // return true if parsing succeeds.\n    // return false if parsing fails because env_value is invalid.\n    // if false is returned, the original value is not changed.\n    // 'parse_error' is set when false is returned.\n    // 'changed' is set when true is returned.\n    // 'old_env_value' is set when 'changed' is set to true.\n    bool parse_from_env(const std::string &env_value,\n                        int32_t partition_count,\n                        std::string &parse_error,\n                        bool &changed,\n                        std::string &old_env_value);\n\n    static bool string_to_value(std::string str, int64_t &value);\n\n    // wrapper of transform_env_string, checks whether the env string is valid.\n    static bool validate(const std::string &env, std::string &hint_message);\n\n    static bool transform_env_string(const std::string &env,\n                                     int64_t &reject_size_value,\n                                     bool &enabled,\n                                     std::string &hint_message);\n};\n\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "include/dsn/utils/token_buckets.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <unordered_map>\n\n#include <dsn/utility/synchronize.h>\n#include <dsn/utility/TokenBucket.h>\n\nnamespace dsn {\nnamespace utils {\n\nclass token_buckets\n{\npublic:\n    std::shared_ptr<folly::DynamicTokenBucket> get_token_bucket(const std::string &name);\n\nprivate:\n    utils::rw_lock_nr _buckets_lock;\n    // token_name->token_ptr\n    std::unordered_map<std::string, std::shared_ptr<folly::DynamicTokenBucket>> _token_buckets;\n\n    friend class token_buckets_test;\n};\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "run.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nos=linux\nscripts_dir=`pwd`/scripts/$os\n\nfunction exit_if_fail() {\n    if [ $1 != 0 ]; then\n        exit $1\n    fi\n}\n\nfunction usage()\n{\n    echo \"usage: run.sh <command> [<args>]\"\n    echo\n    echo \"Command list:\"\n    echo \"   help        print the help info\"\n    echo \"   build       build the system\"\n    echo \"   install     install the system\"\n    echo \"   test        test the system\"\n    echo \"   start_zk    start the local single zookeeper server\"\n    echo \"   stop_zk     stop the local single zookeeper server\"\n    echo \"   clear_zk    stop the local single zookeeper server and clear the data\"\n    echo \"   deploy      deploy the program to remote machine\"\n    echo \"   start       start program at remote 
machine\"\n    echo \"   stop        stop program at remote machine\"\n    echo \"   clean       clean deployed program at remote machine\"\n    echo\n    echo \"Command 'run.sh <command> -h' will print help for subcommands.\"\n}\n\n#####################\n## build\n#####################\nfunction usage_build()\n{\n    subcommand=\"build\"\n    if [ \"$ONLY_BUILD\" == \"NO\" ]; then\n        subcommand=\"test\"\n    fi\n    echo \"Options for subcommand '$subcommand':\"\n    echo \"   -h|--help             print the help info\"\n    echo \"   -t|--type             build type: debug|release, default is debug\"\n    echo \"   -c|--clear            clear environment before building, but not clear thirdparty\"\n    echo \"   --clear_thirdparty    clear environment before building, including thirdparty\"\n    echo \"   --compiler            specify c and cxx compiler, sperated by ','\"\n    echo \"                         e.g., \\\"gcc,g++\\\" or \\\"clang-3.9,clang++-3.9\\\"\"\n    echo \"                         default is \\\"gcc,g++\\\"\"\n    echo \"   -j|--jobs <num>       the number of jobs to run simultaneously, default 8\"\n    echo \"   --enable_gcov         generate gcov code coverage report, default no\"\n    echo \"   -v|--verbose          build in verbose mode, default no\"\n    echo \"   --notest              build without building unit tests, default no\"\n    echo \"   --disable_gperf       build without gperftools, this flag is mainly used\"\n    echo \"                         to enable valgrind memcheck, default no\"\n    echo \"   --use_jemalloc        build with jemalloc\"\n    echo \"   --skip_thirdparty     whether to skip building thirdparties, default no\"\n    echo \"   --check               whether to perform code check before building\"\n    echo \"   --sanitizer <type>    build with sanitizer to check potential problems,\n                                   type: address|leak|thread|undefined\"\n    if [ \"$ONLY_BUILD\" == \"NO\" ]; then\n    
    echo \"   -m|--test_module      specify modules to test, split by ',',\"\n        echo \"                         e.g., \\\"dsn_runtime_tests,dsn_meta_state_tests\\\",\"\n        echo \"                         if not set, then run all tests\"\n    fi\n\n    echo \"   --enable_rocksdb_portable      build a portable rocksdb binary\"\n}\nfunction run_build()\n{\n    # NOTE(jiashuo1): No \"memory\" check mode, because MemorySanitizer is only available in Clang for Linux x86_64 targets\n    # https://www.jetbrains.com/help/clion/google-sanitizers.html\n    SANITIZERS=(\"address\" \"leak\" \"thread\" \"undefined\")\n\n    C_COMPILER=\"gcc\"\n    CXX_COMPILER=\"g++\"\n    BUILD_TYPE=\"release\"\n    CLEAR=NO\n    CLEAR_THIRDPARTY=NO\n    JOB_NUM=8\n    ENABLE_GCOV=NO\n    RUN_VERBOSE=NO\n    NO_TEST=NO\n    DISABLE_GPERF=NO\n    USE_JEMALLOC=NO\n    SKIP_THIRDPARTY=NO\n    CHECK=NO\n    SANITIZER=\"\"\n    TEST_MODULE=\"\"\n    ROCKSDB_PORTABLE=OFF\n    while [[ $# > 0 ]]; do\n        key=\"$1\"\n        case $key in\n            -h|--help)\n                usage_build\n                exit 0\n                ;;\n            -t|--type)\n                BUILD_TYPE=\"$2\"\n                shift\n                ;;\n            -c|--clear)\n                CLEAR=YES\n                ;;\n            --clear_thirdparty)\n                CLEAR_THIRDPARTY=YES\n                ;;\n            --compiler)\n                C_COMPILER=`echo $2 | awk -F',' '{print $1}'`\n                CXX_COMPILER=`echo $2 | awk -F',' '{print $2}'`\n                if [ \"x\"$C_COMPILER == \"x\" -o \"x\"$CXX_COMPILER == \"x\" ]; then\n                    echo \"ERROR: invalid compiler option: $2\"\n                    echo\n                    usage_build\n                    exit 1\n                fi\n                shift\n                ;;\n            -j|--jobs)\n                JOB_NUM=\"$2\"\n                shift\n                ;;\n            --enable_gcov)\n                
ENABLE_GCOV=YES\n                BUILD_TYPE=\"debug\"\n                ;;\n            -v|--verbose)\n                RUN_VERBOSE=YES\n                ;;\n            --notest)\n                NO_TEST=YES\n                ;;\n            --disable_gperf)\n                DISABLE_GPERF=YES\n                ;;\n            --use_jemalloc)\n                DISABLE_GPERF=YES\n                USE_JEMALLOC=YES\n                ;;\n            --skip_thirdparty)\n                SKIP_THIRDPARTY=YES\n                ;;\n            --check)\n                CHECK=YES\n                ;;\n            --sanitizer)\n                IS_SANITIZERS=`echo ${SANITIZERS[@]} | grep -w $2`\n                if [[ -z ${IS_SANITIZERS} ]]; then\n                    echo \"ERROR: unknown sanitizer type \\\"$2\\\"\"\n                    usage_build\n                    exit 1\n                fi\n                SANITIZER=\"$2\"\n                shift\n                ;;\n            -m|--test_module)\n                if [ \"$ONLY_BUILD\" == \"YES\" ]; then\n                    echo \"ERROR: unknown option \\\"$key\\\"\"\n                    echo\n                    usage_build\n                    exit 1\n                fi\n                TEST_MODULE=\"$2\"\n                shift\n                ;;\n            --enable_rocksdb_portable)\n                ROCKSDB_PORTABLE=ON\n                ;;\n            *)\n                echo \"ERROR: unknown option \\\"$key\\\"\"\n                echo\n                usage_build\n                exit 1\n                ;;\n        esac\n        shift\n    done\n\n    if [[ ${CHECK} == \"YES\" ]]; then\n        ${scripts_dir}/run-clang-format.sh\n        exit_if_fail $?\n    fi\n\n    if [ \"$(uname)\" == \"Darwin\" ]; then\n        MACOS_OPENSSL_ROOT_DIR=\"/usr/local/opt/openssl\"\n        CMAKE_OPTIONS=\"-DMACOS_OPENSSL_ROOT_DIR=${MACOS_OPENSSL_ROOT_DIR}\"\n    fi\n    if [[ ${SKIP_THIRDPARTY} == \"YES\" ]]; then\n        echo \"Skip building 
third-parties...\"\n    else\n        cd thirdparty\n        if [[ \"$CLEAR_THIRDPARTY\" == \"YES\" ]]; then\n            echo \"Clear third-parties...\"\n            rm -rf build\n            rm -rf output\n        fi\n        echo \"Start building third-parties...\"\n        mkdir -p build\n        pushd build\n        CMAKE_OPTIONS=\"${CMAKE_OPTIONS}\n                       -DCMAKE_C_COMPILER=${C_COMPILER}\n                       -DCMAKE_CXX_COMPILER=${CXX_COMPILER}\n                       -DCMAKE_BUILD_TYPE=Release\n                       -DROCKSDB_PORTABLE=${ROCKSDB_PORTABLE}\n                       -DUSE_JEMALLOC=${USE_JEMALLOC}\"\n        cmake .. ${CMAKE_OPTIONS}\n        make -j$JOB_NUM\n        exit_if_fail $?\n        popd\n        cd ..\n    fi\n\n    if [ \"$BUILD_TYPE\" != \"debug\" -a \"$BUILD_TYPE\" != \"release\" ]; then\n        echo \"ERROR: invalid build type \\\"$BUILD_TYPE\\\"\"\n        echo\n        usage_build\n        exit 1\n    fi\n    if [ \"$ONLY_BUILD\" == \"NO\" ]; then\n        run_start_zk\n        if [ $? 
-ne 0 ]; then\n            echo \"ERROR: start zk failed\"\n            exit 1\n        fi\n    fi\n    C_COMPILER=\"$C_COMPILER\" CXX_COMPILER=\"$CXX_COMPILER\" BUILD_TYPE=\"$BUILD_TYPE\" \\\n        ONLY_BUILD=\"$ONLY_BUILD\" CLEAR=\"$CLEAR\" JOB_NUM=\"$JOB_NUM\" \\\n        ENABLE_GCOV=\"$ENABLE_GCOV\" SANITIZER=\"$SANITIZER\" \\\n        RUN_VERBOSE=\"$RUN_VERBOSE\" TEST_MODULE=\"$TEST_MODULE\" NO_TEST=\"$NO_TEST\" \\\n        DISABLE_GPERF=\"$DISABLE_GPERF\" USE_JEMALLOC=\"$USE_JEMALLOC\" \\\n        MACOS_OPENSSL_ROOT_DIR=\"$MACOS_OPENSSL_ROOT_DIR\" $scripts_dir/build.sh\n}\n\n#####################\n## start_zk\n#####################\nfunction usage_start_zk()\n{\n    echo \"Options for subcommand 'start_zk':\"\n    echo \"   -h|--help         print the help info\"\n    echo \"   -d|--install_dir <dir>\"\n    echo \"                     zookeeper install directory,\"\n    echo \"                     if not set, then default is './.zk_install'\"\n    echo \"   -p|--port <port>  listen port of zookeeper, default is 12181\"\n}\n\nfunction run_start_zk()\n{\n    # first we check the environment that zk need: java and nc command\n    # check java\n    type java >/dev/null 2>&1 || { echo >&2 \"start zk failed, need install jre...\"; exit 1;}\n\n    # check nc command\n    type nc >/dev/null 2>&1 || { echo >&2 \"start zk failed, need install netcat command...\"; exit 1;}\n\n    INSTALL_DIR=`pwd`/.zk_install\n    PORT=12181\n    while [[ $# > 0 ]]; do\n        key=\"$1\"\n        case $key in\n            -h|--help)\n                usage_start_zk\n                exit 0\n                ;;\n            -d|--install_dir)\n                INSTALL_DIR=$2\n                shift\n                ;;\n            -p|--port)\n                PORT=$2\n                shift\n                ;;\n            *)\n                echo \"ERROR: unknown option \\\"$key\\\"\"\n                echo\n                usage_start_zk\n                exit 1\n                ;;\n        
esac\n        shift\n    done\n    INSTALL_DIR=\"$INSTALL_DIR\" PORT=\"$PORT\" ./scripts/linux/start_zk.sh\n}\n\n#####################\n## stop_zk\n#####################\nfunction usage_stop_zk()\n{\n    echo \"Options for subcommand 'stop_zk':\"\n    echo \"   -h|--help         print the help info\"\n    echo \"   -d|--install_dir <dir>\"\n    echo \"                     zookeeper install directory,\"\n    echo \"                     if not set, then default is './.zk_install'\"\n}\nfunction run_stop_zk()\n{\n    INSTALL_DIR=`pwd`/.zk_install\n    while [[ $# > 0 ]]; do\n        key=\"$1\"\n        case $key in\n            -h|--help)\n                usage_stop_zk\n                exit 0\n                ;;\n            -d|--install_dir)\n                INSTALL_DIR=$2\n                shift\n                ;;\n            *)\n                echo \"ERROR: unknown option \\\"$key\\\"\"\n                echo\n                usage_stop_zk\n                exit 1\n                ;;\n        esac\n        shift\n    done\n    INSTALL_DIR=\"$INSTALL_DIR\" ./scripts/linux/stop_zk.sh\n}\n\n#####################\n## clear_zk\n#####################\nfunction usage_clear_zk()\n{\n    echo \"Options for subcommand 'clear_zk':\"\n    echo \"   -h|--help         print the help info\"\n    echo \"   -d|--install_dir <dir>\"\n    echo \"                     zookeeper install directory,\"\n    echo \"                     if not set, then default is './.zk_install'\"\n}\nfunction run_clear_zk()\n{\n    INSTALL_DIR=`pwd`/.zk_install\n    while [[ $# > 0 ]]; do\n        key=\"$1\"\n        case $key in\n            -h|--help)\n                usage_clear_zk\n                exit 0\n                ;;\n            -d|--install_dir)\n                INSTALL_DIR=$2\n                shift\n                ;;\n            *)\n                echo \"ERROR: unknown option \\\"$key\\\"\"\n                echo\n                usage_clear__zk\n                exit 1\n                ;;\n 
       esac\n        shift\n    done\n    INSTALL_DIR=\"$INSTALL_DIR\" ./scripts/linux/clear_zk.sh\n}\n\n####################################################################\n\nif [ $# -eq 0 ]; then\n    usage\n    exit 0\nfi\ncmd=$1\ncase $cmd in\n    help)\n        usage ;;\n    build)\n        shift\n        ONLY_BUILD=YES\n        run_build $* ;;\n    test)\n        shift\n        ONLY_BUILD=NO\n        run_build $* ;;\n    start_zk)\n        shift\n        run_start_zk $* ;;\n    stop_zk)\n        shift\n        run_stop_zk $* ;;\n    clear_zk)\n        shift\n        run_clear_zk $* ;;\n    deploy|start|stop|clean)\n        $scripts_dir/deploy.sh $* ;;\n    *)\n        echo \"ERROR: unknown command $cmd\"\n        echo\n        usage\n        exit 1\nesac\n\n"
  },
  {
    "path": "scripts/linux/build.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# !!! 
This script should be run in dsn project root directory (../../).\n#\n# Shell Options:\n#    CLEAR          YES|NO\n#    JOB_NUM        <num>\n#    BUILD_TYPE     debug|release\n#    C_COMPILER     <str>\n#    CXX_COMPILER   <str>\n#    ONLY_BUILD     YES|NO\n#    RUN_VERBOSE    YES|NO\n#    ENABLE_GCOV    YES|NO\n#    TEST_MODULE    \"<module1> <module2> ...\"\n#\n# CMake options:\n#    -DCMAKE_C_COMPILER=gcc|clang\n#    -DCMAKE_CXX_COMPILER=g++|clang++\n#    [-DCMAKE_BUILD_TYPE=Debug]\n#    [-DENABLE_GCOV=TRUE]\n\nROOT=`pwd`\nREPORT_DIR=$ROOT/test_reports\nBUILD_DIR=\"$ROOT/builder\"\nGCOV_DIR=\"$ROOT/gcov_report\"\nTIME=`date '+%Y-%m-%d %H:%M:%S'`\n\necho \"C_COMPILER=$C_COMPILER\"\necho \"CXX_COMPILER=$CXX_COMPILER\"\nCMAKE_OPTIONS=\"$CMAKE_OPTIONS -DCMAKE_C_COMPILER=$C_COMPILER -DCMAKE_CXX_COMPILER=$CXX_COMPILER\"\n\necho \"JOB_NUM=$JOB_NUM\"\nMAKE_OPTIONS=\"$MAKE_OPTIONS -j$JOB_NUM\"\n\nif [ \"$CLEAR\" == \"YES\" ]\nthen\n    echo \"CLEAR=YES\"\nelse\n    echo \"CLEAR=NO\"\nfi\n\nif [ \"$BUILD_TYPE\" == \"debug\" ]\nthen\n    echo \"BUILD_TYPE=debug\"\n    CMAKE_OPTIONS=\"$CMAKE_OPTIONS -DCMAKE_BUILD_TYPE=Debug\"\nelse\n    echo \"BUILD_TYPE=release\"\nfi\n\nif [ \"$ONLY_BUILD\" == \"YES\" ]\nthen\n    echo \"ONLY_BUILD=YES\"\nelse\n    echo \"ONLY_BUILD=NO\"\nfi\n\nif [ \"$RUN_VERBOSE\" == \"YES\" ]\nthen\n    echo \"RUN_VERBOSE=YES\"\n    MAKE_OPTIONS=\"$MAKE_OPTIONS VERBOSE=1\"\nelse\n    echo \"RUN_VERBOSE=NO\"\nfi\n\nif [ \"$ENABLE_GCOV\" == \"YES\" ]\nthen\n    echo \"ENABLE_GCOV=YES\"\n    CMAKE_OPTIONS=\"$CMAKE_OPTIONS -DENABLE_GCOV=TRUE\"\nelse\n    echo \"ENABLE_GCOV=NO\"\nfi\n\nif [ \"$NO_TEST\" == \"YES\" ]\nthen\n    echo \"NO_TEST=YES\"\n    CMAKE_OPTIONS=\"$CMAKE_OPTIONS -DBUILD_TEST=OFF\"\nelse\n    echo \"NO_TEST=NO\"\nfi\n\n# valgrind can not work together with gpertools\n# you may want to use this option when you want to run valgrind\nif [ \"$DISABLE_GPERF\" == \"YES\" ]\nthen\n    echo \"DISABLE_GPERF=YES\"\n    
CMAKE_OPTIONS=\"$CMAKE_OPTIONS -DENABLE_GPERF=Off\"\nelse\n    echo \"DISABLE_GPERF=NO\"\nfi\n\nif [ \"$USE_JEMALLOC\" == \"YES\" ]\nthen\n    echo \"USE_JEMALLOC=YES\"\n    CMAKE_OPTIONS=\"$CMAKE_OPTIONS -DUSE_JEMALLOC=ON\"\nelse\n    echo \"USE_JEMALLOC=NO\"\nfi\n\nif [ ! -z \"$SANITIZER\" ]\nthen\n    echo \"SANITIZER=$SANITIZER\"\n    CMAKE_OPTIONS=\"$CMAKE_OPTIONS -DSANITIZER=$SANITIZER\"\nelse\n    echo \"Build without sanitizer\"\nfi\n\necho \"CMAKE_OPTIONS=$CMAKE_OPTIONS\"\necho \"MAKE_OPTIONS=$MAKE_OPTIONS\"\n\necho \"#############################################################################\"\n\nif [ -f $BUILD_DIR/CMAKE_OPTIONS ]\nthen\n    LAST_OPTIONS=`cat $BUILD_DIR/CMAKE_OPTIONS`\n    if [ \"$CMAKE_OPTIONS\" != \"$LAST_OPTIONS\" ]\n    then\n        echo \"WARNING: CMAKE_OPTIONS has changed from last build, clear environment first\"\n        CLEAR=YES\n    fi\nfi\n\nif [ \"$CLEAR\" == \"YES\" -a -d \"$BUILD_DIR\" ]\nthen\n    echo \"Clear builder...\"\n    rm -rf $BUILD_DIR\nfi\n\nif [ ! -d \"$BUILD_DIR\" ]\nthen\n    echo \"Running cmake...\"\n    mkdir -p $BUILD_DIR\n    cd $BUILD_DIR\n    echo \"$CMAKE_OPTIONS\" >CMAKE_OPTIONS\n    cmake .. -DOPENSSL_ROOT_DIR=$MACOS_OPENSSL_ROOT_DIR -DCMAKE_INSTALL_PREFIX=$BUILD_DIR/output $CMAKE_OPTIONS\n    if [ $? -ne 0 ]\n    then\n        echo \"ERROR: cmake failed\"\n        exit 1\n    fi\n    cd ..\nfi\n\ncd $BUILD_DIR\necho \"[$(date)] Building...\"\nmake install $MAKE_OPTIONS\nif [ $? 
-ne 0 ]\nthen\n    echo \"ERROR: build failed\"\n    exit 1\nelse\n    echo \"[$(date)] Build succeed\"\nfi\ncd ..\n\nif [ \"$ONLY_BUILD\" == \"YES\" ]\nthen\n    exit 0\nfi\n\necho \"################################# start testing ################################\"\n\nif [ -z \"$TEST_MODULE\" ]\nthen\n    # supported test module\n    TEST_MODULE=\"dsn_runtime_tests,dsn_utils_tests,dsn_perf_counter_test,dsn.zookeeper.tests,dsn_aio_test,dsn.failure_detector.tests,dsn_meta_state_tests,dsn_nfs_test,dsn_block_service_test,dsn.replication.simple_kv,dsn.rep_tests.simple_kv,dsn.meta.test,dsn.replica.test,dsn_http_test,dsn_replica_dup_test,dsn_replica_backup_test,dsn_replica_bulk_load_test,dsn_replica_split_test\"\nfi\n\necho \"TEST_MODULE=$TEST_MODULE\"\n\nif [ ! -d \"$REPORT_DIR\" ]\nthen\n    mkdir -p $REPORT_DIR\nfi\n\nfor MODULE in `echo $TEST_MODULE | sed 's/,/ /g'`; do\n    echo \"====================== run $MODULE ==========================\"\n    MODULE_DIR=$BUILD_DIR/bin/$MODULE\n    if [ ! -d \"$MODULE_DIR\" ]\n    then\n        echo \"ERROR: module dir $MODULE_DIR not exist\"\n        exit 1\n    fi\n    if [ ! -f \"$MODULE_DIR/run.sh\" ]\n    then\n        echo \"ERROR: module test entrance script $MODULE_DIR/run.sh doesn't exist\"\n        exit 1\n    fi\n    cd $MODULE_DIR\n    REPORT_DIR=$REPORT_DIR ./run.sh\n    ret=$?\n    if [ $ret -ne 0 ]\n    then\n        echo \"ERROR: run $MODULE failed, return_code = $ret\"\n        exit 1\n    fi\ndone\n\nif [ \"$ENABLE_GCOV\" == \"YES\" ]\nthen\n    echo \"Generating gcov report...\"\n    cd $ROOT\n    mkdir -p $GCOV_DIR\n\n    echo \"Running gcovr to produce HTML code coverage report.\"\n    gcovr --html --html-details -r $ROOT --object-directory=$BUILD_DIR \\\n          -o $GCOV_DIR/index.html\n    if [ $? -ne 0 ]\n    then\n        exit 1\n    fi\nfi\n\necho \"Test succeed\"\n\n"
  },
  {
    "path": "scripts/linux/clear_zk.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n#\n# Options:\n#    INSTALL_DIR    <dir>\n\nif [ -z \"$INSTALL_DIR\" ]\nthen\n    echo \"ERROR: no INSTALL_DIR specified\"\n    exit 1\nfi\n\ncd $INSTALL_DIR\n\nZOOKEEPER_HOME=`pwd`/apache-zookeeper-3.7.0-bin\n\nif [ -d \"$ZOOKEEPER_HOME\" ]\nthen\n    $ZOOKEEPER_HOME/bin/zkServer.sh stop\n    rm -rf $ZOOKEEPER_HOME/data &>/dev/null\n    echo \"Clearing zookeeper ... CLEARED\"\nfi\n"
  },
  {
    "path": "scripts/linux/install.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# !!! This script should be run in dsn project root directory (../../).\n#\n# Options:\n#    INSTALL_DIR    <dir>\n\nif [ -z \"$INSTALL_DIR\" ]\nthen\n    echo \"ERROR: no INSTALL_DIR specified\"\n    exit 1\nfi\n\nif [ ! -f \"builder/output/lib/libdsn.core.so\" ]\nthen\n    echo \"ERROR: not build yet\"\n    exit 1\nfi\n\nmkdir -p $INSTALL_DIR\nif [ $? -ne 0 ]\nthen\n    echo \"ERROR: mkdir $INSTALL_DIR failed\"\n    exit 1\nfi\nINSTALL_DIR=`cd $INSTALL_DIR; pwd`\necho \"INSTALL_DIR=$INSTALL_DIR\"\n\necho \"Copying files...\"\ncp -r -v `pwd`/builder/output/* $INSTALL_DIR\necho \"Install succeed\"\nif [ -z \"$DSN_ROOT\" -o \"$DSN_ROOT\" != \"$INSTALL_DIR\" ]\nthen\n    if ! 
grep -q '^export DSN_ROOT=' ~/.bashrc\n    then\n        echo \"export DSN_ROOT=$INSTALL_DIR\" >>~/.bashrc\n    else\n        sed -i \"s@^export DSN_ROOT=.*@export DSN_ROOT=$INSTALL_DIR@\" ~/.bashrc\n    fi\n    if ! grep -q '^export LD_LIBRARY_PATH=.*DSN_ROOT' ~/.bashrc\n    then\n        echo 'export LD_LIBRARY_PATH=$DSN_ROOT/lib:$LD_LIBRARY_PATH' >>~/.bashrc\n    fi\n    echo \"====== ENVIRONMENT VARIABLE \\$DSN_ROOT SET OR CHANGED, please run 'source ~/.bashrc' ======\"\nfi\n\n"
  },
  {
    "path": "scripts/linux/learn_stat.py",
    "content": "#!/usr/bin/env python\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n#\n# This script is to extract replica learning statistics from log files.\n#\n# USAGE: python learn_stat.py <log-dir>\n#\n\nimport re, sys\nfrom os import listdir\nfrom os.path import isfile, join\nif len(sys.argv) == 1:\n  print \"USAGE:\",sys.argv[0],\"<log-dir>\"\n  sys.exit(1)\ndir = sys.argv[1]\nfile_ids = sorted([int(f[4:][0:-4]) for f in listdir(dir) if isfile(join(dir, f)) and f.startswith('log.') and f.endswith('.txt')])\np_learn = re.compile('learnee =|learner =|learn_duration =')\np_id = re.compile(' ([0-9.]+)@[0-9.:]+: .*\\[([0-9]+)\\]: ')\np_decree = re.compile('app_committed_decree = ([0-9]+)')\np_learner = re.compile(' [0-9.]+@([0-9.:]+): init_learn')\np_learnee = re.compile('learnee = ([0-9.:]+)')\np_duration = 
re.compile('learn_duration = ([0-9]+)')\np_meta_size = re.compile('on_learn_reply.*learned_meta_size = ([0-9]+)')\np_file_size = re.compile('on_copy_remote_state_completed.*copy_file_count = ([0-9]+), copy_file_size = ([0-9]+)')\nlearn_map = {}\nfor fid in file_ids:\n  fname = 'log.'+str(fid)+'.txt'\n  fpath = join(dir,fname)\n  with open(fpath) as f:\n    lineno = 0\n    for line in f:\n      lineno += 1\n      if not p_learn.search(line):\n        continue\n      # id\n      m = p_id.search(line)\n      if not m:\n        continue\n      gpid = m.group(1)\n      signature = m.group(2)\n      id = gpid+'#'+m.group(2)\n      if id not in learn_map:\n        learn = {'gpid':gpid,'signature':signature,'log_file':fname+':'+str(lineno),'learn_round':0,'duration':0,'meta_size':0,'file_count':0,'file_size':0,'completed':False}\n        learn_map[id] = learn\n      else:\n        learn = learn_map[id]\n      # learn_round & start_decree\n      if 'init_learn' in line:\n        learn['learn_round'] += 1\n        if 'start_decree' not in learn:\n          m = p_decree.search(line)\n          if m:\n            start_decree = int(m.group(1))\n            learn['start_decree'] = start_decree\n      # learner\n      if 'learner' not in learn:\n        m = p_learner.search(line)\n        if m:\n          learner = m.group(1)\n          learn['learner'] = learner\n      # learnee\n      if 'learnee' not in learn:\n        m = p_learnee.search(line)\n        if m:\n          learnee = m.group(1)\n          learn['learnee'] = learnee\n      # duration\n      m = p_duration.search(line)\n      if m:\n        duration = int(m.group(1))\n        if duration > learn['duration']:\n          learn['duration'] = duration\n      # meta_size\n      m = p_meta_size.search(line)\n      if m:\n        meta_size = int(m.group(1))\n        learn['meta_size'] += meta_size\n      # file size\n      m = p_file_size.search(line)\n      if m:\n        file_count = int(m.group(1))\n        file_size 
= int(m.group(2))\n        learn['file_count'] += file_count\n        learn['file_size'] += file_size\n      # completed\n      if not learn['completed'] and 'notify_learn_completion' in line:\n        learn['completed'] = True\n        if 'end_decree' not in learn:\n          m = p_decree.search(line)\n          if m:\n            end_decree = int(m.group(1))\n            learn['end_decree'] = end_decree\n        if 'start_decree' in learn and 'end_decree' in learn:\n          learn['increased_decree'] = learn['end_decree'] - learn['start_decree'] + 1\nfor id, learn in learn_map.items():\n  if learn['completed']:\n    print learn\nprint\nprint '=========================================================='\nprint\nfor id, learn in learn_map.items():\n  if not learn['completed']:\n    print learn\n\n"
  },
  {
    "path": "scripts/linux/run-clang-format.py",
    "content": "#!/usr/bin/env python\n\n# MIT License\n#\n# Copyright (c) 2017 Guillaume Papin\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in all\n# copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n# SOFTWARE.\n\n\"\"\"A wrapper script around clang-format, suitable for linting multiple files\nand to use for continuous integration.\n\nThis is an alternative API for the clang-format command line.\nIt runs over multiple files and directories in parallel.\nA diff output is produced and a sensible exit code is returned.\n\n\"\"\"\n\nfrom __future__ import print_function, unicode_literals\n\nimport argparse\nimport codecs\nimport difflib\nimport fnmatch\nimport io\nimport errno\nimport multiprocessing\nimport os\nimport signal\nimport subprocess\nimport sys\nimport traceback\n\nfrom functools import partial\n\ntry:\n    from subprocess import DEVNULL  # py3k\nexcept ImportError:\n    DEVNULL = open(os.devnull, \"wb\")\n\n\nDEFAULT_EXTENSIONS = 'c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx'\nDEFAULT_CLANG_FORMAT_IGNORE = 
'.clang-format-ignore'\n\n\nclass ExitStatus:\n    SUCCESS = 0\n    DIFF = 1\n    TROUBLE = 2\n\ndef excludes_from_file(ignore_file):\n    excludes = []\n    try:\n        with io.open(ignore_file, 'r', encoding='utf-8') as f:\n            for line in f:\n                if line.startswith('#'):\n                    # ignore comments\n                    continue\n                pattern = line.rstrip()\n                if not pattern:\n                    # allow empty lines\n                    continue\n                excludes.append(pattern)\n    except EnvironmentError as e:\n        if e.errno != errno.ENOENT:\n            raise\n    return excludes;\n\ndef list_files(files, recursive=False, extensions=None, exclude=None):\n    if extensions is None:\n        extensions = []\n    if exclude is None:\n        exclude = []\n\n    out = []\n    for file in files:\n        if recursive and os.path.isdir(file):\n            for dirpath, dnames, fnames in os.walk(file):\n                fpaths = [os.path.join(dirpath, fname) for fname in fnames]\n                for pattern in exclude:\n                    # os.walk() supports trimming down the dnames list\n                    # by modifying it in-place,\n                    # to avoid unnecessary directory listings.\n                    dnames[:] = [\n                        x for x in dnames\n                        if\n                        not fnmatch.fnmatch(os.path.join(dirpath, x), pattern)\n                    ]\n                    fpaths = [\n                        x for x in fpaths if not fnmatch.fnmatch(x, pattern)\n                    ]\n                for f in fpaths:\n                    ext = os.path.splitext(f)[1][1:]\n                    if ext in extensions:\n                        out.append(f)\n        else:\n            out.append(file)\n    return out\n\n\ndef make_diff(file, original, reformatted):\n    return list(\n        difflib.unified_diff(\n            original,\n            
reformatted,\n            fromfile='{}\\t(original)'.format(file),\n            tofile='{}\\t(reformatted)'.format(file),\n            n=3))\n\n\nclass DiffError(Exception):\n    def __init__(self, message, errs=None):\n        super(DiffError, self).__init__(message)\n        self.errs = errs or []\n\n\nclass UnexpectedError(Exception):\n    def __init__(self, message, exc=None):\n        super(UnexpectedError, self).__init__(message)\n        self.formatted_traceback = traceback.format_exc()\n        self.exc = exc\n\n\ndef run_clang_format_diff_wrapper(args, file):\n    try:\n        ret = run_clang_format_diff(args, file)\n        return ret\n    except DiffError:\n        raise\n    except Exception as e:\n        raise UnexpectedError('{}: {}: {}'.format(file, e.__class__.__name__,\n                                                  e), e)\n\n\ndef run_clang_format_diff(args, file):\n    try:\n        with io.open(file, 'r', encoding='utf-8') as f:\n            original = f.readlines()\n    except IOError as exc:\n        raise DiffError(str(exc))\n    \n    if args.in_place:\n        invocation = [args.clang_format_executable, '-i', file]\n    else:\n        invocation = [args.clang_format_executable, file]\n\n    if args.style:\n        invocation.extend(['--style', args.style])\n\n    if args.dry_run:\n        print(\" \".join(invocation))\n        return [], []\n\n    # Use of utf-8 to decode the process output.\n    #\n    # Hopefully, this is the correct thing to do.\n    #\n    # It's done due to the following assumptions (which may be incorrect):\n    # - clang-format will returns the bytes read from the files as-is,\n    #   without conversion, and it is already assumed that the files use utf-8.\n    # - if the diagnostics were internationalized, they would use utf-8:\n    #   > Adding Translations to Clang\n    #   >\n    #   > Not possible yet!\n    #   > Diagnostic strings should be written in UTF-8,\n    #   > the client can translate to the 
relevant code page if needed.\n    #   > Each translation completely replaces the format string\n    #   > for the diagnostic.\n    #   > -- http://clang.llvm.org/docs/InternalsManual.html#internals-diag-translation\n    #\n    # It's not pretty, due to Python 2 & 3 compatibility.\n    encoding_py3 = {}\n    if sys.version_info[0] >= 3:\n        encoding_py3['encoding'] = 'utf-8'\n\n    try:\n        proc = subprocess.Popen(\n            invocation,\n            stdout=subprocess.PIPE,\n            stderr=subprocess.PIPE,\n            universal_newlines=True,\n            **encoding_py3)\n    except OSError as exc:\n        raise DiffError(\n            \"Command '{}' failed to start: {}\".format(\n                subprocess.list2cmdline(invocation), exc\n            )\n        )\n    proc_stdout = proc.stdout\n    proc_stderr = proc.stderr\n    if sys.version_info[0] < 3:\n        # make the pipes compatible with Python 3,\n        # reading lines should output unicode\n        encoding = 'utf-8'\n        proc_stdout = codecs.getreader(encoding)(proc_stdout)\n        proc_stderr = codecs.getreader(encoding)(proc_stderr)\n    # hopefully the stderr pipe won't get full and block the process\n    outs = list(proc_stdout.readlines())\n    errs = list(proc_stderr.readlines())\n    proc.wait()\n    if proc.returncode:\n        raise DiffError(\n            \"Command '{}' returned non-zero exit status {}\".format(\n                subprocess.list2cmdline(invocation), proc.returncode\n            ),\n            errs,\n        )\n    if args.in_place:\n        return [], errs\n    return make_diff(file, original, outs), errs\n\n\ndef bold_red(s):\n    return '\\x1b[1m\\x1b[31m' + s + '\\x1b[0m'\n\n\ndef colorize(diff_lines):\n    def bold(s):\n        return '\\x1b[1m' + s + '\\x1b[0m'\n\n    def cyan(s):\n        return '\\x1b[36m' + s + '\\x1b[0m'\n\n    def green(s):\n        return '\\x1b[32m' + s + '\\x1b[0m'\n\n    def red(s):\n        return '\\x1b[31m' + s + 
'\\x1b[0m'\n\n    for line in diff_lines:\n        if line[:4] in ['--- ', '+++ ']:\n            yield bold(line)\n        elif line.startswith('@@ '):\n            yield cyan(line)\n        elif line.startswith('+'):\n            yield green(line)\n        elif line.startswith('-'):\n            yield red(line)\n        else:\n            yield line\n\n\ndef print_diff(diff_lines, use_color):\n    if use_color:\n        diff_lines = colorize(diff_lines)\n    if sys.version_info[0] < 3:\n        sys.stdout.writelines((l.encode('utf-8') for l in diff_lines))\n    else:\n        sys.stdout.writelines(diff_lines)\n\n\ndef print_trouble(prog, message, use_colors):\n    error_text = 'error:'\n    if use_colors:\n        error_text = bold_red(error_text)\n    print(\"{}: {} {}\".format(prog, error_text, message), file=sys.stderr)\n\n\ndef main():\n    parser = argparse.ArgumentParser(description=__doc__)\n    parser.add_argument(\n        '--clang-format-executable',\n        metavar='EXECUTABLE',\n        help='path to the clang-format executable',\n        default='clang-format')\n    parser.add_argument(\n        '--extensions',\n        help='comma separated list of file extensions (default: {})'.format(\n            DEFAULT_EXTENSIONS),\n        default=DEFAULT_EXTENSIONS)\n    parser.add_argument(\n        '-r',\n        '--recursive',\n        action='store_true',\n        help='run recursively over directories')\n    parser.add_argument(\n        '-d',\n        '--dry-run',\n        action='store_true',\n        help='just print the list of files')\n    parser.add_argument(\n        '-i',\n        '--in-place',\n        action='store_true',\n        help='format file instead of printing differences')\n    parser.add_argument('files', metavar='file', nargs='+')\n    parser.add_argument(\n        '-q',\n        '--quiet',\n        action='store_true',\n        help=\"disable output, useful for the exit code\")\n    parser.add_argument(\n        '-j',\n        
metavar='N',\n        type=int,\n        default=0,\n        help='run N clang-format jobs in parallel'\n        ' (default number of cpus + 1)')\n    parser.add_argument(\n        '--color',\n        default='auto',\n        choices=['auto', 'always', 'never'],\n        help='show colored diff (default: auto)')\n    parser.add_argument(\n        '-e',\n        '--exclude',\n        metavar='PATTERN',\n        action='append',\n        default=[],\n        help='exclude paths matching the given glob-like pattern(s)'\n        ' from recursive search')\n    parser.add_argument(\n        '--style',\n        help='formatting style to apply (LLVM, Google, Chromium, Mozilla, WebKit)')\n\n    args = parser.parse_args()\n\n    # use default signal handling, like diff return SIGINT value on ^C\n    # https://bugs.python.org/issue14229#msg156446\n    signal.signal(signal.SIGINT, signal.SIG_DFL)\n    try:\n        signal.SIGPIPE\n    except AttributeError:\n        # compatibility, SIGPIPE does not exist on Windows\n        pass\n    else:\n        signal.signal(signal.SIGPIPE, signal.SIG_DFL)\n\n    colored_stdout = False\n    colored_stderr = False\n    if args.color == 'always':\n        colored_stdout = True\n        colored_stderr = True\n    elif args.color == 'auto':\n        colored_stdout = sys.stdout.isatty()\n        colored_stderr = sys.stderr.isatty()\n\n    version_invocation = [args.clang_format_executable, str(\"--version\")]\n    try:\n        subprocess.check_call(version_invocation, stdout=DEVNULL)\n    except subprocess.CalledProcessError as e:\n        print_trouble(parser.prog, str(e), use_colors=colored_stderr)\n        return ExitStatus.TROUBLE\n    except OSError as e:\n        print_trouble(\n            parser.prog,\n            \"Command '{}' failed to start: {}\".format(\n                subprocess.list2cmdline(version_invocation), e\n            ),\n            use_colors=colored_stderr,\n        )\n        return ExitStatus.TROUBLE\n\n    
retcode = ExitStatus.SUCCESS\n\n    excludes = excludes_from_file(DEFAULT_CLANG_FORMAT_IGNORE)\n    excludes.extend(args.exclude)\n\n    files = list_files(\n        args.files,\n        recursive=args.recursive,\n        exclude=excludes,\n        extensions=args.extensions.split(','))\n\n    if not files:\n        return\n\n    njobs = args.j\n    if njobs == 0:\n        njobs = multiprocessing.cpu_count() + 1\n    njobs = min(len(files), njobs)\n\n    if njobs == 1:\n        # execute directly instead of in a pool,\n        # less overhead, simpler stacktraces\n        it = (run_clang_format_diff_wrapper(args, file) for file in files)\n        pool = None\n    else:\n        pool = multiprocessing.Pool(njobs)\n        it = pool.imap_unordered(\n            partial(run_clang_format_diff_wrapper, args), files)\n        pool.close()\n    while True:\n        try:\n            outs, errs = next(it)\n        except StopIteration:\n            break\n        except DiffError as e:\n            print_trouble(parser.prog, str(e), use_colors=colored_stderr)\n            retcode = ExitStatus.TROUBLE\n            sys.stderr.writelines(e.errs)\n        except UnexpectedError as e:\n            print_trouble(parser.prog, str(e), use_colors=colored_stderr)\n            sys.stderr.write(e.formatted_traceback)\n            retcode = ExitStatus.TROUBLE\n            # stop at the first unexpected error,\n            # something could be very wrong,\n            # don't process all files unnecessarily\n            if pool:\n                pool.terminate()\n            break\n        else:\n            sys.stderr.writelines(errs)\n            if outs == []:\n                continue\n            if not args.quiet:\n                print_diff(outs, use_color=colored_stdout)\n            if retcode == ExitStatus.SUCCESS:\n                retcode = ExitStatus.DIFF\n    if pool:\n        pool.join()\n    return retcode\n\n\nif __name__ == '__main__':\n    sys.exit(main())\n"
  },
  {
    "path": "scripts/linux/run-clang-format.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nSCRIPT_DIR=$(dirname \"${BASH_SOURCE[0]}\")\nPROJECT_DIR=$(dirname $(dirname \"${SCRIPT_DIR}\"))\npython ${SCRIPT_DIR}/run-clang-format.py \\\n\t--clang-format-executable=clang-format-3.9 \\\n\t-i \\\n\t-r ${PROJECT_DIR}/src ${PROJECT_DIR}/include\n"
  },
  {
    "path": "scripts/linux/start_zk.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n#\n# Options:\n#    INSTALL_DIR    <dir>\n#    PORT           <port>\n\nPROJECT_DIR=$(realpath $(dirname $(dirname $(dirname \"${BASH_SOURCE[0]}\"))))\n\nif [ -z \"$INSTALL_DIR\" ]\nthen\n    echo \"ERROR: no INSTALL_DIR specified\"\n    exit 1\nfi\n\nif [ -z \"$PORT\" ]\nthen\n    echo \"ERROR: no PORT specified\"\n    exit 1\nfi\n\nmkdir -p $INSTALL_DIR\nif [ $? -ne 0 ]\nthen\n    echo \"ERROR: mkdir $PREFIX failed\"\n    exit 1\nfi\n\ncd $INSTALL_DIR\n\nZOOKEEPER_ROOT=apache-zookeeper-3.7.0-bin\nZOOKEEPER_TAR_NAME=${ZOOKEEPER_ROOT}.tar.gz\nZOOKEEPER_TAR_MD5_VALUE=\"8ffa97e7e6b0b2cf1d022e5156a7561a\"\n\nif [ ! 
-f $ZOOKEEPER_TAR_NAME ]; then\n    echo \"Downloading zookeeper...\"\n    download_url=\"http://pegasus-thirdparty-package.oss-cn-beijing.aliyuncs.com/apache-zookeeper-3.7.0-bin.tar.gz\"\n    if ! wget -T 5 -t 1 $download_url; then\n        echo \"ERROR: download zookeeper failed\"\n        exit 1\n    fi\n    if [ `md5sum $ZOOKEEPER_TAR_NAME | awk '{print$1}'` != $ZOOKEEPER_TAR_MD5_VALUE ]; then\n        echo \"check file $ZOOKEEPER_TAR_NAME md5sum failed!\"\n        exit 1\n    fi\nfi\n\nif [ ! -d $ZOOKEEPER_ROOT ]; then\n    echo \"Decompressing zookeeper...\"\n    if ! tar xf $ZOOKEEPER_TAR_NAME; then\n        echo \"ERROR: decompress zookeeper failed\"\n        exit 1\n    fi\nfi\n\nZOOKEEPER_HOME=`pwd`/$ZOOKEEPER_ROOT\nZOOKEEPER_PORT=$PORT\n\ncp $ZOOKEEPER_HOME/conf/zoo_sample.cfg $ZOOKEEPER_HOME/conf/zoo.cfg\nsed -i \"s@dataDir=/tmp/zookeeper@dataDir=$ZOOKEEPER_HOME/data@\" $ZOOKEEPER_HOME/conf/zoo.cfg\nsed -i \"s@clientPort=2181@clientPort=$ZOOKEEPER_PORT@\" $ZOOKEEPER_HOME/conf/zoo.cfg\necho \"admin.enableServer=false\" >> $ZOOKEEPER_HOME/conf/zoo.cfg\necho \"4lw.commands.whitelist=ruok\" >> $ZOOKEEPER_HOME/conf/zoo.cfg\n\nmkdir -p $ZOOKEEPER_HOME/data\n$ZOOKEEPER_HOME/bin/zkServer.sh start\nsleep 1\n\nzk_check_count=0\nwhile true; do\n    sleep 1 # wait until zookeeper bootstrapped\n    if echo ruok | nc localhost \"$ZOOKEEPER_PORT\" | grep -q imok; then\n        echo \"Zookeeper started at port $ZOOKEEPER_PORT\"\n        exit 0\n    fi\n    zk_check_count=$((zk_check_count+1))\n    echo \"ERROR: starting zookeeper has failed ${zk_check_count} times\"\n    if [ $zk_check_count -gt 30 ]; then\n        echo \"ERROR: failed to start zookeeper in 30 seconds\"\n        exit 1\n    fi\ndone\n"
  },
  {
    "path": "scripts/linux/stop_zk.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n#\n# Options:\n#    INSTALL_DIR    <dir>\n\nif [ -z \"$INSTALL_DIR\" ]\nthen\n    echo \"ERROR: no INSTALL_DIR specified\"\n    exit 1\nfi\n\ncd $INSTALL_DIR\n\nZOOKEEPER_HOME=`pwd`/apache-zookeeper-3.7.0-bin\n\nif [ -d \"$ZOOKEEPER_HOME\" ]\nthen\n    $ZOOKEEPER_HOME/bin/zkServer.sh stop\nfi\n"
  },
  {
    "path": "src/CMakeLists.txt",
    "content": "if(UNIX)\n    add_compile_options(-fPIC)\nendif()\nadd_subdirectory(runtime)\nadd_subdirectory(aio)\nadd_subdirectory(zookeeper)\nadd_subdirectory(perf_counter)\nadd_subdirectory(failure_detector)\nadd_subdirectory(remote_cmd)\nadd_subdirectory(nfs)\nadd_subdirectory(block_service)\nadd_subdirectory(http)\nadd_subdirectory(client)\nadd_subdirectory(common)\nadd_subdirectory(replica)\nadd_subdirectory(meta)\nadd_subdirectory(tools)\nadd_subdirectory(utils)\n"
  },
  {
    "path": "src/aio/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_aio)\n\n#Source files under CURRENT project directory will be automatically included.\n#You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"\")\n\n#Search mode for source files under CURRENT project directory ?\n#\"GLOB_RECURSE\" for recursive search\n#\"GLOB\" for non - recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS dsn_runtime)\n\n#Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_static_library()\n\nadd_subdirectory(test)\n"
  },
  {
    "path": "src/aio/aio_provider.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"aio_provider.h\"\n#include \"disk_engine.h\"\n\nnamespace dsn {\n\naio_provider::aio_provider(disk_engine *disk) : _engine(disk) {}\n\nvoid aio_provider::complete_io(aio_task *aio, error_code err, uint64_t bytes)\n{\n    _engine->complete_io(aio, err, bytes);\n}\n\nnamespace tools {\nnamespace internal_use_only {\nbool register_component_provider(const char *name, aio_provider::factory f, dsn::provider_type type)\n{\n    return dsn::utils::factory_store<aio_provider>::register_factory(name, f, type);\n}\n} // namespace internal_use_only\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/aio/aio_provider.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/tool-api/aio_task.h>\n#include <dsn/utility/dlib.h>\n#include <dsn/utility/factory_store.h>\n\nnamespace dsn {\n\nclass disk_engine;\nclass service_node;\nclass task_worker_pool;\nclass task_queue;\n\n#define DSN_INVALID_FILE_HANDLE ((dsn_handle_t)(uintptr_t)-1)\n\nclass aio_provider\n{\npublic:\n    template <typename T>\n    static aio_provider *create(disk_engine *disk)\n    {\n        return new T(disk);\n    }\n\n    typedef aio_provider *(*factory)(disk_engine *);\n\n    explicit aio_provider(disk_engine *disk);\n    virtual ~aio_provider() = default;\n\n    // return DSN_INVALID_FILE_HANDLE if failed\n    // TODO(wutao1): return uint64_t instead (because we only support linux now)\n    virtual 
dsn_handle_t open(const char *file_name, int flag, int pmode) = 0;\n\n    virtual error_code close(dsn_handle_t fh) = 0;\n    virtual error_code flush(dsn_handle_t fh) = 0;\n    virtual error_code write(const aio_context &aio_ctx, /*out*/ uint64_t *processed_bytes) = 0;\n    virtual error_code read(const aio_context &aio_ctx, /*out*/ uint64_t *processed_bytes) = 0;\n\n    // Submits the aio_task to the underlying disk-io executor.\n    // This task may not be executed immediately, call `aio_task::wait`\n    // to wait until it completes.\n    virtual void submit_aio_task(aio_task *aio) = 0;\n\n    virtual aio_context *prepare_aio_context(aio_task *) = 0;\n\n    void complete_io(aio_task *aio, error_code err, uint64_t bytes);\n\nprivate:\n    disk_engine *_engine;\n};\n\nnamespace tools {\nnamespace internal_use_only {\nDSN_API bool\nregister_component_provider(const char *name, aio_provider::factory f, dsn::provider_type type);\n} // namespace internal_use_only\n} // namespace tools\n\n} // namespace dsn\n"
  },
  {
    "path": "src/aio/aio_task.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"runtime/task/task_engine.h\"\n#include <dsn/tool-api/file_io.h>\n#include <dsn/utility/error_code.h>\n#include <dsn/utils/latency_tracer.h>\n\nnamespace dsn {\n\naio_task::aio_task(dsn::task_code code, const aio_handler &cb, int hash, service_node *node)\n    : aio_task(code, aio_handler(cb), hash, node)\n{\n}\n\naio_task::aio_task(dsn::task_code code, aio_handler &&cb, int hash, service_node *node)\n    : task(code, hash, node), _cb(std::move(cb))\n{\n    _is_null = (_cb == nullptr);\n\n    dassert(TASK_TYPE_AIO == spec().type,\n            \"%s is not of AIO type, please use DEFINE_TASK_CODE_AIO to define the task code\",\n            spec().name.c_str());\n    set_error_code(ERR_IO_PENDING);\n\n    _aio_ctx = file::prepare_aio_context(this);\n\n    _tracer = std::make_shared<dsn::utils::latency_tracer>(true, \"aio_task\", 0, code);\n}\n\nvoid aio_task::collapse()\n{\n    if (!_unmerged_write_buffers.empty()) {\n        std::shared_ptr<char> buffer(dsn::utils::make_shared_array<char>(_aio_ctx->buffer_size));\n        char *dest = buffer.get();\n        for (const dsn_file_buffer_t &b : _unmerged_write_buffers) {\n            
::memcpy(dest, b.buffer, b.size);\n            dest += b.size;\n        }\n        dassert(dest - buffer.get() == _aio_ctx->buffer_size,\n                \"%u VS %u\",\n                dest - buffer.get(),\n                _aio_ctx->buffer_size);\n        _aio_ctx->buffer = buffer.get();\n        _merged_write_buffer_holder.assign(std::move(buffer), 0, _aio_ctx->buffer_size);\n    }\n}\n\nvoid aio_task::enqueue(error_code err, size_t transferred_size)\n{\n    set_error_code(err);\n    _transferred_size = transferred_size;\n\n    spec().on_aio_enqueue.execute(this);\n\n    task::enqueue(node()->computation()->get_pool(spec().pool_code));\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/aio/disk_engine.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/tool-api/aio_task.h>\n#include <dsn/utility/flags.h>\n\n#include \"disk_engine.h\"\n#include \"runtime/service_engine.h\"\n#include \"native_linux_aio_provider.h\"\n\nusing namespace dsn::utils;\n\nnamespace dsn {\nDEFINE_TASK_CODE_AIO(LPC_AIO_BATCH_WRITE, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\n\nconst char *native_aio_provider = \"dsn::tools::native_aio_provider\";\nDSN_REGISTER_COMPONENT_PROVIDER(native_linux_aio_provider, native_aio_provider);\n\nstruct disk_engine_initializer\n{\n    disk_engine_initializer() { disk_engine::instance(); }\n};\n\n// make disk_engine destructed after service_engine, which is inited in dsn_global_init,\n// because service_engine relies on the 
former to close files.\nstatic disk_engine_initializer disk_engine_init;\n\n//----------------- disk_file ------------------------\naio_task *disk_write_queue::unlink_next_workload(void *plength)\n{\n    uint64_t next_offset = 0;\n    uint64_t &sz = *(uint64_t *)plength;\n    sz = 0;\n\n    aio_task *first = _hdr._first, *current = first, *last = first;\n    while (nullptr != current) {\n        auto io = current->get_aio_context();\n        if (sz == 0) {\n            sz = io->buffer_size;\n            next_offset = io->file_offset + sz;\n        } else {\n            // batch condition\n            if (next_offset == io->file_offset && sz + io->buffer_size <= _max_batch_bytes) {\n                sz += io->buffer_size;\n                next_offset += io->buffer_size;\n            }\n\n            // no batch is possible\n            else {\n                break;\n            }\n        }\n\n        // continue next\n        last = current;\n        current = (aio_task *)current->next;\n    }\n\n    // unlink [first, last] -> current\n    if (last) {\n        _hdr._first = current;\n        if (last == _hdr._last)\n            _hdr._last = nullptr;\n        last->next = nullptr;\n    }\n\n    return first;\n}\n\ndisk_file::disk_file(dsn_handle_t handle) : _handle(handle) {}\n\naio_task *disk_file::read(aio_task *tsk)\n{\n    tsk->add_ref(); // release on completion, see `on_read_completed`.\n    return _read_queue.add_work(tsk, nullptr);\n}\n\naio_task *disk_file::write(aio_task *tsk, void *ctx)\n{\n    tsk->add_ref(); // release on completion\n    return _write_queue.add_work(tsk, ctx);\n}\n\naio_task *disk_file::on_read_completed(aio_task *wk, error_code err, size_t size)\n{\n    dassert(wk->next == nullptr, \"\");\n    auto ret = _read_queue.on_work_completed(wk, nullptr);\n    wk->enqueue(err, size);\n    wk->release_ref(); // added in above read\n\n    return ret;\n}\n\naio_task *disk_file::on_write_completed(aio_task *wk, void *ctx, error_code err, size_t 
size)\n{\n    auto ret = _write_queue.on_work_completed(wk, ctx);\n\n    while (wk) {\n        aio_task *next = (aio_task *)wk->next;\n        wk->next = nullptr;\n\n        if (err == ERR_OK) {\n            size_t this_size = (size_t)wk->get_aio_context()->buffer_size;\n            dcheck_ge(size, this_size);\n            wk->enqueue(err, this_size);\n            size -= this_size;\n        } else {\n            wk->enqueue(err, size);\n        }\n\n        wk->release_ref(); // added in above write\n\n        wk = next;\n    }\n\n    if (err == ERR_OK) {\n        dassert(size == 0, \"written buffer size does not equal to input buffer's size\");\n    }\n\n    return ret;\n}\n\n//----------------- disk_engine ------------------------\ndisk_engine::disk_engine()\n{\n    aio_provider *provider = utils::factory_store<aio_provider>::create(\n        native_aio_provider, dsn::PROVIDER_TYPE_MAIN, this);\n    _provider.reset(provider);\n}\n\nclass batch_write_io_task : public aio_task\n{\npublic:\n    explicit batch_write_io_task(aio_task *tasks)\n        : aio_task(LPC_AIO_BATCH_WRITE, nullptr), _tasks(tasks)\n    {\n    }\n\n    virtual void exec() override\n    {\n        auto df = (disk_file *)_tasks->get_aio_context()->file_object;\n        uint64_t sz;\n\n        auto wk = df->on_write_completed(_tasks, (void *)&sz, error(), get_transferred_size());\n        if (wk) {\n            wk->get_aio_context()->engine->process_write(wk, sz);\n        }\n    }\n\npublic:\n    aio_task *_tasks;\n};\n\nvoid disk_engine::write(aio_task *aio)\n{\n    if (!aio->spec().on_aio_call.execute(task::get_current_task(), aio, true)) {\n        aio->enqueue(ERR_FILE_OPERATION_FAILED, 0);\n        return;\n    }\n\n    auto dio = aio->get_aio_context();\n    auto df = (disk_file *)dio->file;\n    dio->file = df->native_handle();\n    dio->file_object = df;\n    dio->engine = this;\n    dio->type = AIO_Write;\n\n    uint64_t sz;\n    auto wk = df->write(aio, &sz);\n    if (wk) {\n        
process_write(wk, sz);\n    }\n}\n\nvoid disk_engine::process_write(aio_task *aio, uint64_t sz)\n{\n    aio_context *dio = aio->get_aio_context();\n\n    // no batching\n    if (dio->buffer_size == sz) {\n        aio->collapse();\n        _provider->submit_aio_task(aio);\n    }\n\n    // batching\n    else {\n        // setup io task\n        auto new_task = new batch_write_io_task(aio);\n        auto new_dio = new_task->get_aio_context();\n        new_dio->buffer_size = sz;\n        new_dio->file_offset = dio->file_offset;\n        new_dio->file = dio->file;\n        new_dio->file_object = dio->file_object;\n        new_dio->engine = dio->engine;\n        new_dio->type = AIO_Write;\n\n        auto cur_task = aio;\n        do {\n            auto cur_dio = cur_task->get_aio_context();\n            if (cur_dio->buffer) {\n                dsn_file_buffer_t buf;\n                buf.buffer = cur_dio->buffer;\n                buf.size = cur_dio->buffer_size;\n                new_task->_unmerged_write_buffers.push_back(std::move(buf));\n            } else {\n                new_task->_unmerged_write_buffers.insert(new_task->_unmerged_write_buffers.end(),\n                                                         cur_task->_unmerged_write_buffers.begin(),\n                                                         cur_task->_unmerged_write_buffers.end());\n            }\n            cur_task = (aio_task *)cur_task->next;\n        } while (cur_task);\n\n        new_task->add_ref(); // released in complete_io\n        process_write(new_task, sz);\n    }\n}\n\nvoid disk_engine::complete_io(aio_task *aio, error_code err, uint64_t bytes)\n{\n    if (err != ERR_OK) {\n        dinfo(\"disk operation failure with code %s, err = %s, aio_task_id = %016\" PRIx64,\n              aio->spec().name.c_str(),\n              err.to_string(),\n              aio->id());\n    }\n\n    // batching\n    if (aio->code() == LPC_AIO_BATCH_WRITE) {\n        aio->enqueue(err, (size_t)bytes);\n        
aio->release_ref(); // added in process_write\n    }\n\n    // no batching\n    else {\n        auto df = (disk_file *)(aio->get_aio_context()->file_object);\n        if (aio->get_aio_context()->type == AIO_Read) {\n            auto wk = df->on_read_completed(aio, err, (size_t)bytes);\n            if (wk) {\n                _provider->submit_aio_task(wk);\n            }\n        }\n\n        // write\n        else {\n            uint64_t sz;\n            auto wk = df->on_write_completed(aio, (void *)&sz, err, (size_t)bytes);\n            if (wk) {\n                process_write(wk, sz);\n            }\n        }\n    }\n}\n} // namespace dsn\n"
  },
  {
    "path": "src/aio/disk_engine.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include \"aio_provider.h\"\n\n#include <dsn/tool_api.h>\n#include <dsn/utility/synchronize.h>\n#include <dsn/utility/work_queue.h>\n\nnamespace dsn {\n\nclass disk_write_queue : public work_queue<aio_task>\n{\npublic:\n    disk_write_queue() : work_queue(2)\n    {\n        _max_batch_bytes = 1024 * 1024; // 1 MB\n    }\n\nprivate:\n    virtual aio_task *unlink_next_workload(void *plength) override;\n\nprivate:\n    uint32_t _max_batch_bytes;\n};\n\nclass disk_file\n{\npublic:\n    disk_file(dsn_handle_t handle);\n    aio_task *read(aio_task *tsk);\n    aio_task *write(aio_task *tsk, void *ctx);\n\n    aio_task *on_read_completed(aio_task *wk, error_code err, size_t size);\n    aio_task *on_write_completed(aio_task 
*wk, void *ctx, error_code err, size_t size);\n\n    // TODO(wutao1): make it uint64_t\n    dsn_handle_t native_handle() const { return _handle; }\n\nprivate:\n    dsn_handle_t _handle;\n    disk_write_queue _write_queue;\n    work_queue<aio_task> _read_queue;\n};\n\nclass disk_engine : public utils::singleton<disk_engine>\n{\npublic:\n    void write(aio_task *aio);\n    static aio_provider &provider() { return *instance()._provider.get(); }\n\nprivate:\n    // the object of disk_engine must be created by `singleton::instance`\n    disk_engine();\n    ~disk_engine() = default;\n\n    void process_write(aio_task *wk, uint64_t sz);\n    void complete_io(aio_task *aio, error_code err, uint64_t bytes);\n\n    std::unique_ptr<aio_provider> _provider;\n\n    friend class aio_provider;\n    friend class batch_write_io_task;\n    friend class utils::singleton<disk_engine>;\n};\n\n} // namespace dsn\n"
  },
  {
    "path": "src/aio/file_io.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"disk_engine.h\"\n#include <dsn/tool-api/file_io.h>\n\nnamespace dsn {\nnamespace file {\n\n/*extern*/ disk_file *open(const char *file_name, int flag, int pmode)\n{\n    dsn_handle_t nh = disk_engine::provider().open(file_name, flag, pmode);\n    if (nh != DSN_INVALID_FILE_HANDLE) {\n        return new disk_file(nh);\n    } else {\n        return nullptr;\n    }\n}\n\n/*extern*/ error_code close(disk_file *file)\n{\n    error_code result = ERR_INVALID_HANDLE;\n    if (file != nullptr) {\n        result = disk_engine::provider().close(file->native_handle());\n        delete file;\n        file = nullptr;\n    }\n    return result;\n}\n\n/*extern*/ error_code flush(disk_file *file)\n{\n    if (nullptr != file) {\n        
return disk_engine::provider().flush(file->native_handle());\n    } else {\n        return ERR_INVALID_HANDLE;\n    }\n}\n\n/*extern*/ aio_task_ptr read(disk_file *file,\n                             char *buffer,\n                             int count,\n                             uint64_t offset,\n                             task_code callback_code,\n                             task_tracker *tracker,\n                             aio_handler &&callback,\n                             int hash /*= 0*/)\n{\n    auto cb = create_aio_task(callback_code, tracker, std::move(callback), hash);\n    cb->get_aio_context()->buffer = buffer;\n    cb->get_aio_context()->buffer_size = count;\n    cb->get_aio_context()->file_object = file;\n    cb->get_aio_context()->file = file->native_handle();\n    cb->get_aio_context()->file_offset = offset;\n    cb->get_aio_context()->type = AIO_Read;\n    cb->get_aio_context()->engine = &disk_engine::instance();\n\n    if (!cb->spec().on_aio_call.execute(task::get_current_task(), cb, true)) {\n        cb->enqueue(ERR_FILE_OPERATION_FAILED, 0);\n        return cb;\n    }\n    auto wk = file->read(cb);\n    if (wk) {\n        disk_engine::provider().submit_aio_task(wk);\n    }\n    return cb;\n}\n\n/*extern*/ aio_task_ptr write(disk_file *file,\n                              const char *buffer,\n                              int count,\n                              uint64_t offset,\n                              task_code callback_code,\n                              task_tracker *tracker,\n                              aio_handler &&callback,\n                              int hash /*= 0*/)\n{\n    auto cb = create_aio_task(callback_code, tracker, std::move(callback), hash);\n    cb->get_aio_context()->buffer = (char *)buffer;\n    cb->get_aio_context()->buffer_size = count;\n    cb->get_aio_context()->file = file;\n    cb->get_aio_context()->file_offset = offset;\n    cb->get_aio_context()->type = AIO_Write;\n\n    
disk_engine::instance().write(cb);\n    return cb;\n}\n\n/*extern*/ aio_task_ptr write_vector(disk_file *file,\n                                     const dsn_file_buffer_t *buffers,\n                                     int buffer_count,\n                                     uint64_t offset,\n                                     task_code callback_code,\n                                     task_tracker *tracker,\n                                     aio_handler &&callback,\n                                     int hash /*= 0*/)\n{\n    auto cb = create_aio_task(callback_code, tracker, std::move(callback), hash);\n    cb->get_aio_context()->file = file;\n    cb->get_aio_context()->file_offset = offset;\n    cb->get_aio_context()->type = AIO_Write;\n    for (int i = 0; i < buffer_count; i++) {\n        if (buffers[i].size > 0) {\n            cb->_unmerged_write_buffers.push_back(buffers[i]);\n            cb->get_aio_context()->buffer_size += buffers[i].size;\n        }\n    }\n\n    disk_engine::instance().write(cb);\n    return cb;\n}\n\n/*extern*/ aio_context_ptr prepare_aio_context(aio_task *tsk)\n{\n    return disk_engine::provider().prepare_aio_context(tsk);\n}\n} // namespace file\n} // namespace dsn\n"
  },
  {
    "path": "src/aio/native_linux_aio_provider.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"native_linux_aio_provider.h\"\n\n#include <fcntl.h>\n\n#include \"runtime/service_engine.h\"\n\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/c/api_utilities.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/fail_point.h>\n#include <dsn/utils/latency_tracer.h>\n\nnamespace dsn {\n\nnative_linux_aio_provider::native_linux_aio_provider(disk_engine *disk) : aio_provider(disk) {}\n\nnative_linux_aio_provider::~native_linux_aio_provider() {}\n\ndsn_handle_t native_linux_aio_provider::open(const char *file_name, int flag, int pmode)\n{\n    dsn_handle_t fh = (dsn_handle_t)(uintptr_t)::open(file_name, flag, pmode);\n    if (fh == DSN_INVALID_FILE_HANDLE) {\n        derror(\"create file failed, err = %s\", 
strerror(errno));\n    }\n    return fh;\n}\n\nerror_code native_linux_aio_provider::close(dsn_handle_t fh)\n{\n    if (fh == DSN_INVALID_FILE_HANDLE || ::close((int)(uintptr_t)(fh)) == 0) {\n        return ERR_OK;\n    } else {\n        derror(\"close file failed, err = %s\", strerror(errno));\n        return ERR_FILE_OPERATION_FAILED;\n    }\n}\n\nerror_code native_linux_aio_provider::flush(dsn_handle_t fh)\n{\n    if (fh == DSN_INVALID_FILE_HANDLE || ::fsync((int)(uintptr_t)(fh)) == 0) {\n        return ERR_OK;\n    } else {\n        derror(\"flush file failed, err = %s\", strerror(errno));\n        return ERR_FILE_OPERATION_FAILED;\n    }\n}\n\nerror_code native_linux_aio_provider::write(const aio_context &aio_ctx,\n                                            /*out*/ uint64_t *processed_bytes)\n{\n    dsn::error_code resp = ERR_OK;\n    uint64_t buffer_offset = 0;\n    do {\n        // ret is the written data size\n        auto ret = pwrite(static_cast<int>((ssize_t)aio_ctx.file),\n                          (char *)aio_ctx.buffer + buffer_offset,\n                          aio_ctx.buffer_size - buffer_offset,\n                          aio_ctx.file_offset + buffer_offset);\n        if (dsn_unlikely(ret < 0)) {\n            if (errno == EINTR) {\n                dwarn_f(\"write failed with errno={} and will retry it.\", strerror(errno));\n                continue;\n            }\n            resp = ERR_FILE_OPERATION_FAILED;\n            derror_f(\"write failed with errno={}, return {}.\", strerror(errno), resp);\n            return resp;\n        }\n\n        // mock the `ret` to reproduce the `write incomplete` case in the first write\n        FAIL_POINT_INJECT_NOT_RETURN_F(\"aio_pwrite_incomplete\", [&](string_view s) -> void {\n            if (dsn_unlikely(buffer_offset == 0)) {\n                --ret;\n            }\n        });\n\n        buffer_offset += ret;\n        if (dsn_unlikely(buffer_offset != aio_ctx.buffer_size)) {\n            dwarn_f(\"write 
incomplete, request_size={}, total_write_size={}, this_write_size={}, \"\n                    \"and will retry it.\",\n                    aio_ctx.buffer_size,\n                    buffer_offset,\n                    ret);\n        }\n    } while (dsn_unlikely(buffer_offset < aio_ctx.buffer_size));\n\n    *processed_bytes = buffer_offset;\n    return resp;\n}\n\nerror_code native_linux_aio_provider::read(const aio_context &aio_ctx,\n                                           /*out*/ uint64_t *processed_bytes)\n{\n    ssize_t ret = pread(static_cast<int>((ssize_t)aio_ctx.file),\n                        aio_ctx.buffer,\n                        aio_ctx.buffer_size,\n                        aio_ctx.file_offset);\n    if (ret < 0) {\n        return ERR_FILE_OPERATION_FAILED;\n    }\n    if (ret == 0) {\n        return ERR_HANDLE_EOF;\n    }\n    *processed_bytes = static_cast<uint64_t>(ret);\n    return ERR_OK;\n}\n\nvoid native_linux_aio_provider::submit_aio_task(aio_task *aio_tsk)\n{\n    // for the tests which use simulator need sync submit for aio\n    if (dsn_unlikely(service_engine::instance().is_simulator())) {\n        aio_internal(aio_tsk);\n        return;\n    }\n\n    ADD_POINT(aio_tsk->_tracer);\n    tasking::enqueue(\n        aio_tsk->code(), aio_tsk->tracker(), [=]() { aio_internal(aio_tsk); }, aio_tsk->hash());\n}\n\nerror_code native_linux_aio_provider::aio_internal(aio_task *aio_tsk)\n{\n    ADD_POINT(aio_tsk->_tracer);\n    aio_context *aio_ctx = aio_tsk->get_aio_context();\n    error_code err = ERR_UNKNOWN;\n    uint64_t processed_bytes = 0;\n    switch (aio_ctx->type) {\n    case AIO_Read:\n        err = read(*aio_ctx, &processed_bytes);\n        break;\n    case AIO_Write:\n        err = write(*aio_ctx, &processed_bytes);\n        break;\n    default:\n        return err;\n    }\n\n    ADD_CUSTOM_POINT(aio_tsk->_tracer, \"completed\");\n\n    complete_io(aio_tsk, err, processed_bytes);\n    return err;\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/aio/native_linux_aio_provider.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include \"aio_provider.h\"\n\nnamespace dsn {\n\nclass native_linux_aio_provider : public aio_provider\n{\npublic:\n    explicit native_linux_aio_provider(disk_engine *disk);\n    ~native_linux_aio_provider() override;\n\n    dsn_handle_t open(const char *file_name, int flag, int pmode) override;\n    error_code close(dsn_handle_t fh) override;\n    error_code flush(dsn_handle_t fh) override;\n    error_code write(const aio_context &aio_ctx, /*out*/ uint64_t *processed_bytes) override;\n    error_code read(const aio_context &aio_ctx, /*out*/ uint64_t *processed_bytes) override;\n\n    void submit_aio_task(aio_task *aio) override;\n    aio_context *prepare_aio_context(aio_task *tsk) override { return new aio_context; 
}\n\nprotected:\n    error_code aio_internal(aio_task *aio);\n};\n\n} // namespace dsn\n"
  },
  {
    "path": "src/aio/test/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_aio_test)\n\n# Source files under CURRENT project directory will be automatically included.\n# You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"\")\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS gtest dsn_runtime dsn_aio)\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\n# Extra files that will be installed\nset(MY_BINPLACES\n    \"${CMAKE_CURRENT_SOURCE_DIR}/config.ini\"\n    \"${CMAKE_CURRENT_SOURCE_DIR}/clear.sh\"\n    \"${CMAKE_CURRENT_SOURCE_DIR}/run.sh\"\n    \"${CMAKE_CURRENT_SOURCE_DIR}/copy_source.txt\"\n)\n\ndsn_add_test()\n"
  },
  {
    "path": "src/aio/test/aio.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/smart_pointers.h>\n#include <dsn/utility/fail_point.h>\n\n#include <gtest/gtest.h>\n\nusing namespace ::dsn;\n\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_TEST_SERVER)\nDEFINE_TASK_CODE_AIO(LPC_AIO_TEST, TASK_PRIORITY_COMMON, THREAD_POOL_TEST_SERVER);\n\nTEST(core, aio)\n{\n    fail::setup();\n    fail::cfg(\"aio_pwrite_incomplete\", \"void()\");\n    const char *buffer = \"hello, world\";\n    int len = (int)strlen(buffer);\n\n    // write\n    auto fp = file::open(\"tmp\", O_RDWR | O_CREAT | O_BINARY, 0666);\n\n    std::list<aio_task_ptr> tasks;\n    uint64_t offset = 0;\n\n    // new write\n    for (int i = 0; i < 100; i++) {\n        auto t 
= ::dsn::file::write(fp, buffer, len, offset, LPC_AIO_TEST, nullptr, nullptr);\n        tasks.push_back(t);\n        offset += len;\n    }\n\n    for (auto &t : tasks) {\n        t->wait();\n    }\n\n    // overwrite\n    offset = 0;\n    tasks.clear();\n    for (int i = 0; i < 100; i++) {\n        auto t = ::dsn::file::write(fp, buffer, len, offset, LPC_AIO_TEST, nullptr, nullptr);\n        tasks.push_back(t);\n        offset += len;\n    }\n\n    for (auto &t : tasks) {\n        t->wait();\n        EXPECT_TRUE(t->get_transferred_size() == (size_t)len);\n    }\n\n    // vector write\n    tasks.clear();\n    std::unique_ptr<dsn_file_buffer_t[]> buffers(new dsn_file_buffer_t[100]);\n    for (int i = 0; i < 10; i++) {\n        buffers[i].buffer = static_cast<void *>(const_cast<char *>(buffer));\n        buffers[i].size = len;\n    }\n    for (int i = 0; i < 10; i++) {\n        tasks.push_back(::dsn::file::write_vector(\n            fp, buffers.get(), 10, offset, LPC_AIO_TEST, nullptr, nullptr));\n        offset += 10 * len;\n    }\n    for (auto &t : tasks) {\n        t->wait();\n        EXPECT_TRUE(t->get_transferred_size() == 10 * len);\n    }\n    auto err = file::close(fp);\n    EXPECT_TRUE(err == ERR_OK);\n\n    // read\n    char *buffer2 = (char *)alloca((size_t)len);\n    fp = file::open(\"tmp\", O_RDONLY | O_BINARY, 0);\n\n    // concurrent read\n    offset = 0;\n    tasks.clear();\n    for (int i = 0; i < 100; i++) {\n        auto t = ::dsn::file::read(fp, buffer2, len, offset, LPC_AIO_TEST, nullptr, nullptr);\n        tasks.push_back(t);\n        offset += len;\n    }\n\n    for (auto &t : tasks) {\n        t->wait();\n        EXPECT_TRUE(t->get_transferred_size() == (size_t)len);\n    }\n\n    // sequential read\n    offset = 0;\n    tasks.clear();\n    for (int i = 0; i < 200; i++) {\n        buffer2[0] = 'x';\n        auto t = ::dsn::file::read(fp, buffer2, len, offset, LPC_AIO_TEST, nullptr, nullptr);\n        offset += len;\n\n        t->wait();\n      
  EXPECT_TRUE(t->get_transferred_size() == (size_t)len);\n        EXPECT_TRUE(memcmp(buffer, buffer2, len) == 0);\n    }\n\n    err = file::close(fp);\n    fail::teardown();\n    EXPECT_TRUE(err == ERR_OK);\n\n    utils::filesystem::remove_path(\"tmp\");\n}\n\nTEST(core, aio_share)\n{\n    auto fp = file::open(\"tmp\", O_WRONLY | O_CREAT | O_BINARY, 0666);\n    EXPECT_TRUE(fp != nullptr);\n\n    auto fp2 = file::open(\"tmp\", O_RDONLY | O_BINARY, 0);\n    EXPECT_TRUE(fp2 != nullptr);\n\n    file::close(fp);\n    file::close(fp2);\n\n    utils::filesystem::remove_path(\"tmp\");\n}\n\nTEST(core, operation_failed)\n{\n    fail::setup();\n    fail::cfg(\"aio_pwrite_incomplete\", \"void()\");\n\n    auto fp = file::open(\"tmp_test_file\", O_WRONLY, 0600);\n    EXPECT_TRUE(fp == nullptr);\n\n    auto err = dsn::make_unique<dsn::error_code>();\n    auto count = dsn::make_unique<size_t>();\n    auto io_callback = [&err, &count](::dsn::error_code e, size_t n) {\n        *err = e;\n        *count = n;\n    };\n\n    fp = file::open(\"tmp_test_file\", O_WRONLY | O_CREAT | O_BINARY, 0666);\n    EXPECT_TRUE(fp != nullptr);\n    char buffer[512];\n    const char *str = \"hello file\";\n    auto t = ::dsn::file::write(fp, str, strlen(str), 0, LPC_AIO_TEST, nullptr, io_callback, 0);\n    t->wait();\n    EXPECT_TRUE(*err == ERR_OK && *count == strlen(str));\n\n    t = ::dsn::file::read(fp, buffer, 512, 0, LPC_AIO_TEST, nullptr, io_callback, 0);\n    t->wait();\n    EXPECT_TRUE(*err == ERR_FILE_OPERATION_FAILED);\n\n    auto fp2 = file::open(\"tmp_test_file\", O_RDONLY | O_BINARY, 0);\n    EXPECT_TRUE(fp2 != nullptr);\n\n    t = ::dsn::file::read(fp2, buffer, 512, 0, LPC_AIO_TEST, nullptr, io_callback, 0);\n    t->wait();\n    EXPECT_TRUE(*err == ERR_OK && *count == strlen(str));\n    EXPECT_TRUE(strncmp(buffer, str, 10) == 0);\n\n    t = ::dsn::file::read(fp2, buffer, 5, 0, LPC_AIO_TEST, nullptr, io_callback, 0);\n    t->wait();\n    EXPECT_TRUE(*err == ERR_OK && *count == 5);\n    
EXPECT_TRUE(strncmp(buffer, str, 5) == 0);\n\n    t = ::dsn::file::read(fp2, buffer, 512, 100, LPC_AIO_TEST, nullptr, io_callback, 0);\n    t->wait();\n    ddebug(\"error code: %s\", err->to_string());\n    file::close(fp);\n    file::close(fp2);\n    fail::teardown();\n\n    EXPECT_TRUE(utils::filesystem::remove_path(\"tmp_test_file\"));\n}\n\nDEFINE_TASK_CODE_AIO(LPC_AIO_TEST_READ, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\nDEFINE_TASK_CODE_AIO(LPC_AIO_TEST_WRITE, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\nstruct aio_result\n{\n    dsn::error_code err;\n    size_t sz;\n};\nTEST(core, dsn_file)\n{\n    int64_t fin_size, fout_size;\n    ASSERT_TRUE(utils::filesystem::file_size(\"copy_source.txt\", fin_size));\n    ASSERT_LT(0, fin_size);\n\n    dsn::disk_file *fin = file::open(\"copy_source.txt\", O_RDONLY, 0);\n    ASSERT_NE(nullptr, fin);\n    dsn::disk_file *fout = file::open(\"copy_dest.txt\", O_RDWR | O_CREAT | O_TRUNC, 0666);\n    ASSERT_NE(nullptr, fout);\n    char buffer[1024];\n    uint64_t offset = 0;\n    while (true) {\n        aio_result rin;\n        aio_task_ptr tin = file::read(fin,\n                                      buffer,\n                                      1024,\n                                      offset,\n                                      LPC_AIO_TEST_READ,\n                                      nullptr,\n                                      [&rin](dsn::error_code err, size_t sz) {\n                                          rin.err = err;\n                                          rin.sz = sz;\n                                      },\n                                      0);\n        ASSERT_NE(nullptr, tin);\n\n        if (dsn::tools::get_current_tool()->name() != \"simulator\") {\n            // at least 1 for tin, but if already read completed, then only 1\n            ASSERT_LE(1, tin->get_count());\n        }\n\n        tin->wait();\n        ASSERT_EQ(rin.err, tin->error());\n        if (rin.err != ERR_OK) {\n            
ASSERT_EQ(ERR_HANDLE_EOF, rin.err);\n            break;\n        }\n        ASSERT_LT(0u, rin.sz);\n        ASSERT_EQ(rin.sz, tin->get_transferred_size());\n        // this is only true for simulator\n        if (dsn::tools::get_current_tool()->name() == \"simulator\") {\n            ASSERT_EQ(1, tin->get_count());\n        }\n\n        aio_result rout;\n        aio_task_ptr tout = file::write(fout,\n                                        buffer,\n                                        rin.sz,\n                                        offset,\n                                        LPC_AIO_TEST_WRITE,\n                                        nullptr,\n                                        [&rout](dsn::error_code err, size_t sz) {\n                                            rout.err = err;\n                                            rout.sz = sz;\n                                        },\n                                        0);\n        ASSERT_NE(nullptr, tout);\n        tout->wait();\n        ASSERT_EQ(ERR_OK, rout.err);\n        ASSERT_EQ(ERR_OK, tout->error());\n        ASSERT_EQ(rin.sz, rout.sz);\n        ASSERT_EQ(rin.sz, tout->get_transferred_size());\n        // this is only true for simulator\n        if (dsn::tools::get_current_tool()->name() == \"simulator\") {\n            ASSERT_EQ(1, tout->get_count());\n        }\n\n        ASSERT_EQ(ERR_OK, file::flush(fout));\n\n        offset += rin.sz;\n    }\n\n    ASSERT_EQ((uint64_t)fin_size, offset);\n    ASSERT_EQ(ERR_OK, file::close(fout));\n    ASSERT_EQ(ERR_OK, file::close(fin));\n\n    ASSERT_TRUE(utils::filesystem::file_size(\"copy_dest.txt\", fout_size));\n    ASSERT_EQ(fin_size, fout_size);\n}\n"
  },
  {
    "path": "src/aio/test/clear.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nrm -rf data dsn_aio_test.xml copy_dest.txt\n"
  },
  {
    "path": "src/aio/test/config.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n\n[apps.mimic]\ntype = dsn.app.mimic\narguments =\nports = 20101\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER\nrun = true\ncount = 1\n\n[threadpool.THREAD_POOL_TEST_SERVER]\npartitioned = false\n\n[core]\nenable_default_app_mimic = true\ntool = nativerun\npause_on_start = false\nlogging_start_level = LOG_LEVEL_DEBUG\nlogging_factory_name = dsn::tools::simple_logger\n"
  },
  {
    "path": "src/aio/test/copy_source.txt",
    "content": "\nhelp\nhelp engine\nhelp unexist-cmd\nengine\ntask-code\nconfig-dump config-dump.ini\ntest-cmd this is test argument\nunexist-cmd arg1 arg2\n\n"
  },
  {
    "path": "src/aio/test/main.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include <dsn/service_api_cpp.h>\n\nGTEST_API_ int main(int argc, char **argv)\n{\n    testing::InitGoogleTest(&argc, argv);\n    dsn_run_config(\"config.ini\", false);\n    int g_test_ret = RUN_ALL_TESTS();\n#ifndef ENABLE_GCOV\n    dsn_exit(g_test_ret);\n#endif\n    return g_test_ret;\n}\n"
  },
  {
    "path": "src/aio/test/run.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nif [ -z \"${REPORT_DIR}\" ]; then\n    REPORT_DIR=\".\"\nfi\n\n./clear.sh\noutput_xml=\"${REPORT_DIR}/dsn_aio_test.xml\"\nGTEST_OUTPUT=\"xml:${output_xml}\" ./dsn_aio_test\n"
  },
  {
    "path": "src/block_service/CMakeLists.txt",
    "content": "##############################################################################\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n##############################################################################\n\nset(MY_PROJ_NAME dsn.block_service)\n\n# Source files under CURRENT project directory will be automatically included.\n# You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"\")\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS \"\")\n\n# Extra files that will be installed\nset(MY_BINPLACES \"\")\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem)\n\ndsn_add_static_library()\n\nadd_subdirectory(fds)\nadd_subdirectory(hdfs)\nadd_subdirectory(local)\nadd_subdirectory(test)\n"
  },
  {
    "path": "src/block_service/block_service_manager.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"block_service_manager.h\"\n#include \"block_service/fds/fds_service.h\"\n#include \"block_service/hdfs/hdfs_service.h\"\n#include \"block_service/local/local_service.h\"\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/factory_store.h>\n#include <dsn/utility/filesystem.h>\n\nnamespace dsn {\nnamespace dist {\nnamespace block_service {\n\nblock_service_registry::block_service_registry()\n{\n    bool ans;\n    ans = utils::factory_store<block_filesystem>::register_factory(\n        \"fds_service\", block_filesystem::create<fds_service>, PROVIDER_TYPE_MAIN);\n    dassert(ans, \"register fds_service failed\");\n\n    ans = utils::factory_store<block_filesystem>::register_factory(\n        \"hdfs_service\", block_filesystem::create<hdfs_service>, PROVIDER_TYPE_MAIN);\n    dassert(ans, \"register hdfs_service failed\");\n\n    ans = utils::factory_store<block_filesystem>::register_factory(\n        \"local_service\", block_filesystem::create<local_service>, PROVIDER_TYPE_MAIN);\n    dassert(ans, \"register local_service failed\");\n}\n\nblock_service_manager::block_service_manager()\n    : // we got a instance of 
block_service_registry each time we create a block_service_manger\n      // to make sure that the filesystem providers are registered\n      _registry_holder(block_service_registry::instance())\n{\n}\n\nblock_service_manager::~block_service_manager()\n{\n    ddebug(\"close block service manager.\");\n    zauto_write_lock l(_fs_lock);\n    _fs_map.clear();\n}\n\nblock_filesystem *block_service_manager::get_or_create_block_filesystem(const std::string &provider)\n{\n    zauto_write_lock l(_fs_lock);\n    auto iter = _fs_map.find(provider);\n    if (iter != _fs_map.end()) {\n        return iter->second.get();\n    }\n\n    const char *provider_type = dsn_config_get_value_string(\n        (std::string(\"block_service.\") + provider).c_str(), \"type\", \"\", \"block service type\");\n\n    block_filesystem *fs =\n        utils::factory_store<block_filesystem>::create(provider_type, PROVIDER_TYPE_MAIN);\n    if (fs == nullptr) {\n        derror_f(\"acquire block filesystem failed, provider = {}, provider_type = {}\",\n                 provider,\n                 std::string(provider_type));\n        return nullptr;\n    }\n\n    const char *arguments = dsn_config_get_value_string(\n        (std::string(\"block_service.\") + provider).c_str(), \"args\", \"\", \"args for block_service\");\n\n    std::vector<std::string> args;\n    utils::split_args(arguments, args);\n    dsn::error_code err = fs->initialize(args);\n\n    if (dsn::ERR_OK == err) {\n        ddebug_f(\"create block filesystem ok for provider {}\", provider);\n        _fs_map.emplace(provider, std::unique_ptr<block_filesystem>(fs));\n    } else {\n        derror_f(\"create block file system err {} for provider {}\",\n                 std::string(err.to_string()),\n                 provider);\n        delete fs;\n        fs = nullptr;\n    }\n    return fs;\n}\n\nstatic create_file_response create_block_file_sync(const std::string &remote_file_path,\n                                                   bool 
ignore_meta,\n                                                   block_filesystem *fs,\n                                                   task_tracker *tracker)\n{\n    create_file_response ret;\n    fs->create_file(create_file_request{remote_file_path, ignore_meta},\n                    TASK_CODE_EXEC_INLINED,\n                    [&ret](const create_file_response &resp) { ret = resp; },\n                    tracker);\n    tracker->wait_outstanding_tasks();\n    return ret;\n}\n\nstatic download_response\ndownload_block_file_sync(const std::string &local_file_path, block_file *bf, task_tracker *tracker)\n{\n    download_response ret;\n    bf->download(download_request{local_file_path, 0, -1},\n                 TASK_CODE_EXEC_INLINED,\n                 [&ret](const download_response &resp) { ret = resp; },\n                 tracker);\n    tracker->wait_outstanding_tasks();\n    return ret;\n}\n\nerror_code block_service_manager::download_file(const std::string &remote_dir,\n                                                const std::string &local_dir,\n                                                const std::string &file_name,\n                                                block_filesystem *fs,\n                                                /*out*/ uint64_t &download_file_size)\n{\n    std::string md5;\n    return download_file(remote_dir, local_dir, file_name, fs, download_file_size, md5);\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION, THREAD_POOL_DEFAULT\nerror_code block_service_manager::download_file(const std::string &remote_dir,\n                                                const std::string &local_dir,\n                                                const std::string &file_name,\n                                                block_filesystem *fs,\n                                                /*out*/ uint64_t &download_file_size,\n                                                /*out*/ std::string &download_file_md5)\n{\n    // local file 
exists\n    const std::string local_file_name = utils::filesystem::path_combine(local_dir, file_name);\n    if (utils::filesystem::file_exists(local_file_name)) {\n        ddebug_f(\"local file({}) exists\", local_file_name);\n        return ERR_PATH_ALREADY_EXIST;\n    }\n\n    task_tracker tracker;\n\n    // Create a block_file object.\n    const std::string remote_file_name = utils::filesystem::path_combine(remote_dir, file_name);\n    auto create_resp =\n        create_block_file_sync(remote_file_name, false /*ignore file meta*/, fs, &tracker);\n    error_code err = create_resp.err;\n    if (err != ERR_OK) {\n        derror_f(\"create file({}) failed with error({})\", remote_file_name, err.to_string());\n        return err;\n    }\n    block_file_ptr bf = create_resp.file_handle;\n\n    download_response resp = download_block_file_sync(local_file_name, bf.get(), &tracker);\n    if (resp.err != ERR_OK) {\n        // during bulk load process, ERR_OBJECT_NOT_FOUND will be considered as a recoverable\n        // error, however, if file damaged on remote file provider, bulk load should stop,\n        // return ERR_CORRUPTION instead\n        if (resp.err == ERR_OBJECT_NOT_FOUND) {\n            derror_f(\"download file({}) failed, file on remote file provider is damaged\",\n                     local_file_name);\n            return ERR_CORRUPTION;\n        }\n        return resp.err;\n    }\n\n    ddebug_f(\"download file({}) succeed, file_size = {}, md5 = {}\",\n             local_file_name.c_str(),\n             resp.downloaded_size,\n             resp.file_md5);\n    download_file_size = resp.downloaded_size;\n    download_file_md5 = resp.file_md5;\n    return ERR_OK;\n}\n\n} // namespace block_service\n} // namespace dist\n} // namespace dsn\n"
  },
  {
    "path": "src/block_service/block_service_manager.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/dist/block_service.h>\n#include <dsn/utility/singleton_store.h>\n#include <dsn/tool-api/zlocks.h>\n\nnamespace dsn {\nnamespace dist {\nnamespace block_service {\n\n// a singleton for rDSN service_engine to register all blocks, this should be called only once\nclass block_service_registry : public utils::singleton<block_service_registry>\n{\nprivate:\n    block_service_registry();\n    ~block_service_registry() = default;\n\n    friend class utils::singleton<block_service_registry>;\n};\n\n// this should be shared within a service node\n// we can't make the block_service_manager shared among service nodes because of rDSN's\n// share-nothing archiecture among different apps\nclass block_service_manager\n{\npublic:\n    block_service_manager();\n    ~block_service_manager();\n    block_filesystem *get_or_create_block_filesystem(const std::string &provider);\n\n    // download files from remote file system\n    // \\return  ERR_FILE_OPERATION_FAILED: local file system error\n    // \\return  ERR_FS_INTERNAL: remote file system error\n    // \\return  ERR_CORRUPTION: file not exist or damaged\n    // \\return  
ERR_PATH_ALREADY_EXIST: local file already exists\n    // if the download succeeds, returns ERR_OK and sets download_file_size\n    //\n    // TODO(wutao1): create block_filesystem_wrapper instead.\n    // NOTE: This function is not responsible for the correctness of the downloaded file.\n    // The file may be half-downloaded or corrupted due to disk failure.\n    // The users can compare checksums, and retry download if validation failed.\n    error_code download_file(const std::string &remote_dir,\n                             const std::string &local_dir,\n                             const std::string &file_name,\n                             block_filesystem *fs,\n                             /*out*/ uint64_t &download_file_size,\n                             /*out*/ std::string &download_file_md5);\n\n    error_code download_file(const std::string &remote_dir,\n                             const std::string &local_dir,\n                             const std::string &file_name,\n                             block_filesystem *fs,\n                             /*out*/ uint64_t &download_file_size);\n\nprivate:\n    block_service_registry &_registry_holder;\n\n    mutable zrwlock_nr _fs_lock;\n    std::map<std::string, std::unique_ptr<block_filesystem>> _fs_map;\n\n    friend class block_service_manager_mock;\n};\n\n} // namespace block_service\n} // namespace dist\n} // namespace dsn\n"
  },
  {
    "path": "src/block_service/directio_writable_file.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <algorithm>\n#include <cstring>\n#include <fcntl.h>\n#include <stdlib.h> // posix_memalign\n#include <string>\n#include <sys/stat.h>\n#include <sys/types.h>\n#include <unistd.h> // getpagesize\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/flags.h>\n\n#include \"block_service/directio_writable_file.h\"\n\nnamespace dsn {\nnamespace dist {\nnamespace block_service {\n\nDSN_DEFINE_uint32(\"replication\",\n                  direct_io_buffer_pages,\n                  64,\n                  \"Number of pages we need to set to direct io buffer\");\nDSN_TAG_VARIABLE(direct_io_buffer_pages, FT_MUTABLE);\n\nDSN_DEFINE_bool(\"replication\",\n                enable_direct_io,\n                false,\n                \"Whether to enable direct I/O when download files\");\nDSN_TAG_VARIABLE(enable_direct_io, FT_MUTABLE);\n\nconst uint32_t g_page_size = getpagesize();\n\ndirect_io_writable_file::direct_io_writable_file(const std::string &file_path)\n    : _file_path(file_path),\n      _fd(-1),\n      _file_size(0),\n      _buffer(nullptr),\n      _buffer_size(FLAGS_direct_io_buffer_pages * g_page_size),\n      
_offset(0)\n{\n}\n\ndirect_io_writable_file::~direct_io_writable_file()\n{\n    if (!_buffer || _fd < 0) {\n        return;\n    }\n    // Here is an ensurance, users shuold call finalize manually\n    dassert(_offset == 0, \"finalize() should be called before destructor\");\n\n    free(_buffer);\n    close(_fd);\n}\n\nbool direct_io_writable_file::initialize()\n{\n    if (posix_memalign(&_buffer, g_page_size, _buffer_size) != 0) {\n        derror_f(\"Allocate memaligned buffer failed, errno = {}\", errno);\n        return false;\n    }\n\n    int flag = O_WRONLY | O_TRUNC | O_CREAT | O_DIRECT;\n    _fd = open(_file_path.c_str(), flag, S_IRUSR | S_IWUSR | S_IRGRP);\n    if (_fd < 0) {\n        derror_f(\"Failed to open {} with flag {}, errno = {}\", _file_path, flag, errno);\n        free(_buffer);\n        _buffer = nullptr;\n        return false;\n    }\n    return true;\n}\n\nbool direct_io_writable_file::finalize()\n{\n    dassert(_buffer && _fd >= 0, \"Initialize the instance first\");\n\n    if (_offset > 0) {\n        if (::write(_fd, _buffer, _buffer_size) != _buffer_size) {\n            derror_f(\"Failed to write last chunk, filie_path = {}, errno = {}\", _file_path, errno);\n            return false;\n        }\n        _offset = 0;\n        ftruncate(_fd, _file_size);\n    }\n    return true;\n}\n\nbool direct_io_writable_file::write(const char *s, size_t n)\n{\n    dassert(_buffer && _fd >= 0, \"Initialize the instance first\");\n\n    uint32_t remaining = n;\n    while (remaining > 0) {\n        uint32_t bytes = std::min((_buffer_size - _offset), remaining);\n        memcpy((char *)_buffer + _offset, s, bytes);\n        _offset += bytes;\n        remaining -= bytes;\n        s += bytes;\n        // buffer is full, flush to file\n        if (_offset == _buffer_size) {\n            if (::write(_fd, _buffer, _buffer_size) != _buffer_size) {\n                derror_f(\"Failed to write to direct_io_writable_file, errno = {}\", errno);\n                
return false;\n            }\n            // reset offset\n            _offset = 0;\n        }\n    }\n    _file_size += n;\n    return true;\n}\n\n} // namespace block_service\n} // namespace dist\n} // namespace dsn\n"
  },
  {
    "path": "src/block_service/directio_writable_file.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <string>\n\nnamespace dsn {\nnamespace dist {\nnamespace block_service {\n\nclass direct_io_writable_file\n{\npublic:\n    explicit direct_io_writable_file(const std::string &file_path);\n    ~direct_io_writable_file();\n\n    bool initialize();\n    bool write(const char *s, size_t n);\n    bool finalize();\n\nprivate:\n    DISALLOW_COPY_AND_ASSIGN(direct_io_writable_file);\n\n    std::string _file_path;\n    int _fd;\n    uint32_t _file_size;\n\n    // page size aligned buffer\n    void *_buffer;\n    // buffer size\n    uint32_t _buffer_size;\n    // buffer offset\n    uint32_t _offset;\n};\n\n} // namespace block_service\n} // namespace dist\n} // namespace dsn\n"
  },
  {
    "path": "src/block_service/fds/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn.block_service.fds)\n\n#Source files under CURRENT project directory will be automatically included.\n#You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"\")\n\n#Search mode for source files under CURRENT project directory ?\n#\"GLOB_RECURSE\" for recursive search\n#\"GLOB\" for non - recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS \"\")\n\n#Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_static_library()\ntarget_include_directories(dsn.block_service.fds PRIVATE ${DSN_THIRDPARTY_ROOT}/include/fds)\n"
  },
  {
    "path": "src/block_service/fds/fds_service.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"fds_service.h\"\n\n#include <galaxy_fds_client.h>\n#include <fds_client_configuration.h>\n#include <galaxy_fds_client_exception.h>\n#include <model/fds_object_metadata.h>\n#include <model/fds_object.h>\n#include <model/fds_object_summary.h>\n#include <model/fds_object_listing.h>\n#include <model/delete_multi_objects_result.h>\n#include <dsn/utility/error_code.h>\n#include <Poco/Net/HTTPResponse.h>\n\n#include <boost/scoped_ptr.hpp>\n#include <boost/algorithm/string/predicate.hpp>\n\n#include <memory>\n#include <fstream>\n#include <string.h>\n#include <dsn/utility/defer.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/safe_strerror_posix.h>\n#include <dsn/utility/TokenBucket.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/flags.h>\n\nnamespace dsn {\nnamespace dist {\nnamespace block_service {\n\nDSN_DEFINE_uint32(\"replication\", fds_write_limit_rate, 100, \"write rate limit of fds(MB/s)\");\nDSN_TAG_VARIABLE(fds_write_limit_rate, FT_MUTABLE);\n\nDSN_DEFINE_uint32(\"replication\", fds_write_burst_size, 500, \"write burst size of fds(MB)\");\nDSN_TAG_VARIABLE(fds_write_burst_size, 
FT_MUTABLE);\n\nDSN_DEFINE_uint32(\"replication\", fds_read_limit_rate, 100, \"read rate limit of fds(MB/s)\");\nDSN_TAG_VARIABLE(fds_read_limit_rate, FT_MUTABLE);\n\nDSN_DEFINE_uint32(\"replication\", fds_read_batch_size, 100, \"read batch size of fds(MB)\");\nDSN_TAG_VARIABLE(fds_read_batch_size, FT_MUTABLE);\n\nclass utils\n{\npublic:\n    /*\n     * read data from inputstream 'istr' then write to outputstream 'ostr' piece by piece until\n     * reach EOF, the size of each piece is specified by 'piece_size'\n     */\n    static size_t copy_stream(std::istream &istr, std::ostream &ostr, size_t piece_size);\n    /*\n     * normalize a absolute path to a valid fds object path:\n     * 1. the output_path shouldn't start with /\n     * 2. the path shoudn't contain \".\" or \"..\" or \"//\"\n     * 3. if the path is a dir, the path should ends with \"/\"\n     * 4. if the path is root(\"/\"), then return an empty string\n     */\n    static std::string path_to_fds(const std::string &input, bool is_dir);\n    /*\n     * normalize a fds object path to a absolute path:\n     * 1. the output_path starts with /\n     * 2. 
all the postfix / are removed if the path marks a dir\n     */\n    static std::string path_from_fds(const std::string &input, bool is_dir);\n};\n\n/*static*/\nsize_t utils::copy_stream(std::istream &is, std::ostream &os, size_t piece_size)\n{\n    std::unique_ptr<char[]> buffer(new char[piece_size]);\n    size_t length = 0;\n    is.read(buffer.get(), piece_size);\n    size_t got_length = is.gcount();\n    while (got_length > 0) {\n        length += got_length;\n        os.write(buffer.get(), got_length);\n        if (is && os) {\n            is.read(buffer.get(), piece_size);\n            got_length = is.gcount();\n        } else\n            got_length = 0;\n    }\n    return length;\n}\n\n/*static*/\nstd::string utils::path_to_fds(const std::string &input, bool is_dir)\n{\n    // TODO: handle the \".\" and \"..\" and \"//\"\n    if (input.size() < 1 || input == \"/\")\n        return std::string();\n    std::string result;\n    if (input.front() == '/')\n        result = input.substr(1);\n    else\n        result = input;\n\n    if (is_dir)\n        result.push_back('/');\n    return result;\n}\n\n/*static*/\nstd::string utils::path_from_fds(const std::string &input, bool /*is_dir*/)\n{\n    std::string result = input;\n    if (!input.empty() && input.back() == '/')\n        result.pop_back();\n    return result;\n}\n\nDEFINE_TASK_CODE(LPC_FDS_CALL, TASK_PRIORITY_COMMON, THREAD_POOL_BLOCK_SERVICE)\n\nconst std::string fds_service::FILE_LENGTH_CUSTOM_KEY = \"x-xiaomi-meta-content-length\";\nconst std::string fds_service::FILE_MD5_KEY = \"content-md5\";\n\nfds_service::fds_service()\n{\n    _write_token_bucket.reset(new folly::DynamicTokenBucket());\n    _read_token_bucket.reset(new folly::DynamicTokenBucket());\n}\n\nfds_service::~fds_service() {}\n\n/**\n * @brief fds_service::initialize\n * @param args: {httpServer, accessKey, secretKey, bucket}\n * @return\n */\nerror_code fds_service::initialize(const std::vector<std::string> &args)\n{\n    
galaxy::fds::FDSClientConfiguration config;\n    config.enableHttps(true);\n    config.setEndpoint(args[0]);\n    const std::string &access_key = args[1];\n    const std::string &secret_key = args[2];\n\n    _client.reset(new galaxy::fds::GalaxyFDSClient(access_key, secret_key, config));\n    _bucket_name = args[3];\n    return dsn::ERR_OK;\n}\n\n#define FDS_EXCEPTION_HANDLE(ERR_REFERENCE, OPERATION, INPUT_PARAMETER)                            \\\n    catch (const Poco::TimeoutException &ex)                                                       \\\n    {                                                                                              \\\n        derror(\"fds %s timeout: parameter(%s), code(%d), msg(%s)\",                                 \\\n               OPERATION,                                                                          \\\n               INPUT_PARAMETER,                                                                    \\\n               ex.code(),                                                                          \\\n               ex.message().c_str());                                                              \\\n        ERR_REFERENCE = ERR_TIMEOUT;                                                               \\\n    }                                                                                              \\\n    catch (const Poco::Exception &ex)                                                              \\\n    {                                                                                              \\\n        derror(\"fds %s get poco exception: parameter(%s), code(%d), msg(%s), what(%s)\",            \\\n               OPERATION,                                                                          \\\n               INPUT_PARAMETER,                                                                    \\\n               ex.code(),                                                                          
\\\n               ex.message().c_str(),                                                               \\\n               ex.what());                                                                         \\\n        ERR_REFERENCE = ERR_FS_INTERNAL;                                                           \\\n    }                                                                                              \\\n    catch (...)                                                                                    \\\n    {                                                                                              \\\n        derror(\"fds %s get unknown exception: parameter(%s)\", OPERATION, INPUT_PARAMETER);         \\\n        ERR_REFERENCE = ERR_FS_INTERNAL;                                                           \\\n    }\n\ndsn::task_ptr fds_service::list_dir(const ls_request &req,\n                                    dsn::task_code code,\n                                    const ls_callback &callback,\n                                    dsn::task_tracker *tracker = nullptr)\n{\n    ls_future_ptr t(new ls_future(code, callback, 0));\n    t->set_tracker(tracker);\n\n    auto list_dir_in_background = [this, req, t]() {\n        ls_response resp;\n        std::string fds_path = utils::path_to_fds(req.dir_name, true);\n        try {\n            std::shared_ptr<galaxy::fds::FDSObjectListing> result =\n                _client->listObjects(_bucket_name, fds_path);\n\n            while (true) {\n                const std::vector<galaxy::fds::FDSObjectSummary> &objs = result->objectSummaries();\n                const std::vector<std::string> &common_prefix = result->commonPrefixes();\n                resp.err = dsn::ERR_OK;\n\n                // fds listing's objects are with full-path, we must extract the postfix to emulate\n                // the filesystem structure\n                for (const galaxy::fds::FDSObjectSummary &obj : objs) {\n                    
dassert(fds_path.empty() || boost::starts_with(obj.objectName(), fds_path),\n                            \"invalid path(%s) in parent(%s)\",\n                            obj.objectName().c_str(),\n                            fds_path.c_str());\n                    resp.entries->push_back(\n                        {utils::path_from_fds(obj.objectName().substr(fds_path.size()), false),\n                         false});\n                }\n                for (const std::string &s : common_prefix) {\n                    dassert(fds_path.empty() || boost::starts_with(s, fds_path),\n                            \"invalid path(%s) in parent(%s)\",\n                            s.c_str(),\n                            fds_path.c_str());\n                    resp.entries->push_back(\n                        {utils::path_from_fds(s.substr(fds_path.size()), true), true});\n                }\n\n                // list result may be paged\n                if (result->truncated()) {\n                    auto res_temp = _client->listNextBatchOfObjects(*result);\n                    result.swap(res_temp);\n                } else {\n                    break;\n                }\n            }\n        } catch (const galaxy::fds::GalaxyFDSClientException &ex) {\n            derror(\"fds listObjects failed: parameter(%s), code(%d), msg(%s)\",\n                   req.dir_name.c_str(),\n                   ex.code(),\n                   ex.what());\n            resp.err = ERR_FS_INTERNAL;\n        }\n        FDS_EXCEPTION_HANDLE(resp.err, \"listObject\", req.dir_name.c_str())\n\n        if (resp.err == dsn::ERR_OK && resp.entries->empty()) {\n            try {\n                if (_client->doesObjectExist(_bucket_name,\n                                             utils::path_to_fds(req.dir_name, false))) {\n                    derror(\"fds list_dir failed: path not dir, parameter(%s)\",\n                           req.dir_name.c_str());\n                    resp.err = 
ERR_INVALID_PARAMETERS;\n                } else {\n                    derror(\"fds list_dir failed: path not found, parameter(%s)\",\n                           req.dir_name.c_str());\n                    resp.err = ERR_OBJECT_NOT_FOUND;\n                }\n            } catch (const galaxy::fds::GalaxyFDSClientException &ex) {\n                derror(\"fds doesObjectExist failed: parameter(%s), code(%d), msg(%s)\",\n                       req.dir_name.c_str(),\n                       ex.code(),\n                       ex.what());\n                resp.err = ERR_FS_INTERNAL;\n            }\n            FDS_EXCEPTION_HANDLE(resp.err, \"doesObjectExist\", req.dir_name.c_str())\n        }\n\n        t->enqueue_with(resp);\n    };\n\n    dsn::tasking::enqueue(LPC_FDS_CALL, nullptr, list_dir_in_background);\n    return t;\n}\n\ndsn::task_ptr fds_service::create_file(const create_file_request &req,\n                                       dsn::task_code code,\n                                       const create_file_callback &cb,\n                                       dsn::task_tracker *tracker = nullptr)\n{\n    create_file_future_ptr t(new create_file_future(code, cb, 0));\n    t->set_tracker(tracker);\n    if (req.ignore_metadata) {\n        create_file_response resp;\n        resp.err = dsn::ERR_OK;\n        resp.file_handle =\n            new fds_file_object(this, req.file_name, utils::path_to_fds(req.file_name, false));\n        t->enqueue_with(resp);\n        return t;\n    }\n\n    auto create_file_in_background = [this, req, t]() {\n        create_file_response resp;\n        resp.err = ERR_IO_PENDING;\n        std::string fds_path = utils::path_to_fds(req.file_name, false);\n\n        dsn::ref_ptr<fds_file_object> f = new fds_file_object(this, req.file_name, fds_path);\n        resp.err = f->get_file_meta();\n        if (resp.err == ERR_OK || resp.err == ERR_OBJECT_NOT_FOUND) {\n            resp.err = ERR_OK;\n            resp.file_handle = f;\n        }\n\n   
     t->enqueue_with(resp);\n    };\n\n    dsn::tasking::enqueue(LPC_FDS_CALL, nullptr, create_file_in_background);\n    return t;\n}\n\ndsn::task_ptr fds_service::remove_path(const remove_path_request &req,\n                                       dsn::task_code code,\n                                       const remove_path_callback &cb,\n                                       dsn::task_tracker *tracker)\n{\n    remove_path_future_ptr callback(new remove_path_future(code, cb, 0));\n    callback->set_tracker(tracker);\n    auto remove_path_background = [this, req, callback]() {\n        remove_path_response resp;\n        resp.err = ERR_OK;\n        std::string fds_path = utils::path_to_fds(req.path, true);\n        bool should_remove_path = false;\n\n        try {\n            std::shared_ptr<galaxy::fds::FDSObjectListing> result =\n                _client->listObjects(_bucket_name, fds_path);\n            while (result->objectSummaries().size() <= 0 && result->commonPrefixes().size() <= 0 &&\n                   result->truncated()) {\n                result = _client->listNextBatchOfObjects(*result);\n            }\n            const std::vector<galaxy::fds::FDSObjectSummary> &objs = result->objectSummaries();\n            const std::vector<std::string> &common_prefix = result->commonPrefixes();\n\n            if (!objs.empty() || !common_prefix.empty()) {\n                // path is non-empty directory\n                if (req.recursive) {\n                    should_remove_path = true;\n                } else {\n                    derror(\"fds remove_path failed: dir not empty, parameter(%s)\",\n                           req.path.c_str());\n                    resp.err = ERR_DIR_NOT_EMPTY;\n                }\n            } else {\n                if (_client->doesObjectExist(_bucket_name, utils::path_to_fds(req.path, false))) {\n                    should_remove_path = true;\n                } else {\n                    derror(\"fds remove_path failed: path 
not found, parameter(%s)\",\n                           req.path.c_str());\n                    resp.err = ERR_OBJECT_NOT_FOUND;\n                }\n            }\n        } catch (const galaxy::fds::GalaxyFDSClientException &ex) {\n            derror(\"fds remove_path failed: parameter(%s), code(%d), msg(%s)\",\n                   req.path.c_str(),\n                   ex.code(),\n                   ex.what());\n            resp.err = ERR_FS_INTERNAL;\n        }\n        FDS_EXCEPTION_HANDLE(resp.err, \"remove_path\", req.path.c_str());\n\n        if (resp.err == ERR_OK && should_remove_path) {\n            fds_path = utils::path_to_fds(req.path, false);\n            try {\n                auto deleting = _client->deleteObjects(_bucket_name, fds_path, false);\n                if (deleting->countFailedObjects() <= 0) {\n                    resp.err = ERR_OK;\n                } else {\n                    derror(\"fds remove_path failed: countFailedObjects = %d, parameter(%s)\",\n                           deleting->countFailedObjects(),\n                           req.path.c_str());\n                    resp.err = ERR_FS_INTERNAL;\n                }\n            } catch (const galaxy::fds::GalaxyFDSClientException &ex) {\n                derror(\"fds remove_path failed: parameter(%s), code(%d), msg(%s)\",\n                       req.path.c_str(),\n                       ex.code(),\n                       ex.what());\n                resp.err = ERR_FS_INTERNAL;\n            }\n            FDS_EXCEPTION_HANDLE(resp.err, \"remove_path\", req.path.c_str());\n        }\n\n        callback->enqueue_with(resp);\n        return;\n    };\n\n    dsn::tasking::enqueue(LPC_FDS_CALL, nullptr, remove_path_background);\n    return callback;\n}\n\nfds_file_object::fds_file_object(fds_service *s,\n                                 const std::string &name,\n                                 const std::string &fds_path)\n    : block_file(name),\n      _service(s),\n      
_fds_path(fds_path),\n      _md5sum(\"\"),\n      _size(0),\n      _has_meta_synced(false)\n{\n}\n\nfds_file_object::~fds_file_object() {}\n\nerror_code fds_file_object::get_file_meta()\n{\n    error_code err = ERR_OK;\n    galaxy::fds::GalaxyFDSClient *c = _service->get_client();\n    try {\n        auto meta = c->getObjectMetadata(_service->get_bucket_name(), _fds_path)->metadata();\n\n        // get file length\n        auto iter = meta.find(fds_service::FILE_LENGTH_CUSTOM_KEY);\n        dassert_f(iter != meta.end(),\n                  \"can't find {} in object({})'s metadata\",\n                  fds_service::FILE_LENGTH_CUSTOM_KEY.c_str(),\n                  _fds_path.c_str());\n        bool valid = dsn::buf2uint64(iter->second, _size);\n        dassert_f(valid, \"error to get file size\");\n\n        // get md5 key\n        iter = meta.find(fds_service::FILE_MD5_KEY);\n        dassert_f(iter != meta.end(),\n                  \"can't find {} in object({})'s metadata\",\n                  fds_service::FILE_MD5_KEY.c_str(),\n                  _fds_path.c_str());\n        _md5sum = iter->second;\n\n        _has_meta_synced = true;\n    } catch (const galaxy::fds::GalaxyFDSClientException &ex) {\n        if (ex.code() == Poco::Net::HTTPResponse::HTTP_NOT_FOUND) {\n            err = ERR_OBJECT_NOT_FOUND;\n        } else {\n            derror_f(\"fds getObjectMetadata failed: parameter({}), code({}), msg({})\",\n                     _name.c_str(),\n                     ex.code(),\n                     ex.what());\n            err = ERR_FS_INTERNAL;\n        }\n    }\n    FDS_EXCEPTION_HANDLE(err, \"getObjectMetadata\", _fds_path.c_str());\n    return err;\n}\n\nerror_code fds_file_object::get_content_in_batches(uint64_t start,\n                                                   int64_t length,\n                                                   /*out*/ std::ostream &os,\n                                                   /*out*/ uint64_t &transfered_bytes)\n{\n    
error_code err = ERR_OK;\n    transfered_bytes = 0;\n\n    // get file meta if it is not synced\n    if (!_has_meta_synced) {\n        err = get_file_meta();\n        if (ERR_OK != err) {\n            return err;\n        }\n    }\n\n    // if length = -1, it means we should transfer the whole file\n    uint64_t to_transfer_bytes = (length == -1 ? _size : length);\n\n    uint64_t pos = start;\n    uint64_t once_transfered_bytes = 0;\n    while (pos < start + to_transfer_bytes) {\n        const uint64_t BATCH_SIZE = FLAGS_fds_read_batch_size << 20;\n        uint64_t batch_size = std::min(BATCH_SIZE, start + to_transfer_bytes - pos);\n\n        // burst size should not be less than consume size\n        const uint64_t rate = FLAGS_fds_read_limit_rate << 20;\n        _service->_read_token_bucket->consumeWithBorrowAndWait(\n            batch_size, rate, std::max(2 * rate, batch_size));\n\n        err = get_content(pos, batch_size, os, once_transfered_bytes);\n        transfered_bytes += once_transfered_bytes;\n        if (err != ERR_OK || once_transfered_bytes < batch_size) {\n            return err;\n        }\n        pos += batch_size;\n    }\n\n    return ERR_OK;\n}\n\nerror_code fds_file_object::get_content(uint64_t pos,\n                                        uint64_t length,\n                                        /*out*/ std::ostream &os,\n                                        /*out*/ uint64_t &transfered_bytes)\n{\n    error_code err = ERR_OK;\n    transfered_bytes = 0;\n    while (true) {\n        // if we have download enough or we have reach the end\n        if (transfered_bytes >= length || transfered_bytes + pos >= _size) {\n            return ERR_OK;\n        }\n\n        try {\n            galaxy::fds::GalaxyFDSClient *c = _service->get_client();\n            std::shared_ptr<galaxy::fds::FDSObject> obj;\n            obj = c->getObject(_service->get_bucket_name(),\n                               _fds_path,\n                               pos + 
transfered_bytes,\n                               length - transfered_bytes);\n            dinfo(\"get object from fds succeed, remote_file(%s)\", _fds_path.c_str());\n            std::istream &is = obj->objectContent();\n            transfered_bytes += utils::copy_stream(is, os, PIECE_SIZE);\n            err = ERR_OK;\n        } catch (const galaxy::fds::GalaxyFDSClientException &ex) {\n            derror(\"fds getObject error: remote_file(%s), code(%d), msg(%s)\",\n                   file_name().c_str(),\n                   ex.code(),\n                   ex.what());\n            if (ex.code() == Poco::Net::HTTPResponse::HTTP_NOT_FOUND) {\n                _has_meta_synced = true;\n                _md5sum = \"\";\n                _size = 0;\n                err = ERR_OBJECT_NOT_FOUND;\n            } else {\n                err = ERR_FS_INTERNAL;\n            }\n        }\n        FDS_EXCEPTION_HANDLE(err, \"getObject\", file_name().c_str())\n\n        if (err != ERR_OK) {\n            return err;\n        }\n    }\n}\n\nerror_code fds_file_object::put_content(/*in-out*/ std::istream &is,\n                                        int64_t to_transfer_bytes,\n                                        uint64_t &transfered_bytes)\n{\n    error_code err = ERR_OK;\n    transfered_bytes = 0;\n    galaxy::fds::GalaxyFDSClient *c = _service->get_client();\n\n    // get tokens from token bucket\n    if (!_service->_write_token_bucket->consumeWithBorrowAndWait(to_transfer_bytes,\n                                                                 FLAGS_fds_write_limit_rate << 20,\n                                                                 FLAGS_fds_write_burst_size\n                                                                     << 20)) {\n        ddebug_f(\"the transfer count({}B) is greater than burst size({}MB), so it is rejected by \"\n                 \"token bucket\",\n                 to_transfer_bytes,\n                 FLAGS_fds_write_burst_size);\n        return 
ERR_BUSY;\n    }\n\n    try {\n        c->putObject(_service->get_bucket_name(), _fds_path, is, galaxy::fds::FDSObjectMetadata());\n    } catch (const galaxy::fds::GalaxyFDSClientException &ex) {\n        derror(\"fds putObject error: remote_file(%s), code(%d), msg(%s)\",\n               file_name().c_str(),\n               ex.code(),\n               ex.what());\n        err = ERR_FS_INTERNAL;\n    }\n    FDS_EXCEPTION_HANDLE(err, \"putObject\", file_name().c_str())\n\n    if (err != ERR_OK) {\n        return err;\n    }\n\n    ddebug(\"start to synchronize meta data after successfully wrote data to fds\");\n    err = get_file_meta();\n    if (err == ERR_OK) {\n        transfered_bytes = _size;\n    }\n    return err;\n}\n\ndsn::task_ptr fds_file_object::write(const write_request &req,\n                                     dsn::task_code code,\n                                     const write_callback &cb,\n                                     dsn::task_tracker *tracker = nullptr)\n{\n    write_future_ptr t(new write_future(code, cb, 0));\n    t->set_tracker(tracker);\n\n    add_ref();\n    auto write_in_background = [this, req, t]() {\n        write_response resp;\n        std::istringstream is;\n        is.str(std::string(req.buffer.data(), req.buffer.length()));\n        resp.err = put_content(is, req.buffer.length(), resp.written_size);\n\n        t->enqueue_with(resp);\n        release_ref();\n    };\n\n    dsn::tasking::enqueue(LPC_FDS_CALL, nullptr, write_in_background);\n    return t;\n}\n\n// TODO: handle the localfile path\ndsn::task_ptr fds_file_object::upload(const upload_request &req,\n                                      dsn::task_code code,\n                                      const upload_callback &cb,\n                                      dsn::task_tracker *tracker = nullptr)\n{\n    upload_future_ptr t(new upload_future(code, cb, 0));\n    t->set_tracker(tracker);\n\n    add_ref();\n    auto upload_background = [this, req, t]() {\n        
const std::string &local_file = req.input_local_name;\n        // get file size\n        int64_t file_sz = 0;\n        dsn::utils::filesystem::file_size(local_file, file_sz);\n\n        upload_response resp;\n        // TODO: we can cache the whole file in buffer, then upload the buffer rather than the\n        // ifstream, because if ifstream read file beyond 60s, fds-server will reset the session,\n        // then upload will fail with error broken-pipe\n        std::ifstream is(local_file, std::ios::binary | std::ios::in);\n\n        if (!is.is_open()) {\n            derror_f(\"fds upload failed: open local file({}) failed when upload to({}), error({})\",\n                     local_file,\n                     file_name(),\n                     ::dsn::utils::safe_strerror(errno));\n            resp.err = dsn::ERR_FILE_OPERATION_FAILED;\n        } else {\n            resp.err = put_content(is, file_sz, resp.uploaded_size);\n            is.close();\n        }\n\n        t->enqueue_with(resp);\n        release_ref();\n    };\n\n    dsn::tasking::enqueue(LPC_FDS_CALL, nullptr, upload_background);\n    return t;\n}\n\ndsn::task_ptr fds_file_object::read(const read_request &req,\n                                    dsn::task_code code,\n                                    const read_callback &cb,\n                                    dsn::task_tracker *tracker = nullptr)\n{\n    read_future_ptr t(new read_future(code, cb, 0));\n    t->set_tracker(tracker);\n\n    add_ref();\n    auto read_in_background = [this, req, t]() {\n        read_response resp;\n        std::ostringstream os;\n        uint64_t transferd_size;\n        resp.err = get_content_in_batches(req.remote_pos, req.remote_length, os, transferd_size);\n        if (os.tellp() > 0) {\n            std::string *output = new std::string();\n            *output = os.str();\n            std::shared_ptr<char> ptr((char *)output->c_str(), [output](char *) { delete output; });\n            
resp.buffer.assign(std::move(ptr), 0, output->length());\n        }\n        t->enqueue_with(resp);\n        release_ref();\n    };\n\n    dsn::tasking::enqueue(LPC_FDS_CALL, nullptr, read_in_background);\n    return t;\n}\n\n// TODO: handle the localfile path\ndsn::task_ptr fds_file_object::download(const download_request &req,\n                                        dsn::task_code code,\n                                        const download_callback &cb,\n                                        dsn::task_tracker *tracker = nullptr)\n{\n    download_future_ptr t(new download_future(code, cb, 0));\n    t->set_tracker(tracker);\n    download_response resp;\n\n    std::shared_ptr<std::ofstream> handle(new std::ofstream(\n        req.output_local_name, std::ios::binary | std::ios::out | std::ios::trunc));\n    if (!handle->is_open()) {\n        derror_f(\"fds download failed: fail to open localfile({}) when download({}), error({})\",\n                 req.output_local_name,\n                 _fds_path,\n                 ::dsn::utils::safe_strerror(errno));\n        resp.err = ERR_FILE_OPERATION_FAILED;\n        resp.downloaded_size = 0;\n        t->enqueue_with(resp);\n        return t;\n    }\n\n    add_ref();\n    auto download_background = [this, req, handle, t]() {\n        download_response resp;\n        uint64_t transfered_size;\n        resp.err =\n            get_content_in_batches(req.remote_pos, req.remote_length, *handle, transfered_size);\n        resp.downloaded_size = 0;\n        if (resp.err == ERR_OK && handle->tellp() != -1) {\n            resp.downloaded_size = handle->tellp();\n        }\n        handle->close();\n        if (resp.err != ERR_OK && dsn::utils::filesystem::file_exists(req.output_local_name)) {\n            derror_f(\"fail to download file {} from fds, remove localfile {}\",\n                     _fds_path,\n                     req.output_local_name);\n            dsn::utils::filesystem::remove_path(req.output_local_name);\n        
} else if ((resp.err = dsn::utils::filesystem::md5sum(req.output_local_name,\n                                                              resp.file_md5)) != ERR_OK) {\n            derror_f(\"download failed when calculate the md5sum of local file {}\",\n                     req.output_local_name);\n        }\n        t->enqueue_with(resp);\n        release_ref();\n    };\n\n    dsn::tasking::enqueue(LPC_FDS_CALL, nullptr, download_background);\n    return t;\n}\n} // namespace block_service\n} // namespace dist\n} // namespace dsn\n"
  },
  {
    "path": "src/block_service/fds/fds_service.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#ifndef FDS_SERVICE_H\n#define FDS_SERVICE_H\n\n#include <dsn/dist/block_service.h>\n\nnamespace folly {\ntemplate <typename Clock>\nclass BasicDynamicTokenBucket;\n\nusing DynamicTokenBucket = BasicDynamicTokenBucket<std::chrono::steady_clock>;\n} // namespace folly\n\nnamespace galaxy {\nnamespace fds {\nclass GalaxyFDSClient;\n}\n} // namespace galaxy\n\nnamespace dsn {\nnamespace dist {\nnamespace block_service {\n\nclass fds_service : public block_filesystem\n{\npublic:\n    static const std::string FILE_MD5_KEY;\n    static const std::string FILE_LENGTH_KEY;\n    static const std::string FILE_LENGTH_CUSTOM_KEY;\n\npublic:\n    fds_service();\n    galaxy::fds::GalaxyFDSClient *get_client() { return _client.get(); }\n    const std::string &get_bucket_name() { return _bucket_name; }\n\n    virtual ~fds_service() override;\n    virtual error_code initialize(const std::vector<std::string> &args) override;\n    virtual dsn::task_ptr list_dir(const ls_request &req,\n                                   dsn::task_code code,\n                                   const ls_callback &callback,\n                                   dsn::task_tracker 
*tracker) override;\n\n    virtual dsn::task_ptr create_file(const create_file_request &req,\n                                      dsn::task_code code,\n                                      const create_file_callback &cb,\n                                      dsn::task_tracker *tracker) override;\n\n    //\n    // Attention：\n    //   -- remove the path directly on fds, will not enter trash\n    //   -- when req.path is a directory, this operation may consume much time if there are many\n    //      files under this directory\n    //\n    virtual dsn::task_ptr remove_path(const remove_path_request &req,\n                                      dsn::task_code code,\n                                      const remove_path_callback &cb,\n                                      dsn::task_tracker *tracker) override;\n\nprivate:\n    std::shared_ptr<galaxy::fds::GalaxyFDSClient> _client;\n    std::string _bucket_name;\n    std::unique_ptr<folly::DynamicTokenBucket> _write_token_bucket;\n    std::unique_ptr<folly::DynamicTokenBucket> _read_token_bucket;\n\n    friend class fds_file_object;\n};\n\nclass fds_file_object : public block_file\n{\npublic:\n    fds_file_object(fds_service *s, const std::string &name, const std::string &fds_path);\n\n    virtual ~fds_file_object();\n    virtual uint64_t get_size() override { return _size; }\n    virtual const std::string &get_md5sum() override { return _md5sum; }\n\n    virtual dsn::task_ptr write(const write_request &req,\n                                dsn::task_code code,\n                                const write_callback &cb,\n                                dsn::task_tracker *tracker) override;\n\n    virtual dsn::task_ptr read(const read_request &req,\n                               dsn::task_code code,\n                               const read_callback &cb,\n                               dsn::task_tracker *tracker) override;\n\n    virtual dsn::task_ptr upload(const upload_request &req,\n                               
  dsn::task_code code,\n                                 const upload_callback &cb,\n                                 dsn::task_tracker *tracker) override;\n\n    virtual dsn::task_ptr download(const download_request &req,\n                                   dsn::task_code code,\n                                   const download_callback &cb,\n                                   dsn::task_tracker *tracker) override;\n\n    // Possible errors:\n    // - ERR_FS_INTERNAL\n    // - ERR_OBJECT_NOT_FOUND\n    // - ERR_TIMEOUT\n    error_code get_file_meta();\n\nprivate:\n    error_code get_content_in_batches(uint64_t start,\n                                      int64_t length,\n                                      /*out*/ std::ostream &os,\n                                      /*out*/ uint64_t &transfered_bytes);\n    error_code get_content(uint64_t pos,\n                           uint64_t length,\n                           /*out*/ std::ostream &os,\n                           /*out*/ uint64_t &transfered_bytes);\n    error_code put_content(/*in-out*/ std::istream &is,\n                           /*int*/ int64_t to_transfer_bytes,\n                           /*out*/ uint64_t &transfered_bytes);\n\n    fds_service *_service;\n    std::string _fds_path;\n    std::string _md5sum;\n    uint64_t _size;\n    bool _has_meta_synced;\n\n    static const size_t PIECE_SIZE = 16384; // 16k\n};\n} // namespace block_service\n} // namespace dist\n} // namespace dsn\n#endif // FDS_SERVICE_H\n"
  },
  {
    "path": "src/block_service/hdfs/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn.block_service.hdfs)\n\nset(DIRECTIO_SRC\n        ../directio_writable_file.cpp\n        )\n\n#Source files under CURRENT project directory will be automatically included.\n#You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"${DIRECTIO_SRC}\")\n\n#Search mode for source files under CURRENT project directory ?\n#\"GLOB_RECURSE\" for recursive search\n#\"GLOB\" for non - recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS hdfs)\n\n#Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_static_library()\n\ntarget_link_libraries(${MY_PROJ_NAME} PUBLIC hdfs ${JAVA_JVM_LIBRARY})\n"
  },
  {
    "path": "src/block_service/hdfs/hdfs_service.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <algorithm>\n#include <fstream>\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/tool-api/task.h>\n#include <dsn/tool-api/task_tracker.h>\n#include <dsn/utility/error_code.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/flags.h>\n#include <dsn/utility/safe_strerror_posix.h>\n#include <dsn/utility/TokenBucket.h>\n#include <dsn/utility/utils.h>\n\n#include \"hdfs_service.h\"\n#include \"block_service/directio_writable_file.h\"\n\nnamespace dsn {\nnamespace dist {\nnamespace block_service {\n\nDEFINE_TASK_CODE(LPC_HDFS_SERVICE_CALL, TASK_PRIORITY_COMMON, THREAD_POOL_BLOCK_SERVICE)\n\nDSN_DEFINE_uint64(\"replication\",\n                  hdfs_read_batch_size_bytes,\n                  64 << 20,\n                  \"hdfs read batch size, the default value is 64MB\");\nDSN_TAG_VARIABLE(hdfs_read_batch_size_bytes, FT_MUTABLE);\n\nDSN_DEFINE_uint32(\"replication\", hdfs_read_limit_rate_mb_per_sec, 200, \"hdfs read limit(MB/s)\");\nDSN_TAG_VARIABLE(hdfs_read_limit_rate_mb_per_sec, FT_MUTABLE);\n\nDSN_DEFINE_uint32(\"replication\", hdfs_write_limit_rate_mb_per_sec, 200, \"hdfs write 
limit(MB/s)\");\nDSN_TAG_VARIABLE(hdfs_write_limit_rate_mb_per_sec, FT_MUTABLE);\n\nDSN_DEFINE_uint64(\"replication\",\n                  hdfs_write_batch_size_bytes,\n                  64 << 20,\n                  \"hdfs write batch size, the default value is 64MB\");\nDSN_TAG_VARIABLE(hdfs_write_batch_size_bytes, FT_MUTABLE);\n\nDSN_DECLARE_bool(enable_direct_io);\n\nhdfs_service::hdfs_service()\n{\n    _read_token_bucket.reset(new folly::DynamicTokenBucket());\n    _write_token_bucket.reset(new folly::DynamicTokenBucket());\n}\n\nhdfs_service::~hdfs_service()\n{\n    // We should not call hdfsDisconnect() here if jvm has exited.\n    // And there is no simple, safe way to call hdfsDisconnect()\n    // when process terminates (the proper solution is likely to create a\n    // signal handler to detect when the process is killed, but we would still\n    // leak when pegasus crashes).\n    //\n    // close();\n}\n\nerror_code hdfs_service::initialize(const std::vector<std::string> &args)\n{\n    if (args.size() < 1) {\n        return ERR_INVALID_PARAMETERS;\n    }\n    // Name_node and root_path should be set in args of block_service configuration.\n    // If no path was configured, just use \"/\" as default root path.\n    _hdfs_name_node = args[0];\n    _hdfs_path = args.size() >= 2 ? 
args[1] : \"/\";\n    ddebug_f(\"hdfs backup root path is initialized to {}.\", _hdfs_path);\n\n    return create_fs();\n}\n\nerror_code hdfs_service::create_fs()\n{\n    hdfsBuilder *builder = hdfsNewBuilder();\n    if (!builder) {\n        derror_f(\"Fail to create an HDFS builder, error: {}.\", utils::safe_strerror(errno));\n        return ERR_FS_INTERNAL;\n    }\n    hdfsBuilderSetNameNode(builder, _hdfs_name_node.c_str());\n    _fs = hdfsBuilderConnect(builder);\n    if (!_fs) {\n        derror_f(\"Fail to connect hdfs name node {}, error: {}.\",\n                 _hdfs_name_node,\n                 utils::safe_strerror(errno));\n        return ERR_FS_INTERNAL;\n    }\n    ddebug_f(\"Succeed to connect hdfs name node {}.\", _hdfs_name_node);\n    return ERR_OK;\n}\n\nvoid hdfs_service::close()\n{\n    // This method should be carefully called.\n    // Calls to hdfsDisconnect() by individual threads would terminate\n    // all other connections handed out via hdfsConnect() to the same URI.\n    ddebug(\"Try to disconnect hdfs.\");\n    int result = hdfsDisconnect(_fs);\n    if (result == -1) {\n        derror_f(\"Fail to disconnect from the hdfs file system, error: {}.\",\n                 utils::safe_strerror(errno));\n    }\n    // Even if there is an error, the resources associated with the hdfsFS will be freed.\n    _fs = nullptr;\n}\n\nstd::string hdfs_service::get_hdfs_entry_name(const std::string &hdfs_path)\n{\n    // get exact file name from an hdfs path.\n    int pos = hdfs_path.find_last_of(\"/\");\n    return hdfs_path.substr(pos + 1);\n}\n\ndsn::task_ptr hdfs_service::list_dir(const ls_request &req,\n                                     dsn::task_code code,\n                                     const ls_callback &cb,\n                                     dsn::task_tracker *tracker = nullptr)\n{\n    ls_future_ptr tsk(new ls_future(code, cb, 0));\n    tsk->set_tracker(tracker);\n\n    auto list_dir_background = [this, req, tsk]() {\n        
std::string path = dsn::utils::filesystem::path_combine(_hdfs_path, req.dir_name);\n        ls_response resp;\n\n        if (hdfsExists(_fs, path.c_str()) == -1) {\n            derror_f(\"HDFS list directory failed: path {} not found.\", path);\n            resp.err = ERR_OBJECT_NOT_FOUND;\n            tsk->enqueue_with(resp);\n            return;\n        }\n\n        hdfsFileInfo *dir_info = hdfsGetPathInfo(_fs, path.c_str());\n        if (dir_info == nullptr) {\n            derror_f(\"HDFS get path {} failed.\", path);\n            resp.err = ERR_FS_INTERNAL;\n            tsk->enqueue_with(resp);\n            return;\n        }\n\n        if (dir_info->mKind == kObjectKindFile) {\n            derror_f(\"HDFS list directory failed, {} is not a directory\", path);\n            resp.err = ERR_INVALID_PARAMETERS;\n        } else {\n            int entries = 0;\n            hdfsFileInfo *info = hdfsListDirectory(_fs, path.c_str(), &entries);\n            if (info == nullptr) {\n                derror_f(\"HDFS list directory {} failed.\", path);\n                resp.err = ERR_FS_INTERNAL;\n            } else {\n                for (int i = 0; i < entries; i++) {\n                    ls_entry tentry;\n                    tentry.entry_name = get_hdfs_entry_name(std::string(info[i].mName));\n                    tentry.is_directory = (info[i].mKind == kObjectKindDirectory);\n                    resp.entries->emplace_back(tentry);\n                }\n                hdfsFreeFileInfo(info, entries);\n                resp.err = ERR_OK;\n            }\n        }\n        hdfsFreeFileInfo(dir_info, 1);\n        tsk->enqueue_with(resp);\n    };\n\n    dsn::tasking::enqueue(LPC_HDFS_SERVICE_CALL, tracker, list_dir_background);\n    return tsk;\n}\n\ndsn::task_ptr hdfs_service::create_file(const create_file_request &req,\n                                        dsn::task_code code,\n                                        const create_file_callback &cb,\n                         
               dsn::task_tracker *tracker = nullptr)\n{\n    create_file_future_ptr tsk(new create_file_future(code, cb, 0));\n    tsk->set_tracker(tracker);\n    std::string hdfs_file = dsn::utils::filesystem::path_combine(_hdfs_path, req.file_name);\n\n    if (req.ignore_metadata) {\n        create_file_response resp;\n        resp.err = ERR_OK;\n        resp.file_handle = new hdfs_file_object(this, hdfs_file);\n        tsk->enqueue_with(resp);\n        return tsk;\n    }\n\n    auto create_file_in_background = [this, req, hdfs_file, tsk]() {\n        create_file_response resp;\n        dsn::ref_ptr<hdfs_file_object> f = new hdfs_file_object(this, hdfs_file);\n        resp.err = f->get_file_meta();\n        if (resp.err == ERR_OK || resp.err == ERR_OBJECT_NOT_FOUND) {\n            // Just to create a hdfs_file_object locally. The file may not appear on HDFS\n            // immediately after this call.\n            resp.err = ERR_OK;\n            resp.file_handle = f;\n            ddebug_f(\"create remote file {} succeed\", hdfs_file);\n        }\n        tsk->enqueue_with(resp);\n    };\n\n    dsn::tasking::enqueue(LPC_HDFS_SERVICE_CALL, tracker, create_file_in_background);\n    return tsk;\n}\n\ndsn::task_ptr hdfs_service::remove_path(const remove_path_request &req,\n                                        dsn::task_code code,\n                                        const remove_path_callback &cb,\n                                        dsn::task_tracker *tracker)\n{\n    remove_path_future_ptr tsk(new remove_path_future(code, cb, 0));\n    tsk->set_tracker(tracker);\n\n    auto remove_path_background = [this, req, tsk]() {\n        std::string path = dsn::utils::filesystem::path_combine(_hdfs_path, req.path);\n        remove_path_response resp;\n\n        // Check if path exists.\n        if (hdfsExists(_fs, path.c_str()) == -1) {\n            derror_f(\"HDFS remove_path failed: path {} not found.\", path);\n            resp.err = ERR_OBJECT_NOT_FOUND;\n      
      tsk->enqueue_with(resp);\n            return;\n        }\n\n        int entries = 0;\n        hdfsFileInfo *info = hdfsListDirectory(_fs, path.c_str(), &entries);\n        hdfsFreeFileInfo(info, entries);\n        if (entries > 0 && !req.recursive) {\n            derror_f(\"HDFS remove_path failed: directory {} is not empty.\", path);\n            resp.err = ERR_DIR_NOT_EMPTY;\n            tsk->enqueue_with(resp);\n            return;\n        }\n\n        // Remove directory now.\n        if (hdfsDelete(_fs, path.c_str(), req.recursive) == -1) {\n            derror_f(\"HDFS remove_path {} failed.\", path);\n            resp.err = ERR_FS_INTERNAL;\n        } else {\n            resp.err = ERR_OK;\n        }\n        tsk->enqueue_with(resp);\n    };\n\n    dsn::tasking::enqueue(LPC_HDFS_SERVICE_CALL, tracker, remove_path_background);\n    return tsk;\n}\n\nhdfs_file_object::hdfs_file_object(hdfs_service *s, const std::string &name)\n    : block_file(name), _service(s), _md5sum(\"\"), _size(0), _has_meta_synced(false)\n{\n}\n\nerror_code hdfs_file_object::get_file_meta()\n{\n    if (hdfsExists(_service->get_fs(), file_name().c_str()) == -1) {\n        dwarn_f(\"HDFS file {} does not exist.\", file_name());\n        return ERR_OBJECT_NOT_FOUND;\n    }\n    hdfsFileInfo *info = hdfsGetPathInfo(_service->get_fs(), file_name().c_str());\n    if (info == nullptr) {\n        derror_f(\"HDFS get file info failed, file: {}.\", file_name());\n        return ERR_FS_INTERNAL;\n    }\n    _size = info->mSize;\n    _has_meta_synced = true;\n    hdfsFreeFileInfo(info, 1);\n    return ERR_OK;\n}\n\nhdfs_file_object::~hdfs_file_object() {}\n\nerror_code hdfs_file_object::write_data_in_batches(const char *data,\n                                                   const uint64_t data_size,\n                                                   uint64_t &written_size)\n{\n    written_size = 0;\n    hdfsFile write_file =\n        hdfsOpenFile(_service->get_fs(), file_name().c_str(), 
O_WRONLY | O_CREAT, 0, 0, 0);\n    if (!write_file) {\n        derror_f(\"Failed to open hdfs file {} for writting, error: {}.\",\n                 file_name(),\n                 utils::safe_strerror(errno));\n        return ERR_FS_INTERNAL;\n    }\n    uint64_t cur_pos = 0;\n    uint64_t write_len = 0;\n    while (cur_pos < data_size) {\n        write_len = std::min(data_size - cur_pos, FLAGS_hdfs_write_batch_size_bytes);\n        const uint64_t rate = FLAGS_hdfs_write_limit_rate_mb_per_sec << 20;\n        const uint64_t burst_size = std::max(2 * rate, write_len);\n        _service->_write_token_bucket->consumeWithBorrowAndWait(write_len, rate, burst_size);\n\n        tSize num_written_bytes = hdfsWrite(_service->get_fs(),\n                                            write_file,\n                                            (void *)(data + cur_pos),\n                                            static_cast<tSize>(write_len));\n        if (num_written_bytes == -1) {\n            derror_f(\"Failed to write hdfs file {}, error: {}.\",\n                     file_name(),\n                     utils::safe_strerror(errno));\n            hdfsCloseFile(_service->get_fs(), write_file);\n            return ERR_FS_INTERNAL;\n        }\n        cur_pos += num_written_bytes;\n    }\n    if (hdfsHFlush(_service->get_fs(), write_file) != 0) {\n        derror_f(\n            \"Failed to flush hdfs file {}, error: {}.\", file_name(), utils::safe_strerror(errno));\n        hdfsCloseFile(_service->get_fs(), write_file);\n        return ERR_FS_INTERNAL;\n    }\n    written_size = cur_pos;\n    if (hdfsCloseFile(_service->get_fs(), write_file) != 0) {\n        derror_f(\n            \"Failed to close hdfs file {}, error: {}\", file_name(), utils::safe_strerror(errno));\n        return ERR_FS_INTERNAL;\n    }\n\n    ddebug(\"start to synchronize meta data after successfully wrote data to hdfs\");\n    return get_file_meta();\n}\n\ndsn::task_ptr hdfs_file_object::write(const write_request 
&req,\n                                      dsn::task_code code,\n                                      const write_callback &cb,\n                                      dsn::task_tracker *tracker = nullptr)\n{\n    add_ref();\n    write_future_ptr tsk(new write_future(code, cb, 0));\n    tsk->set_tracker(tracker);\n    auto write_background = [this, req, tsk]() {\n        write_response resp;\n        resp.err = write_data_in_batches(req.buffer.data(), req.buffer.length(), resp.written_size);\n        tsk->enqueue_with(resp);\n        release_ref();\n    };\n    dsn::tasking::enqueue(LPC_HDFS_SERVICE_CALL, tracker, std::move(write_background));\n    return tsk;\n}\n\ndsn::task_ptr hdfs_file_object::upload(const upload_request &req,\n                                       dsn::task_code code,\n                                       const upload_callback &cb,\n                                       dsn::task_tracker *tracker = nullptr)\n{\n    upload_future_ptr t(new upload_future(code, cb, 0));\n    t->set_tracker(tracker);\n\n    add_ref();\n    auto upload_background = [this, req, t]() {\n        upload_response resp;\n        resp.uploaded_size = 0;\n        std::ifstream is(req.input_local_name, std::ios::binary | std::ios::in);\n        if (is.is_open()) {\n            int64_t file_sz = 0;\n            dsn::utils::filesystem::file_size(req.input_local_name, file_sz);\n            std::unique_ptr<char[]> buffer(new char[file_sz]);\n            is.read(buffer.get(), file_sz);\n            is.close();\n            resp.err = write_data_in_batches(buffer.get(), file_sz, resp.uploaded_size);\n        } else {\n            derror_f(\"HDFS upload failed: open local file {} failed when upload to {}, error: {}\",\n                     req.input_local_name,\n                     file_name(),\n                     utils::safe_strerror(errno));\n            resp.err = dsn::ERR_FILE_OPERATION_FAILED;\n        }\n        t->enqueue_with(resp);\n        release_ref();\n    
};\n\n    dsn::tasking::enqueue(LPC_HDFS_SERVICE_CALL, tracker, upload_background);\n    return t;\n}\n\nerror_code hdfs_file_object::read_data_in_batches(uint64_t start_pos,\n                                                  int64_t length,\n                                                  std::string &read_buffer,\n                                                  size_t &read_length)\n{\n    // get file meta if it is not synchronized.\n    if (!_has_meta_synced) {\n        error_code err = get_file_meta();\n        if (err != ERR_OK) {\n            derror_f(\"Failed to read remote file {}\", file_name());\n            return err;\n        }\n    }\n\n    hdfsFile read_file = hdfsOpenFile(_service->get_fs(), file_name().c_str(), O_RDONLY, 0, 0, 0);\n    if (!read_file) {\n        derror_f(\"Failed to open hdfs file {} for reading, error: {}.\",\n                 file_name(),\n                 utils::safe_strerror(errno));\n        return ERR_FS_INTERNAL;\n    }\n    std::unique_ptr<char[]> raw_buf(new char[_size]);\n    char *dst_buf = raw_buf.get();\n\n    // if length = -1, we should read the whole file.\n    uint64_t data_length = (length == -1 ? 
_size : length);\n    uint64_t cur_pos = start_pos;\n    uint64_t read_size = 0;\n    bool read_success = true;\n    while (cur_pos < start_pos + data_length) {\n        const uint64_t rate = FLAGS_hdfs_read_limit_rate_mb_per_sec << 20;\n        read_size = std::min(start_pos + data_length - cur_pos, FLAGS_hdfs_read_batch_size_bytes);\n        // burst size should not be less than consume size\n        const uint64_t burst_size = std::max(2 * rate, read_size);\n        _service->_read_token_bucket->consumeWithBorrowAndWait(read_size, rate, burst_size);\n\n        tSize num_read_bytes = hdfsPread(_service->get_fs(),\n                                         read_file,\n                                         static_cast<tOffset>(cur_pos),\n                                         (void *)dst_buf,\n                                         static_cast<tSize>(read_size));\n        if (num_read_bytes > 0) {\n            cur_pos += num_read_bytes;\n            dst_buf += num_read_bytes;\n        } else if (num_read_bytes == -1) {\n            derror_f(\"Failed to read hdfs file {}, error: {}.\",\n                     file_name(),\n                     utils::safe_strerror(errno));\n            read_success = false;\n            break;\n        }\n    }\n    if (hdfsCloseFile(_service->get_fs(), read_file) != 0) {\n        derror_f(\n            \"Failed to close hdfs file {}, error: {}.\", file_name(), utils::safe_strerror(errno));\n        return ERR_FS_INTERNAL;\n    }\n    if (read_success) {\n        read_length = cur_pos - start_pos;\n        read_buffer = std::string(raw_buf.get(), dst_buf - raw_buf.get());\n        return ERR_OK;\n    }\n    return ERR_FS_INTERNAL;\n}\n\ndsn::task_ptr hdfs_file_object::read(const read_request &req,\n                                     dsn::task_code code,\n                                     const read_callback &cb,\n                                     dsn::task_tracker *tracker = nullptr)\n{\n    read_future_ptr tsk(new 
read_future(code, cb, 0));\n    tsk->set_tracker(tracker);\n\n    add_ref();\n    auto read_func = [this, req, tsk]() {\n        size_t read_length = 0;\n        read_response resp;\n        std::string read_buffer;\n        resp.err =\n            read_data_in_batches(req.remote_pos, req.remote_length, read_buffer, read_length);\n        if (resp.err == ERR_OK) {\n            resp.buffer = blob::create_from_bytes(std::move(read_buffer));\n        }\n        tsk->enqueue_with(resp);\n        release_ref();\n    };\n\n    dsn::tasking::enqueue(LPC_HDFS_SERVICE_CALL, tracker, std::move(read_func));\n    return tsk;\n}\n\ndsn::task_ptr hdfs_file_object::download(const download_request &req,\n                                         dsn::task_code code,\n                                         const download_callback &cb,\n                                         dsn::task_tracker *tracker = nullptr)\n{\n    download_future_ptr t(new download_future(code, cb, 0));\n    t->set_tracker(tracker);\n\n    add_ref();\n    auto download_background = [this, req, t]() {\n        download_response resp;\n        resp.downloaded_size = 0;\n        std::string read_buffer;\n        size_t read_length = 0;\n        resp.err =\n            read_data_in_batches(req.remote_pos, req.remote_length, read_buffer, read_length);\n        if (resp.err == ERR_OK) {\n            bool write_succ = false;\n            if (FLAGS_enable_direct_io) {\n                auto dio_file = std::make_unique<direct_io_writable_file>(req.output_local_name);\n                do {\n                    if (!dio_file->initialize()) {\n                        break;\n                    }\n                    bool wr_ret = dio_file->write(read_buffer.c_str(), read_length);\n                    if (!wr_ret) {\n                        break;\n                    }\n                    if (dio_file->finalize()) {\n                        resp.downloaded_size = read_length;\n                        resp.file_md5 = 
utils::string_md5(read_buffer.c_str(), read_length);\n                        write_succ = true;\n                    }\n                } while (0);\n            } else {\n                std::ofstream out(req.output_local_name,\n                                  std::ios::binary | std::ios::out | std::ios::trunc);\n                if (out.is_open()) {\n                    out.write(read_buffer.c_str(), read_length);\n                    out.close();\n                    resp.downloaded_size = read_length;\n                    resp.file_md5 = utils::string_md5(read_buffer.c_str(), read_length);\n                    write_succ = true;\n                }\n            }\n            if (!write_succ) {\n                derror_f(\"HDFS download failed: fail to open localfile {} when download {}, \"\n                         \"error: {}\",\n                         req.output_local_name,\n                         file_name(),\n                         utils::safe_strerror(errno));\n                resp.err = ERR_FILE_OPERATION_FAILED;\n                resp.downloaded_size = 0;\n            }\n        }\n        t->enqueue_with(resp);\n        release_ref();\n    };\n\n    dsn::tasking::enqueue(LPC_HDFS_SERVICE_CALL, tracker, download_background);\n    return t;\n}\n} // namespace block_service\n} // namespace dist\n} // namespace dsn\n"
  },
  {
    "path": "src/block_service/hdfs/hdfs_service.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/dist/block_service.h>\n#include <hdfs/hdfs.h>\n\nnamespace folly {\ntemplate <typename Clock>\nclass BasicDynamicTokenBucket;\n\nusing DynamicTokenBucket = BasicDynamicTokenBucket<std::chrono::steady_clock>;\n}\n\nnamespace dsn {\nnamespace dist {\nnamespace block_service {\n\nclass hdfs_service : public block_filesystem\n{\npublic:\n    hdfs_service();\n    error_code create_fs();\n    hdfsFS get_fs() { return _fs; }\n\n    ~hdfs_service();\n    error_code initialize(const std::vector<std::string> &args) override;\n    dsn::task_ptr list_dir(const ls_request &req,\n                           dsn::task_code code,\n                           const ls_callback &cb,\n                           dsn::task_tracker *tracker) override;\n    dsn::task_ptr create_file(const create_file_request &req,\n                              dsn::task_code code,\n                              const create_file_callback &cb,\n                              dsn::task_tracker *tracker) override;\n    dsn::task_ptr remove_path(const remove_path_request &req,\n                              dsn::task_code code,\n                              
const remove_path_callback &cb,\n                              dsn::task_tracker *tracker) override;\n    void close();\n\n    static std::string get_hdfs_entry_name(const std::string &hdfs_path);\n\n    bool is_root_path_set() const override { return _hdfs_path != \"/\"; }\n\nprivate:\n    hdfsFS _fs;\n    std::string _hdfs_name_node;\n    std::string _hdfs_path;\n\n    std::unique_ptr<folly::DynamicTokenBucket> _read_token_bucket;\n    std::unique_ptr<folly::DynamicTokenBucket> _write_token_bucket;\n\n    friend class hdfs_file_object;\n};\n\nclass hdfs_file_object : public block_file\n{\npublic:\n    hdfs_file_object(hdfs_service *s, const std::string &name);\n    ~hdfs_file_object();\n    uint64_t get_size() override { return _size; }\n    const std::string &get_md5sum() override { return _md5sum; }\n    dsn::task_ptr write(const write_request &req,\n                        dsn::task_code code,\n                        const write_callback &cb,\n                        dsn::task_tracker *tracker) override;\n    dsn::task_ptr read(const read_request &req,\n                       dsn::task_code code,\n                       const read_callback &cb,\n                       dsn::task_tracker *tracker) override;\n    dsn::task_ptr upload(const upload_request &req,\n                         dsn::task_code code,\n                         const upload_callback &cb,\n                         dsn::task_tracker *tracker) override;\n    dsn::task_ptr download(const download_request &req,\n                           dsn::task_code code,\n                           const download_callback &cb,\n                           dsn::task_tracker *tracker) override;\n    error_code get_file_meta();\n\nprivate:\n    error_code\n    write_data_in_batches(const char *data, const uint64_t data_size, uint64_t &written_size);\n    error_code read_data_in_batches(uint64_t start_pos,\n                                    int64_t length,\n                                    std::string 
&read_buffer,\n                                    size_t &read_length);\n\n    hdfs_service *_service;\n    std::string _md5sum;\n    uint64_t _size;\n    bool _has_meta_synced;\n};\n} // namespace block_service\n} // namespace dist\n} // namespace dsn\n"
  },
  {
    "path": "src/block_service/local/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn.block_service.local)\n\n#Source files under CURRENT project directory will be automatically included.\n#You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"\")\n\n#Search mode for source files under CURRENT project directory ?\n#\"GLOB_RECURSE\" for recursive search\n#\"GLOB\" for non - recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS \"\")\n\n#Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_static_library()\n"
  },
  {
    "path": "src/block_service/local/local_service.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/cpp/json_helper.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/tool-api/task_tracker.h>\n#include <dsn/utility/defer.h>\n#include <dsn/utility/error_code.h>\n#include <dsn/utility/fail_point.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/safe_strerror_posix.h>\n#include <dsn/utility/strings.h>\n#include <dsn/utility/utils.h>\n#include <memory>\n#include <nlohmann/json.hpp>\n\n#include \"local_service.h\"\n\nstatic const int max_length = 2048; // max data length read from file each time\n\nnamespace dsn {\nnamespace dist {\nnamespace block_service {\n\nDEFINE_TASK_CODE(LPC_LOCAL_SERVICE_CALL, TASK_PRIORITY_COMMON, THREAD_POOL_BLOCK_SERVICE)\n\nstruct file_metadata\n{\n    uint64_t size;\n    std::string md5;\n};\nNLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(file_metadata, size, md5)\n\nbool file_metadata_from_json(std::ifstream &fin, file_metadata &fmeta) noexcept\n{\n    std::string data;\n    fin >> data;\n    try {\n        nlohmann::json::parse(data).get_to(fmeta);\n        return true;\n    } catch (nlohmann::json::exception &exp) {\n        dwarn_f(\"decode meta data from json failed: {} [{}]\", exp.what(), 
data);\n        return false;\n    }\n}\n\nstd::string local_service::get_metafile(const std::string &filepath)\n{\n    std::string dir_part = utils::filesystem::remove_file_name(filepath);\n    std::string base_part = utils::filesystem::get_file_name(filepath);\n\n    return utils::filesystem::path_combine(dir_part, std::string(\".\") + base_part + \".meta\");\n}\n\nlocal_service::local_service() {}\n\nlocal_service::local_service(const std::string &root) : _root(root) {}\n\nlocal_service::~local_service() {}\n\nerror_code local_service::initialize(const std::vector<std::string> &args)\n{\n    if (args.size() > 0 && _root.empty())\n        _root = args[0];\n\n    if (_root.empty()) {\n        ddebug(\"initialize local block service succeed with empty root\");\n    } else {\n        if (::dsn::utils::filesystem::directory_exists(_root)) {\n            dwarn(\"old local block service root dir has already exist, path(%s)\", _root.c_str());\n        } else {\n            if (!::dsn::utils::filesystem::create_directory(_root)) {\n                dassert(false, \"local block service create directory(%s) fail\", _root.c_str());\n                return ERR_FS_INTERNAL;\n            }\n        }\n        ddebug(\"local block service initialize succeed with root(%s)\", _root.c_str());\n    }\n    return ERR_OK;\n}\n\ndsn::task_ptr local_service::list_dir(const ls_request &req,\n                                      dsn::task_code code,\n                                      const ls_callback &callback,\n                                      task_tracker *tracker)\n{\n    ls_future_ptr tsk(new ls_future(code, callback, 0));\n    tsk->set_tracker(tracker);\n\n    // process\n    auto list_dir_background = [this, req, tsk]() {\n        std::string dir_path = ::dsn::utils::filesystem::path_combine(_root, req.dir_name);\n        std::vector<std::string> children;\n\n        ls_response resp;\n        resp.err = ERR_OK;\n\n        if 
(::dsn::utils::filesystem::file_exists(dir_path)) {\n            ddebug(\"list_dir: invalid parameter(%s)\", dir_path.c_str());\n            resp.err = ERR_INVALID_PARAMETERS;\n        } else if (!::dsn::utils::filesystem::directory_exists(dir_path)) {\n            ddebug(\"directory does not exist, dir = %s\", dir_path.c_str());\n            resp.err = ERR_OBJECT_NOT_FOUND;\n        } else {\n            if (!::dsn::utils::filesystem::get_subfiles(dir_path, children, false)) {\n                derror(\"get files under directory: %s fail\", dir_path.c_str());\n                resp.err = ERR_FS_INTERNAL;\n                children.clear();\n            } else {\n                ls_entry tentry;\n                tentry.is_directory = false;\n\n                std::set<std::string> file_matcher;\n                for (const std::string &file : children) {\n                    file_matcher.insert(utils::filesystem::get_file_name(file));\n                }\n                for (const auto &file : file_matcher) {\n                    if (file_matcher.find(get_metafile(file)) != file_matcher.end()) {\n                        tentry.entry_name = file;\n                        resp.entries->emplace_back(tentry);\n                    }\n                }\n            }\n\n            children.clear();\n            if (!::dsn::utils::filesystem::get_subdirectories(dir_path, children, false)) {\n                derror(\"get subpaths under directory: %s fail\", dir_path.c_str());\n                resp.err = ERR_FS_INTERNAL;\n                children.clear();\n            } else {\n                ls_entry tentry;\n                tentry.is_directory = true;\n\n                for (const auto &dir : children) {\n                    tentry.entry_name = ::dsn::utils::filesystem::get_file_name(dir);\n                    resp.entries->emplace_back(tentry);\n                }\n            }\n        }\n        tsk->enqueue_with(std::move(resp));\n    };\n\n    
tasking::enqueue(LPC_LOCAL_SERVICE_CALL, nullptr, std::move(list_dir_background));\n    return tsk;\n}\n\ndsn::task_ptr local_service::create_file(const create_file_request &req,\n                                         dsn::task_code code,\n                                         const create_file_callback &cb,\n                                         task_tracker *tracker)\n{\n    create_file_future_ptr tsk(new create_file_future(code, cb, 0));\n    tsk->set_tracker(tracker);\n\n    if (req.ignore_metadata) {\n        create_file_response resp;\n        resp.err = ERR_OK;\n        resp.file_handle =\n            new local_file_object(::dsn::utils::filesystem::path_combine(_root, req.file_name));\n        tsk->enqueue_with(resp);\n        return tsk;\n    }\n\n    auto create_file_background = [this, req, tsk]() {\n        std::string file_path = utils::filesystem::path_combine(_root, req.file_name);\n        std::string meta_file_path =\n            utils::filesystem::path_combine(_root, get_metafile(req.file_name));\n        create_file_response resp;\n        resp.err = ERR_OK;\n\n        dsn::ref_ptr<local_file_object> f = new local_file_object(file_path);\n        if (utils::filesystem::file_exists(file_path) &&\n            utils::filesystem::file_exists(meta_file_path)) {\n\n            dinfo(\"file(%s) already exist\", file_path.c_str());\n            resp.err = f->load_metadata();\n        }\n\n        if (ERR_OK == resp.err)\n            resp.file_handle = f;\n\n        tsk->enqueue_with(resp);\n    };\n\n    tasking::enqueue(LPC_LOCAL_SERVICE_CALL, nullptr, std::move(create_file_background));\n    return tsk;\n}\n\ndsn::task_ptr local_service::remove_path(const remove_path_request &req,\n                                         dsn::task_code code,\n                                         const remove_path_callback &cb,\n                                         task_tracker *tracker)\n{\n    remove_path_future_ptr tsk(new remove_path_future(code, 
cb, 0));\n    tsk->set_tracker(tracker);\n\n    auto remove_path_background = [this, req, tsk]() {\n        remove_path_response resp;\n        resp.err = ERR_OK;\n\n        bool do_remove = true;\n\n        std::string full_path = utils::filesystem::path_combine(_root, req.path);\n        if (utils::filesystem::directory_exists(full_path)) {\n            auto res = utils::filesystem::is_directory_empty(full_path);\n            if (res.first == ERR_OK) {\n                // directory is not empty & recursive = false\n                if (!res.second && !req.recursive) {\n                    resp.err = ERR_DIR_NOT_EMPTY;\n                    do_remove = false;\n                }\n            } else {\n                resp.err = ERR_FS_INTERNAL;\n                do_remove = false;\n            }\n        } else if (!utils::filesystem::file_exists(full_path)) {\n            resp.err = ERR_OBJECT_NOT_FOUND;\n            do_remove = false;\n        }\n\n        if (do_remove) {\n            if (!utils::filesystem::remove_path(full_path)) {\n                resp.err = ERR_FS_INTERNAL;\n            }\n        }\n\n        tsk->enqueue_with(resp);\n    };\n\n    tasking::enqueue(LPC_LOCAL_SERVICE_CALL, nullptr, std::move(remove_path_background));\n    return tsk;\n}\n\n// local_file_object\nlocal_file_object::local_file_object(const std::string &name)\n    : block_file(name), _size(0), _md5_value(\"\"), _has_meta_synced(false)\n{\n}\n\nlocal_file_object::~local_file_object() {}\n\nconst std::string &local_file_object::get_md5sum() { return _md5_value; }\n\nuint64_t local_file_object::get_size() { return _size; }\n\nerror_code local_file_object::load_metadata()\n{\n    if (_has_meta_synced)\n        return ERR_OK;\n\n    std::string metadata_path = local_service::get_metafile(file_name());\n    std::ifstream is(metadata_path, std::ios::in);\n    if (!is.is_open()) {\n        dwarn(\"load meta data from %s failed, err = %s\", utils::safe_strerror(errno).c_str());\n        
return ERR_FS_INTERNAL;\n    }\n    auto cleanup = dsn::defer([&is]() { is.close(); });\n\n    file_metadata meta;\n    bool ans = file_metadata_from_json(is, meta);\n    if (!ans) {\n        return ERR_FS_INTERNAL;\n    }\n    _size = meta.size;\n    _md5_value = meta.md5;\n    _has_meta_synced = true;\n    return ERR_OK;\n}\n\nerror_code local_file_object::store_metadata()\n{\n    file_metadata meta;\n    meta.md5 = _md5_value;\n    meta.size = _size;\n\n    std::string metadata_path = local_service::get_metafile(file_name());\n    std::ofstream os(metadata_path, std::ios::out | std::ios::trunc);\n    if (!os.is_open()) {\n        dwarn(\"store to metadata file %s failed, err=%s\",\n              metadata_path.c_str(),\n              utils::safe_strerror(errno).c_str());\n        return ERR_FS_INTERNAL;\n    }\n    auto cleanup = dsn::defer([&os]() { os.close(); });\n    os << nlohmann::json(meta);\n\n    return ERR_OK;\n}\n\ndsn::task_ptr local_file_object::write(const write_request &req,\n                                       dsn::task_code code,\n                                       const write_callback &cb,\n                                       task_tracker *tracker)\n{\n    add_ref();\n\n    write_future_ptr tsk(new write_future(code, cb, 0));\n    tsk->set_tracker(tracker);\n\n    FAIL_POINT_INJECT_F(\"mock_local_service_write_failed\", [=](dsn::string_view) {\n        auto write_failed = [=]() {\n            write_response resp;\n            resp.err = ERR_FS_INTERNAL;\n            tsk->enqueue_with(resp);\n            release_ref();\n        };\n        dsn::tasking::enqueue(LPC_LOCAL_SERVICE_CALL, nullptr, std::move(write_failed));\n        return tsk;\n    });\n\n    auto write_background = [this, req, tsk]() {\n        write_response resp;\n        resp.err = ERR_OK;\n        if (!::dsn::utils::filesystem::file_exists(file_name())) {\n            if (!::dsn::utils::filesystem::create_file(file_name())) {\n                resp.err = 
ERR_FS_INTERNAL;\n            }\n        }\n\n        if (resp.err == ERR_OK) {\n            dinfo(\"start write file, file = %s\", file_name().c_str());\n\n            std::ofstream fout(file_name(), std::ifstream::out | std::ifstream::trunc);\n            if (!fout.is_open()) {\n                resp.err = ERR_FS_INTERNAL;\n            } else {\n                fout.write(req.buffer.data(), req.buffer.length());\n                resp.written_size = req.buffer.length();\n                fout.close();\n\n                // Currently we calc the meta data from source data, which save the io bandwidth\n                // a lot, but it is somewhat not correct.\n                _size = resp.written_size;\n                _md5_value = utils::string_md5(req.buffer.data(), req.buffer.length());\n                _has_meta_synced = true;\n\n                store_metadata();\n            }\n        }\n        tsk->enqueue_with(resp);\n        release_ref();\n    };\n    ::dsn::tasking::enqueue(LPC_LOCAL_SERVICE_CALL, nullptr, std::move(write_background));\n    return tsk;\n}\n\ndsn::task_ptr local_file_object::read(const read_request &req,\n                                      dsn::task_code code,\n                                      const read_callback &cb,\n                                      task_tracker *tracker)\n{\n    add_ref();\n\n    read_future_ptr tsk(new read_future(code, cb, 0));\n    tsk->set_tracker(tracker);\n\n    auto read_func = [this, req, tsk]() {\n        read_response resp;\n        resp.err = ERR_OK;\n        if (!utils::filesystem::file_exists(file_name()) ||\n            !utils::filesystem::file_exists(local_service::get_metafile(file_name()))) {\n            resp.err = ERR_OBJECT_NOT_FOUND;\n        } else {\n            if ((resp.err = load_metadata()) != ERR_OK) {\n                dwarn(\"load meta data of %s failed\", file_name().c_str());\n            } else {\n                int64_t file_sz = _size;\n                int64_t total_sz = 
0;\n                if (req.remote_length == -1 || req.remote_length + req.remote_pos > file_sz) {\n                    total_sz = file_sz - req.remote_pos;\n                } else {\n                    total_sz = req.remote_length;\n                }\n\n                dinfo(\"read file(%s), size = %ld\", file_name().c_str(), total_sz);\n                std::string buf;\n                buf.resize(total_sz + 1);\n                std::ifstream fin(file_name(), std::ifstream::in);\n                if (!fin.is_open()) {\n                    resp.err = ERR_FS_INTERNAL;\n                } else {\n                    fin.seekg(static_cast<int64_t>(req.remote_pos), fin.beg);\n                    fin.read((char *)buf.c_str(), total_sz);\n                    buf[fin.gcount()] = '\\0';\n                    resp.buffer = blob::create_from_bytes(std::move(buf));\n                }\n                fin.close();\n            }\n        }\n\n        tsk->enqueue_with(resp);\n        release_ref();\n    };\n\n    dsn::tasking::enqueue(LPC_LOCAL_SERVICE_CALL, nullptr, std::move(read_func));\n    return tsk;\n}\n\ndsn::task_ptr local_file_object::upload(const upload_request &req,\n                                        dsn::task_code code,\n                                        const upload_callback &cb,\n                                        task_tracker *tracker)\n{\n    add_ref();\n    upload_future_ptr tsk(new upload_future(code, cb, 0));\n    tsk->set_tracker(tracker);\n    auto upload_file_func = [this, req, tsk]() {\n        upload_response resp;\n        resp.err = ERR_OK;\n        std::ifstream fin(req.input_local_name, std::ios_base::in);\n        if (!fin.is_open()) {\n            dwarn(\"open source file %s for read failed, err(%s)\",\n                  req.input_local_name.c_str(),\n                  utils::safe_strerror(errno).c_str());\n            resp.err = ERR_FILE_OPERATION_FAILED;\n        }\n\n        utils::filesystem::create_file(file_name());\n        
std::ofstream fout(file_name(), std::ios_base::out | std::ios_base::trunc);\n        if (!fout.is_open()) {\n            dwarn(\"open target file %s for write failed, err(%s)\",\n                  file_name().c_str(),\n                  utils::safe_strerror(errno).c_str());\n            resp.err = ERR_FS_INTERNAL;\n        }\n\n        if (resp.err == ERR_OK) {\n            dinfo(\"start to transfer from src_file(%s) to des_file(%s)\",\n                  req.input_local_name.c_str(),\n                  file_name().c_str());\n            int64_t total_sz = 0;\n            char buf[max_length] = {'\\0'};\n            while (!fin.eof()) {\n                fin.read(buf, max_length);\n                total_sz += fin.gcount();\n                fout.write(buf, fin.gcount());\n            }\n            dinfo(\"finish upload file, file = %s, total_size = %d\", file_name().c_str(), total_sz);\n            fout.close();\n            fin.close();\n\n            resp.uploaded_size = static_cast<uint64_t>(total_sz);\n\n            // calc the md5sum by source file for simplicity\n            _size = total_sz;\n            error_code res = utils::filesystem::md5sum(req.input_local_name, _md5_value);\n            if (res == dsn::ERR_OK) {\n                _has_meta_synced = true;\n                store_metadata();\n            } else {\n                resp.err = ERR_FS_INTERNAL;\n            }\n        } else {\n            if (fin.is_open())\n                fin.close();\n            if (fout.is_open())\n                fout.close();\n        }\n\n        tsk->enqueue_with(resp);\n        release_ref();\n    };\n    ::dsn::tasking::enqueue(LPC_LOCAL_SERVICE_CALL, nullptr, std::move(upload_file_func));\n\n    return tsk;\n}\n\ndsn::task_ptr local_file_object::download(const download_request &req,\n                                          dsn::task_code code,\n                                          const download_callback &cb,\n                                          
task_tracker *tracker)\n{\n    // download the whole file\n    add_ref();\n    download_future_ptr tsk(new download_future(code, cb, 0));\n    tsk->set_tracker(tracker);\n    auto download_file_func = [this, req, tsk]() {\n        download_response resp;\n        resp.err = ERR_OK;\n        std::string target_file = req.output_local_name;\n        if (target_file.empty()) {\n            derror(\"download %s failed, because target name(%s) is invalid\",\n                   file_name().c_str(),\n                   target_file.c_str());\n            resp.err = ERR_INVALID_PARAMETERS;\n        }\n\n        if (resp.err == ERR_OK && !_has_meta_synced) {\n            if (!utils::filesystem::file_exists(file_name()) ||\n                !utils::filesystem::file_exists(local_service::get_metafile(file_name()))) {\n                resp.err = ERR_OBJECT_NOT_FOUND;\n            }\n        }\n\n        if (resp.err == ERR_OK) {\n            std::ifstream fin(file_name(), std::ifstream::in);\n            if (!fin.is_open()) {\n                derror(\"open block file(%s) failed, err(%s)\",\n                       file_name().c_str(),\n                       utils::safe_strerror(errno).c_str());\n                resp.err = ERR_FS_INTERNAL;\n            }\n\n            std::ofstream fout(target_file, std::ios_base::out | std::ios_base::trunc);\n            if (!fout.is_open()) {\n                if (fin.is_open())\n                    fin.close();\n                derror(\"open target file(%s) failed, err(%s)\",\n                       target_file.c_str(),\n                       utils::safe_strerror(errno).c_str());\n                resp.err = ERR_FILE_OPERATION_FAILED;\n            }\n\n            if (resp.err == ERR_OK) {\n                dinfo(\"start to transfer, src_file(%s), des_file(%s)\",\n                      file_name().c_str(),\n                      target_file.c_str());\n                int64_t total_sz = 0;\n                char buf[max_length] = {'\\0'};\n       
         while (!fin.eof()) {\n                    fin.read(buf, max_length);\n                    total_sz += fin.gcount();\n                    fout.write(buf, fin.gcount());\n                }\n                dinfo(\"finish download file(%s), total_size = %d\", target_file.c_str(), total_sz);\n                fout.close();\n                fin.close();\n                resp.downloaded_size = static_cast<uint64_t>(total_sz);\n\n                _size = total_sz;\n                if ((resp.err = utils::filesystem::md5sum(target_file, _md5_value)) != ERR_OK) {\n                    dwarn(\"download %s failed when calculate the md5sum of %s\",\n                          file_name().c_str(),\n                          target_file.c_str());\n                } else {\n                    _has_meta_synced = true;\n                    resp.file_md5 = _md5_value;\n                }\n            }\n        }\n\n        tsk->enqueue_with(resp);\n        release_ref();\n    };\n    ::dsn::tasking::enqueue(LPC_LOCAL_SERVICE_CALL, nullptr, std::move(download_file_func));\n\n    return tsk;\n}\n} // namespace block_service\n} // namespace dist\n} // namespace dsn\n"
  },
  {
    "path": "src/block_service/local/local_service.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <fstream>\n\n#include <dsn/dist/block_service.h>\n\nnamespace dsn {\nnamespace dist {\nnamespace block_service {\n\nclass local_service : public block_filesystem\n{\npublic:\n    local_service();\n    local_service(const std::string &root);\n    virtual error_code initialize(const std::vector<std::string> &args) override;\n    virtual dsn::task_ptr list_dir(const ls_request &req,\n                                   dsn::task_code code,\n                                   const ls_callback &callback,\n                                   dsn::task_tracker *tracker = nullptr) override;\n\n    virtual dsn::task_ptr create_file(const create_file_request &req,\n                                      dsn::task_code code,\n                                      const create_file_callback &cb,\n                                      dsn::task_tracker *tracker = nullptr) override;\n\n    virtual dsn::task_ptr remove_path(const remove_path_request &req,\n                                      dsn::task_code code,\n                                      const remove_path_callback &cb,\n                                      
dsn::task_tracker *tracker = nullptr) override;\n\n    virtual ~local_service();\n\n    static std::string get_metafile(const std::string &filepath);\n\nprivate:\n    std::string _root;\n};\n\nclass local_file_object : public block_file\n{\npublic:\n    local_file_object(const std::string &name);\n\n    virtual ~local_file_object();\n\n    virtual uint64_t get_size() override;\n    virtual const std::string &get_md5sum() override;\n\n    virtual dsn::task_ptr write(const write_request &req,\n                                dsn::task_code code,\n                                const write_callback &cb,\n                                dsn::task_tracker *tracker = nullptr) override;\n\n    virtual dsn::task_ptr read(const read_request &req,\n                               dsn::task_code code,\n                               const read_callback &cb,\n                               dsn::task_tracker *tracker = nullptr) override;\n\n    virtual dsn::task_ptr upload(const upload_request &req,\n                                 dsn::task_code code,\n                                 const upload_callback &cb,\n                                 dsn::task_tracker *tracker = nullptr) override;\n\n    virtual dsn::task_ptr download(const download_request &req,\n                                   dsn::task_code code,\n                                   const download_callback &cb,\n                                   dsn::task_tracker *tracker = nullptr) override;\n\n    error_code load_metadata();\n    error_code store_metadata();\n\nprivate:\n    std::string compute_md5();\n\nprivate:\n    uint64_t _size;\n    std::string _md5_value;\n    bool _has_meta_synced;\n};\n}\n}\n}\n"
  },
  {
    "path": "src/block_service/test/CMakeLists.txt",
    "content": "##############################################################################\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n##############################################################################\n\nset(MY_PROJ_NAME dsn_block_service_test)\n\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS\n    dsn_replication_common\n    dsn.block_service\n    dsn.block_service.local\n    dsn.block_service.fds\n    dsn.block_service.hdfs\n    dsn_runtime\n    galaxy-fds-sdk-cpp\n    PocoNet\n    PocoFoundation\n    PocoNetSSL\n    PocoJSON\n    gtest\n    gtest_main\n    hdfs\n    )\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\nset(MY_BINPLACES\n    config-test.ini\n    run.sh\n    clear.sh\n)\n\ndsn_add_test()\n"
  },
  {
    "path": "src/block_service/test/block_service_manager_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"block_service_mock.h\"\n#include \"block_service/block_service_manager.h\"\n#include \"block_service/local/local_service.h\"\n\n#include <fstream>\n\n#include <dsn/utility/filesystem.h>\n#include <gtest/gtest.h>\n\nnamespace dsn {\nnamespace dist {\nnamespace block_service {\n\nclass block_service_manager_test : public ::testing::Test\n{\npublic:\n    block_service_manager_test()\n    {\n        _fs = make_unique<block_service_mock>();\n        utils::filesystem::create_directory(LOCAL_DIR);\n    }\n\n    ~block_service_manager_test() { utils::filesystem::remove_path(LOCAL_DIR); }\n\npublic:\n    error_code test_download_file(uint64_t &download_size)\n    {\n        return _block_service_manager.download_file(\n            PROVIDER, LOCAL_DIR, FILE_NAME, _fs.get(), download_size);\n    }\n\n    void create_local_file(const std::string &file_name)\n    {\n        std::string whole_name = utils::filesystem::path_combine(LOCAL_DIR, file_name);\n        utils::filesystem::create_file(whole_name);\n        std::ofstream test_file;\n        test_file.open(whole_name);\n        test_file << \"write some data.\\n\";\n        
test_file.close();\n\n        _file_meta.name = whole_name;\n        utils::filesystem::md5sum(whole_name, _file_meta.md5);\n        utils::filesystem::file_size(whole_name, _file_meta.size);\n    }\n\n    void create_remote_file(const std::string &file_name, int64_t size, const std::string &md5)\n    {\n        std::string whole_file_name = utils::filesystem::path_combine(PROVIDER, file_name);\n        _fs->files[whole_file_name] = std::make_pair(size, md5);\n    }\n\npublic:\n    block_service_manager _block_service_manager;\n    std::unique_ptr<block_service_mock> _fs;\n\n    replication::file_meta _file_meta;\n    std::string PROVIDER = \"local_service\";\n    std::string LOCAL_DIR = \"test_dir\";\n    std::string FILE_NAME = \"test_file\";\n};\n\n// download_file unit tests\nTEST_F(block_service_manager_test, do_download_remote_file_not_exist)\n{\n    utils::filesystem::remove_path(LOCAL_DIR);\n    auto fs = make_unique<local_service>();\n    fs->initialize({LOCAL_DIR});\n    uint64_t download_size = 0;\n    error_code err = _block_service_manager.download_file(\n        PROVIDER, LOCAL_DIR, FILE_NAME, fs.get(), download_size);\n    ASSERT_EQ(err, ERR_CORRUPTION); // file does not exist\n}\n\nTEST_F(block_service_manager_test, do_download_same_name_file)\n{\n    // local file exists, but md5 not matched with remote file\n    create_local_file(FILE_NAME);\n    create_remote_file(FILE_NAME, 2333, \"md5_not_match\");\n    uint64_t download_size = 0;\n    ASSERT_EQ(test_download_file(download_size), ERR_PATH_ALREADY_EXIST);\n    ASSERT_EQ(download_size, 0);\n}\n\nTEST_F(block_service_manager_test, do_download_file_exist)\n{\n    create_local_file(FILE_NAME);\n    create_remote_file(FILE_NAME, _file_meta.size, _file_meta.md5);\n    uint64_t download_size = 0;\n    ASSERT_EQ(test_download_file(download_size), ERR_PATH_ALREADY_EXIST);\n    ASSERT_EQ(download_size, 0);\n}\n\nTEST_F(block_service_manager_test, do_download_succeed)\n{\n    
create_local_file(FILE_NAME);\n    create_remote_file(FILE_NAME, _file_meta.size, _file_meta.md5);\n    // remove local file to mock condition that file not existed\n    std::string file_name = utils::filesystem::path_combine(LOCAL_DIR, FILE_NAME);\n    utils::filesystem::remove_path(file_name);\n    uint64_t download_size = 0;\n    ASSERT_EQ(test_download_file(download_size), ERR_OK);\n    ASSERT_EQ(download_size, _file_meta.size);\n}\n\n} // namespace block_service\n} // namespace dist\n} // namespace dsn\n"
  },
  {
    "path": "src/block_service/test/block_service_mock.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"block_service/block_service_manager.h\"\n\n#include <dsn/dist/block_service.h>\n#include <dsn/utility/filesystem.h>\n\nnamespace dsn {\nnamespace dist {\nnamespace block_service {\n\nclass block_file_mock : public block_file\n{\npublic:\n    block_file_mock(const std::string &_name, int64_t _size, const std::string &_md5)\n        : block_file(_name),\n          size(_size),\n          md5(_md5),\n          enable_write_fail(false),\n          enable_read_fail(false),\n          enable_upload_fail(false)\n    {\n    }\n\n    virtual uint64_t get_size() { return static_cast<uint64_t>(size); }\n\n    virtual const std::string &get_md5sum() { return md5; }\n\n    virtual dsn::task_ptr write(const write_request &req,\n                                dsn::task_code code,\n                                const write_callback &cb,\n                                dsn::task_tracker *tracker = nullptr)\n    {\n        write_response resp;\n        if (enable_write_fail) {\n            resp.err = ERR_MOCK_INTERNAL;\n        } else {\n            resp.err = ERR_OK;\n            context = std::move(req.buffer);\n            
resp.written_size = context.length();\n        }\n        cb(resp);\n        return task_ptr();\n    }\n\n    virtual dsn::task_ptr read(const read_request &req,\n                               dsn::task_code code,\n                               const read_callback &cb,\n                               dsn::task_tracker *tracker = nullptr)\n    {\n        read_response resp;\n        if (enable_read_fail) {\n            resp.err = ERR_MOCK_INTERNAL;\n        } else {\n            resp.err = ERR_OK;\n            if (size <= 0 && md5.empty()) {\n                resp.buffer = blob();\n            } else {\n                resp.buffer = context;\n            }\n        }\n        cb(resp);\n        return task_ptr();\n    }\n\n    virtual dsn::task_ptr upload(const upload_request &req,\n                                 dsn::task_code code,\n                                 const upload_callback &cb,\n                                 dsn::task_tracker *tracker = nullptr)\n    {\n        upload_response resp;\n        if (enable_upload_fail) {\n            resp.err = ERR_MOCK_INTERNAL;\n        } else {\n            resp.err = ERR_OK;\n            // just return the file size\n            resp.uploaded_size = size;\n        }\n        cb(resp);\n        return task_ptr();\n    }\n\n    virtual dsn::task_ptr download(const download_request &req,\n                                   dsn::task_code code,\n                                   const download_callback &cb,\n                                   dsn::task_tracker *tracker = nullptr)\n    {\n        download_response resp;\n        resp.err = ERR_OK;\n        resp.downloaded_size = size;\n        cb(resp);\n        return task_ptr();\n    }\n\n    // make file exist,\n    void file_exist(const std::string &_md5, int64_t _size)\n    {\n        md5 = _md5;\n        size = _size;\n    }\n    // make file not exist\n    void clear_file_exist()\n    {\n        size = 0;\n        md5.clear();\n    }\n\n    void 
set_context(const std::string &value)\n    {\n        auto len = value.length();\n        std::shared_ptr<char> buf = utils::make_shared_array<char>(len);\n        ::memcpy(buf.get(), value.c_str(), len);\n        blob write_buf(std::move(buf), static_cast<unsigned int>(len));\n        context = std::move(write_buf);\n    }\n    void clear_context() { context = blob(); }\n\npublic:\n    int64_t size;\n    std::string md5;\n    blob context;\n    bool enable_write_fail;\n    bool enable_read_fail;\n    bool enable_upload_fail;\n};\n\nclass block_service_mock : public block_filesystem\n{\npublic:\n    block_service_mock()\n        : block_filesystem(), enable_create_file_fail(false), enable_list_dir_fail(false)\n    {\n    }\n    virtual error_code initialize(const std::vector<std::string> &args) { return ERR_OK; }\n\n    virtual dsn::task_ptr list_dir(const ls_request &req,\n                                   dsn::task_code code,\n                                   const ls_callback &callback,\n                                   dsn::task_tracker *tracker = nullptr)\n    {\n        ls_response resp;\n        if (enable_list_dir_fail) {\n            resp.err = ERR_MOCK_INTERNAL;\n        } else {\n            resp.err = ERR_OK;\n            std::string dir_name = ::dsn::utils::filesystem::get_file_name(req.dir_name);\n            if (dir_files.find(dir_name) != dir_files.end()) {\n                resp.entries = std::make_shared<std::vector<ls_entry>>();\n                (*resp.entries) = dir_files[dir_name];\n            } else {\n                resp.err = ERR_OBJECT_NOT_FOUND;\n            }\n        }\n        callback(resp);\n        return task_ptr();\n    }\n\n    virtual dsn::task_ptr create_file(const create_file_request &req,\n                                      dsn::task_code code,\n                                      const create_file_callback &cb,\n                                      dsn::task_tracker *tracker = nullptr)\n    {\n        
create_file_response resp;\n        if (enable_create_file_fail) {\n            resp.err = ERR_MOCK_INTERNAL;\n        } else {\n            resp.err = ERR_OK;\n            auto it = files.find(req.file_name);\n            if (it != files.end()) {\n                resp.file_handle =\n                    new block_file_mock(req.file_name, it->second.first, it->second.second);\n            } else {\n                resp.file_handle = new block_file_mock(\"\", 0, \"\");\n                std::cout << \"regular_file is selected...\" << std::endl;\n            }\n        }\n\n        cb(resp);\n        return task_ptr();\n    }\n\n    dsn::task_ptr remove_path(const remove_path_request &req,\n                              dsn::task_code code,\n                              const remove_path_callback &cb,\n                              dsn::task_tracker *tracker)\n    {\n        return task_ptr();\n    }\n\npublic:\n    std::map<std::string, std::vector<ls_entry>> dir_files;\n    std::map<std::string, std::pair<int64_t, std::string>> files;\n    bool enable_create_file_fail;\n    bool enable_list_dir_fail;\n};\n\n} // namespace block_service\n} // namespace dist\n} // namespace dsn\n"
  },
  {
    "path": "src/block_service/test/clear.sh",
    "content": "#!/bin/bash\n##############################################################################\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n##############################################################################\n\nrm -rf log.* *.log data dsn_block_service_test.xml randomfile* rename_dir* test_dir\n"
  },
  {
    "path": "src/block_service/test/config-test.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n\n[apps.replica]\ntype = replica\nrun = true\ncount = 1\nports = 54321\npools = THREAD_POOL_DEFAULT,THREAD_POOL_BLOCK_SERVICE\n\n[core]\ntool = nativerun\npause_on_start = false\nlogging_start_level = LOG_LEVEL_DEBUG\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nfast_flush = true\nshort_header = false\nstderr_start_level = LOG_LEVEL_WARNING\n\n[fds_concurrent_test]\ntotal_files = 64\nmin_size = 100\nmax_size = 150\n\n[threadpool.THREAD_POOL_BLOCK_SERVICE]\nworker_count = 8\n\n[hdfs_test]\ntest_name_node = <hdfs_name_none>\ntest_backup_path = <hdfs_path>\nnum_test_file_lines = 4096\nnum_total_files_for_hdfs_concurrent_test = 64\n"
  },
  {
    "path": "src/block_service/test/fds_service_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"block_service/fds/fds_service.h\"\n\n#include <fcntl.h>\n\n#include <array>\n#include <fstream>\n#include <gtest/gtest.h>\n#include <memory>\n\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/rand.h>\n#include <dsn/dist/block_service.h>\n\nusing namespace dsn;\nusing namespace dsn::dist::block_service;\n\nstatic std::string example_server_address = \"<server-address>\";\n// please modify the the paras below to enable fds_service_test, default fds_service_test will be\n// skipped and return true\nstatic std::string server_address = \"<server-address>\";\nstatic std::string access_key = \"<access-key>\";\nstatic std::string access_secret = \"<access-secret>\";\nstatic std::string bucket_name = \"<test-bucket-name>\";\n\nstatic void pipe_execute(const char *command, std::stringstream &output)\n{\n    std::array<char, 256> buffer;\n\n    std::shared_ptr<FILE> command_pipe(popen(command, \"r\"), pclose);\n    while (!feof(command_pipe.get())) {\n        if (fgets(buffer.data(), 256, command_pipe.get()) != NULL)\n            output << buffer.data();\n    }\n}\n\nstatic void file_eq_compare(const std::string &fname1, const 
std::string &fname2)\n{\n    static const int length = 4096;\n    std::shared_ptr<char> buffer(dsn::utils::make_shared_array<char>(length * 2));\n    char *buf1 = buffer.get(), *buf2 = buffer.get() + length;\n\n    std::ifstream ifile1(fname1.c_str(), std::ios::in | std::ios::binary);\n    std::ifstream ifile2(fname2.c_str(), std::ios::in | std::ios::binary);\n\n    auto file_length = [](std::ifstream &is) {\n        is.seekg(0, is.end);\n        int result = is.tellg();\n        is.seekg(0, is.beg);\n        return result;\n    };\n\n    int l = file_length(ifile1);\n    ASSERT_EQ(l, file_length(ifile2));\n\n    for (int i = 0; i < l; i += length) {\n        int up_to_bytes = length < (l - i) ? length : (l - i);\n        ifile1.read(buf1, up_to_bytes);\n        ifile2.read(buf2, up_to_bytes);\n        ASSERT_TRUE(memcmp(buf1, buf2, up_to_bytes) == 0);\n    }\n}\n\nclass FDSClientTest : public testing::Test\n{\nprotected:\n    virtual void SetUp() override;\n    virtual void TearDown() override;\n\n    struct file_desc\n    {\n        std::string filename;\n        std::string md5;\n        size_t length;\n    };\n\n    file_desc f1;\n    file_desc f2;\n    std::string local_file_for_download;\n};\n\nvoid FDSClientTest::SetUp()\n{\n    f1.filename = \"test_file\";\n    f2.filename = \"test_2\";\n    local_file_for_download = \"local_download\";\n\n    // generate a test file\n    {\n        int lines = rand::next_u32(1000, 2000);\n        FILE *fp = fopen(f1.filename.c_str(), \"wb\");\n        for (int i = 0; i < lines; ++i) {\n            fprintf(fp, \"%04d_this_is_a_simple_test_file\\n\", i);\n        }\n        fclose(fp);\n\n        std::stringstream ss;\n        pipe_execute((std::string(\"md5sum \") + f1.filename).c_str(), ss);\n        ss >> f1.md5;\n        // well, the string of each line in _test_file is 32\n        f1.length = 32 * lines;\n    }\n\n    // generate another test file\n    {\n        int lines = rand::next_u32(10, 20);\n        FILE *fp = 
fopen(f2.filename.c_str(), \"wb\");\n        for (int i = 0; i < lines; ++i) {\n            fprintf(fp, \"%04d_this_is_a_simple_test_file\\n\", i);\n        }\n        fclose(fp);\n\n        std::stringstream ss;\n        pipe_execute((std::string(\"md5sum \") + f2.filename).c_str(), ss);\n        ss >> f2.md5;\n        // well, the string of each line in _test_file is 32\n        f2.length = 32 * lines;\n    }\n}\n\nvoid FDSClientTest::TearDown() {}\n\nDEFINE_TASK_CODE(lpc_btest, TASK_PRIORITY_HIGH, dsn::THREAD_POOL_DEFAULT)\n\nTEST_F(FDSClientTest, test_basic_operation)\n{\n    const char *files[] = {\"/fdstest/fdstest1/test1/test1\",\n                           \"/fdstest/fdstest1/test1/test2\",\n                           \"/fdstest/fdstest1/test2/test1\",\n                           \"/fdstest/fdstest1/test2/test2\",\n                           \"/fdstest/fdstest2/test2\",\n                           \"/fdstest/fdstest3\",\n                           \"/fdstest/fds_rootfile\",\n                           nullptr};\n    // ensure prefix_path is the prefix of some file in files\n    std::string prefix_path = std::string(\"/fdstest/fdstest1/test1\");\n    int total_files;\n\n    std::shared_ptr<fds_service> s = std::make_shared<fds_service>();\n    // server, access-key, access-secret, bucket_name\n    std::vector<std::string> args = {server_address, access_key, access_secret, bucket_name};\n\n    if (server_address == example_server_address) {\n        // user don't specify the server-address, we just return true\n        return;\n    }\n\n    s->initialize(args);\n\n    create_file_response cf_resp;\n    ls_response l_resp;\n    upload_response u_resp;\n    download_response d_resp;\n    read_response r_resp;\n    write_response w_resp;\n    remove_path_response rem_resp;\n\n    auto entry_cmp = [](const ls_entry &entry1, const ls_entry &entry2) {\n        return entry1.entry_name < entry2.entry_name;\n    };\n    auto entry_vec_eq = [](const 
std::vector<ls_entry> &entry_vec1,\n                           const std::vector<ls_entry> &entry_vec2) {\n        ASSERT_EQ(entry_vec1.size(), entry_vec2.size());\n        for (unsigned int i = 0; i < entry_vec1.size(); ++i) {\n            ASSERT_EQ(entry_vec1[i].entry_name, entry_vec2[i].entry_name);\n            ASSERT_EQ(entry_vec1[i].is_directory, entry_vec2[i].is_directory)\n                << \"on index \" << i << \", name \" << entry_vec1[i].entry_name;\n        }\n    };\n\n    // first clean all\n    {\n        std::cout << \"clean all old files\" << std::endl;\n        for (int i = 0; files[i]; ++i) {\n            std::cout << \"delete file \" << files[i] << std::endl;\n            s->remove_path(remove_path_request{std::string(files[i]), true},\n                           lpc_btest,\n                           [&rem_resp](const remove_path_response &resp) { rem_resp = resp; },\n                           nullptr)\n                ->wait();\n            ASSERT_TRUE(dsn::ERR_OK == rem_resp.err || dsn::ERR_OBJECT_NOT_FOUND == rem_resp.err);\n        }\n    }\n\n    // first upload all these files\n    {\n        std::cout << \"Test upload files\" << std::endl;\n        for (total_files = 0; files[total_files]; ++total_files) {\n            std::cout << \"create and upload: \" << files[total_files] << std::endl;\n            s->create_file(create_file_request{std::string(files[total_files]), true},\n                           lpc_btest,\n                           [&cf_resp](const create_file_response &r) { cf_resp = r; },\n                           nullptr)\n                ->wait();\n            ASSERT_EQ(cf_resp.err, dsn::ERR_OK);\n\n            cf_resp.file_handle\n                ->upload(upload_request{FDSClientTest::f1.filename},\n                         lpc_btest,\n                         [&u_resp](const upload_response &r) { u_resp = r; },\n                         nullptr)\n                ->wait();\n\n            ASSERT_EQ(dsn::ERR_OK, 
u_resp.err);\n            ASSERT_EQ(FDSClientTest::f1.length, cf_resp.file_handle->get_size());\n            ASSERT_EQ(FDSClientTest::f1.md5, cf_resp.file_handle->get_md5sum());\n        }\n\n        // create a non-exist file for read\n        {\n            std::cout << \"create a non-exist file for read: fds_hellworld\" << std::endl;\n            s->create_file(create_file_request{std::string(\"fds_helloworld\"), false},\n                           lpc_btest,\n                           [&cf_resp](const create_file_response &r) { cf_resp = r; },\n                           nullptr)\n                ->wait();\n\n            ASSERT_EQ(dsn::ERR_OK, cf_resp.err);\n            ASSERT_TRUE(cf_resp.file_handle->get_md5sum().empty());\n        }\n\n        // create an exist-file for write\n        {\n            std::cout << \"create an exist file for write: \" << files[total_files - 1] << std::endl;\n            s->create_file(create_file_request{std::string(files[total_files - 1]), false},\n                           lpc_btest,\n                           [&cf_resp](const create_file_response &r) { cf_resp = r; },\n                           nullptr)\n                ->wait();\n\n            ASSERT_EQ(dsn::ERR_OK, cf_resp.err);\n            ASSERT_EQ(FDSClientTest::f1.md5, cf_resp.file_handle->get_md5sum());\n\n            cf_resp.file_handle\n                ->upload(upload_request{FDSClientTest::f2.filename},\n                         lpc_btest,\n                         [&u_resp](const upload_response &r) { u_resp = r; },\n                         nullptr)\n                ->wait();\n\n            ASSERT_EQ(dsn::ERR_OK, u_resp.err);\n            ASSERT_EQ(FDSClientTest::f2.length, cf_resp.file_handle->get_size());\n            ASSERT_EQ(FDSClientTest::f2.md5, cf_resp.file_handle->get_md5sum());\n\n            // upload an non-exist local file\n            cf_resp.file_handle\n                ->upload(upload_request{\"joke_file\"},\n                         
lpc_btest,\n                         [&u_resp](const upload_response &r) { u_resp = r; },\n                         nullptr)\n                ->wait();\n            ASSERT_EQ(dsn::ERR_FILE_OPERATION_FAILED, u_resp.err);\n\n            // upload an local file which we don't have read-permission\n            cf_resp.file_handle\n                ->upload(upload_request{\"/root/.profile\"},\n                         lpc_btest,\n                         [&u_resp](const upload_response &r) { u_resp = r; },\n                         nullptr)\n                ->wait();\n            ASSERT_EQ(dsn::ERR_FILE_OPERATION_FAILED, u_resp.err);\n        }\n    }\n\n    // then test list files\n    {\n        std::cout << \"test ls files\" << std::endl;\n\n        // list the root\n        std::cout << \"list the test root\" << std::endl;\n        std::vector<ls_entry> root = {\n            {\"fdstest1\", true}, {\"fdstest2\", true}, {\"fdstest3\", false}, {\"fds_rootfile\", false}};\n        std::sort(root.begin(), root.end(), entry_cmp);\n\n        s->list_dir(ls_request{\"/fdstest\"},\n                    lpc_btest,\n                    [&l_resp](const ls_response &resp) { l_resp = resp; },\n                    nullptr)\n            ->wait();\n        ASSERT_EQ(dsn::ERR_OK, l_resp.err);\n        std::sort(l_resp.entries->begin(), l_resp.entries->end(), entry_cmp);\n        entry_vec_eq(root, *l_resp.entries);\n\n        // list the fdstest1\n        std::cout << \"list the fdstest1\" << std::endl;\n        std::vector<ls_entry> fdstest1 = {{\"test1\", true}, {\"test2\", true}};\n        std::sort(fdstest1.begin(), fdstest1.end(), entry_cmp);\n\n        s->list_dir(ls_request{\"/fdstest1\"},\n                    lpc_btest,\n                    [&l_resp](const ls_response &resp) { l_resp = resp; },\n                    nullptr)\n            ->wait();\n        std::sort(l_resp.entries->begin(), l_resp.entries->end(), entry_cmp);\n        entry_vec_eq(fdstest1, *l_resp.entries);\n\n  
      // list the fdstest1/test2\n        std::cout << \"list the fdstest1/test2\" << std::endl;\n        std::vector<ls_entry> fdstest1_slash_test2 = {{\"test1\", false}, {\"test2\", false}};\n        std::sort(fdstest1_slash_test2.begin(), fdstest1_slash_test2.end(), entry_cmp);\n\n        s->list_dir(ls_request{\"/fdstest1/test2\"},\n                    lpc_btest,\n                    [&l_resp](const ls_response &resp) { l_resp = resp; },\n                    nullptr)\n            ->wait();\n        std::sort(l_resp.entries->begin(), l_resp.entries->end(), entry_cmp);\n        entry_vec_eq(fdstest1_slash_test2, *l_resp.entries);\n\n        // list a non-exist dir\n        std::cout << \"list a non-exist dir /fds_hehe\" << std::endl;\n        s->list_dir(ls_request{\"/fds_hehe\"},\n                    lpc_btest,\n                    [&l_resp](const ls_response &resp) { l_resp = resp; },\n                    nullptr)\n            ->wait();\n        ASSERT_EQ(dsn::ERR_OBJECT_NOT_FOUND, l_resp.err);\n\n        // list a regular file\n        std::cout << \"list a regular file /fds_rootfile\" << std::endl;\n        s->list_dir(ls_request{\"/fds_rootfile\"},\n                    lpc_btest,\n                    [&l_resp](const ls_response &resp) { l_resp = resp; },\n                    nullptr)\n            ->wait();\n        ASSERT_EQ(dsn::ERR_INVALID_PARAMETERS, l_resp.err);\n    }\n\n    // then test download files\n    {\n        std::cout << \"test download file, don't ignore metadata\" << std::endl;\n        for (int i = 0; i < total_files - 1; ++i) {\n            std::cout << \"test download \" << files[i] << std::endl;\n            s->create_file(create_file_request{files[i], false},\n                           lpc_btest,\n                           [&cf_resp](const create_file_response &resp) { cf_resp = resp; },\n                           nullptr)\n                ->wait();\n            ASSERT_EQ(dsn::ERR_OK, cf_resp.err);\n            ASSERT_NE(nullptr, 
cf_resp.file_handle.get());\n            ASSERT_EQ(f1.length, cf_resp.file_handle->get_size());\n            ASSERT_EQ(f1.md5, cf_resp.file_handle->get_md5sum());\n\n            cf_resp.file_handle\n                ->download(download_request{local_file_for_download, 0, -1},\n                           lpc_btest,\n                           [&d_resp](const download_response &resp) { d_resp = resp; },\n                           nullptr)\n                ->wait();\n            ASSERT_EQ(dsn::ERR_OK, d_resp.err);\n            ASSERT_EQ(cf_resp.file_handle->get_size(), d_resp.downloaded_size);\n            ASSERT_EQ(cf_resp.file_handle->get_md5sum(), d_resp.file_md5);\n            file_eq_compare(f1.filename, local_file_for_download);\n        }\n\n        std::cout << \"test download file, ignore metadata: \" << files[total_files - 1] << std::endl;\n        s->create_file(create_file_request{files[total_files - 1], true},\n                       lpc_btest,\n                       [&cf_resp](const create_file_response &resp) { cf_resp = resp; },\n                       nullptr)\n            ->wait();\n\n        ASSERT_EQ(dsn::ERR_OK, cf_resp.err);\n        ASSERT_NE(nullptr, cf_resp.file_handle.get());\n        ASSERT_TRUE(cf_resp.file_handle->get_md5sum().empty());\n\n        cf_resp.file_handle\n            ->download(download_request{local_file_for_download, 0, -1},\n                       lpc_btest,\n                       [&d_resp](const download_response &resp) { d_resp = resp; },\n                       nullptr)\n            ->wait();\n        ASSERT_EQ(f2.length, cf_resp.file_handle->get_size());\n        ASSERT_EQ(f2.md5, cf_resp.file_handle->get_md5sum());\n        file_eq_compare(f2.filename, local_file_for_download);\n\n        std::cout << \"test partitial download \" << std::endl;\n        cf_resp.file_handle\n            ->download(download_request{local_file_for_download, 32, 32},\n                       lpc_btest,\n                       
[&d_resp](const download_response &resp) { d_resp = resp; },\n                       nullptr)\n            ->wait();\n\n        ASSERT_EQ(dsn::ERR_OK, d_resp.err);\n        ASSERT_EQ(32, d_resp.downloaded_size);\n        {\n            std::shared_ptr<FILE> f(fopen(\"tmp_generate\", \"wb\"), [](FILE *p) { fclose(p); });\n            fprintf(f.get(), \"%04d_this_is_a_simple_test_file\\n\", 1);\n        }\n        file_eq_compare(std::string(\"tmp_generate\"), local_file_for_download);\n    }\n\n    // try to read a non-exist file\n    {\n        std::cout << \"test try to read non-exist file\" << std::endl;\n        s->create_file(create_file_request{\"non_exist_file\", true},\n                       lpc_btest,\n                       [&cf_resp](const create_file_response &r) { cf_resp = r; },\n                       nullptr)\n            ->wait();\n\n        ASSERT_EQ(dsn::ERR_OK, cf_resp.err);\n        ASSERT_NE(nullptr, cf_resp.file_handle.get());\n        ASSERT_TRUE(cf_resp.file_handle->get_md5sum().empty());\n\n        cf_resp.file_handle\n            ->read(read_request{0, -1},\n                   lpc_btest,\n                   [&r_resp](const read_response &r) { r_resp = r; },\n                   nullptr)\n            ->wait();\n        ASSERT_EQ(dsn::ERR_OBJECT_NOT_FOUND, r_resp.err);\n\n        // now file handle has been synced from remote\n        cf_resp.file_handle\n            ->download(download_request{\"local_file\", 0, -1},\n                       lpc_btest,\n                       [&d_resp](const download_response &r) { d_resp = r; },\n                       nullptr)\n            ->wait();\n        ASSERT_EQ(dsn::ERR_OBJECT_NOT_FOUND, d_resp.err);\n        // so we expect the file doesn't create\n        ASSERT_FALSE(dsn::utils::filesystem::file_exists(\"local_file\"));\n    }\n\n    // try to download to a path where we can't create the file\n    {\n        std::cout << \"test try to download to a path where we can't create the file\" << 
std::endl;\n        s->create_file(create_file_request{files[0], false},\n                       lpc_btest,\n                       [&cf_resp](const create_file_response &r) { cf_resp = r; },\n                       nullptr)\n            ->wait();\n\n        ASSERT_EQ(dsn::ERR_OK, cf_resp.err);\n        ASSERT_NE(nullptr, cf_resp.file_handle.get());\n        ASSERT_EQ(cf_resp.file_handle->get_size(), f1.length);\n        ASSERT_EQ(cf_resp.file_handle->get_md5sum(), f1.md5);\n\n        cf_resp.file_handle\n            ->download(download_request{\"test_dir/test_file.txt\", 0, -1},\n                       lpc_btest,\n                       [&d_resp](const download_response &r) { d_resp = r; },\n                       nullptr)\n            ->wait();\n\n        ASSERT_EQ(dsn::ERR_FILE_OPERATION_FAILED, d_resp.err);\n        ASSERT_EQ(0, d_resp.downloaded_size);\n\n        cf_resp.file_handle\n            ->download(download_request{\"/root/.profile\", 0, -1},\n                       lpc_btest,\n                       [&d_resp](const download_response &r) { d_resp = r; },\n                       nullptr)\n            ->wait();\n\n        ASSERT_EQ(dsn::ERR_FILE_OPERATION_FAILED, d_resp.err);\n        ASSERT_EQ(0, d_resp.downloaded_size);\n    }\n\n    // try to do write/read\n    {\n        std::cout << \"test read write operation\" << std::endl;\n        s->create_file(create_file_request{files[0], false},\n                       lpc_btest,\n                       [&cf_resp](const create_file_response &r) { cf_resp = r; },\n                       nullptr)\n            ->wait();\n\n        ASSERT_EQ(dsn::ERR_OK, cf_resp.err);\n        ASSERT_NE(nullptr, cf_resp.file_handle.get());\n        ASSERT_EQ(cf_resp.file_handle->get_size(), f1.length);\n        ASSERT_EQ(cf_resp.file_handle->get_md5sum(), f1.md5);\n\n        const char *test_buffer = \"1234567890qwertyuiopasdfghjklzxcvbnm\";\n        int length = strlen(test_buffer);\n        dsn::blob bb(test_buffer, 0, 
length);\n\n        cf_resp.file_handle\n            ->write(write_request{bb},\n                    lpc_btest,\n                    [&w_resp](const write_response &w) { w_resp = w; },\n                    nullptr)\n            ->wait();\n\n        ASSERT_EQ(dsn::ERR_OK, w_resp.err);\n        ASSERT_EQ(length, w_resp.written_size);\n        ASSERT_EQ(length, cf_resp.file_handle->get_size());\n        ASSERT_NE(f1.md5, cf_resp.file_handle->get_md5sum());\n\n        std::cout << \"test read just written value\" << std::endl;\n        cf_resp.file_handle\n            ->read(read_request{0, -1},\n                   lpc_btest,\n                   [&r_resp](const read_response &r) { r_resp = r; },\n                   nullptr)\n            ->wait();\n\n        ASSERT_EQ(dsn::ERR_OK, r_resp.err);\n        ASSERT_EQ(length, r_resp.buffer.length());\n        ASSERT_EQ(0, memcmp(r_resp.buffer.data(), test_buffer, length));\n\n        // partitial read\n        cf_resp.file_handle\n            ->read(read_request{5, 10},\n                   lpc_btest,\n                   [&r_resp](const read_response &r) { r_resp = r; },\n                   nullptr)\n            ->wait();\n\n        ASSERT_EQ(dsn::ERR_OK, r_resp.err);\n        ASSERT_EQ(10, r_resp.buffer.length());\n        ASSERT_EQ(0, memcmp(r_resp.buffer.data(), test_buffer + 5, 10));\n    }\n\n    // then test remove path\n    {\n        // test remove_path\n        {\n            std::cout << \"Test remove non-empty path with recusive = false\" << std::endl;\n            s->remove_path(remove_path_request{std::string(prefix_path), false},\n                           lpc_btest,\n                           [&rem_resp](const remove_path_response &resp) { rem_resp = resp; },\n                           nullptr)\n                ->wait();\n            ASSERT_EQ(rem_resp.err, ERR_DIR_NOT_EMPTY);\n\n            std::cout << \"Test remove non-empty path with recusive = true\" << std::endl;\n            
s->remove_path(remove_path_request{std::string(prefix_path), true},\n                           lpc_btest,\n                           [&rem_resp](const remove_path_response &resp) { rem_resp = resp; },\n                           nullptr)\n                ->wait();\n            ASSERT_EQ(rem_resp.err, ERR_OK);\n\n            std::cout << \"Test remove a common path\" << std::endl;\n            for (total_files = 0; files[total_files]; total_files++) {\n                std::string filename = files[total_files];\n                // file under prefix_path already removed\n                if (filename.find(prefix_path) == std::string::npos) {\n                    // remove a single file with recusive = true/false\n                    bool recursive = ((total_files % 2) == 0);\n                    s->remove_path(\n                         remove_path_request{std::string(files[total_files]), recursive},\n                         lpc_btest,\n                         [&rem_resp](const remove_path_response &resp) { rem_resp = resp; },\n                         nullptr)\n                        ->wait();\n                    ASSERT_EQ(rem_resp.err, ERR_OK);\n                }\n                s->create_file(create_file_request{files[total_files], false},\n                               lpc_btest,\n                               [&cf_resp](const create_file_response &resp) { cf_resp = resp; },\n                               nullptr)\n                    ->wait();\n\n                ASSERT_EQ(dsn::ERR_OK, cf_resp.err);\n                ASSERT_TRUE(cf_resp.file_handle->get_md5sum().empty());\n            }\n        }\n\n        // test list_dir that total file/dir count greater than 1000\n        {\n            int total_file_cnt = 30;\n            std::cout << \"upload \" << total_file_cnt << \" to fds server\" << std::endl;\n            std::string prefix = \"/fdstest_prefix\";\n            std::string file_name_prefix = \"file_\";\n            std::vector<ls_entry> 
t_entries;\n\n            // first clean in case last test failed\n            std::cout << \"first clean the dir in case old test failed\" << std::endl;\n            s->remove_path(remove_path_request{std::string(prefix), true},\n                           lpc_btest,\n                           [&rem_resp](const remove_path_response &resp) { rem_resp = resp; },\n                           nullptr)\n                ->wait();\n\n            // generate total_file_cnt files\n            std::cout << \"then start to upload files\" << std::endl;\n            for (int i = 1; i <= total_file_cnt; i++) {\n                std::string filename = file_name_prefix + std::to_string(i);\n                t_entries.emplace_back(ls_entry{std::string(filename), false});\n\n                s->create_file(create_file_request{std::string(prefix + \"/\" + filename), true},\n                               lpc_btest,\n                               [&cf_resp](const create_file_response &r) { cf_resp = r; },\n                               nullptr)\n                    ->wait();\n                ASSERT_EQ(cf_resp.err, dsn::ERR_OK);\n\n                cf_resp.file_handle\n                    ->upload(upload_request{FDSClientTest::f1.filename},\n                             lpc_btest,\n                             [&u_resp](const upload_response &r) { u_resp = r; },\n                             nullptr)\n                    ->wait();\n\n                ASSERT_EQ(dsn::ERR_OK, u_resp.err);\n                ASSERT_EQ(FDSClientTest::f1.length, cf_resp.file_handle->get_size());\n                ASSERT_EQ(FDSClientTest::f1.md5, cf_resp.file_handle->get_md5sum());\n            }\n            l_resp.entries->clear();\n            s->list_dir(ls_request{prefix},\n                        lpc_btest,\n                        [&l_resp](const ls_response &resp) { l_resp = resp; },\n                        nullptr)\n                ->wait();\n            ASSERT_EQ(l_resp.err, ERR_OK);\n            
ASSERT_EQ(l_resp.entries->size(), total_file_cnt);\n            std::sort(l_resp.entries->begin(), l_resp.entries->end(), entry_cmp);\n            std::sort(t_entries.begin(), t_entries.end(), entry_cmp);\n            entry_vec_eq(t_entries, *l_resp.entries);\n\n            // then remove all the file, using remove_path\n            std::cout << \"then remove all the files, using remove path\" << std::endl;\n            s->remove_path(remove_path_request{std::string(prefix), true},\n                           lpc_btest,\n                           [&rem_resp](const remove_path_response &resp) { rem_resp = resp; },\n                           nullptr)\n                ->wait();\n            ASSERT_EQ(rem_resp.err, ERR_OK);\n        }\n    }\n}\n\nstatic void\ngenerate_file(const char *filename, unsigned long long file_size, char *block, unsigned block_size)\n{\n    int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);\n    ASSERT_TRUE(fd > 0) << strerror(errno) << std::endl;\n    for (unsigned long long i = 0; i < file_size;) {\n        int batch_size = (file_size - i);\n        if (batch_size > block_size)\n            batch_size = block_size;\n        i += batch_size;\n\n        for (int j = 0; j < batch_size; ++j) {\n            block[j] = (char)rand::next_u32(0, 255);\n        }\n        write(fd, block, batch_size);\n    }\n    close(fd);\n}\n\nTEST_F(FDSClientTest, test_concurrent_upload_download)\n{\n    char block[1024];\n    const char *str = \"test_str\";\n    for (int i = 0; i < 128; ++i) {\n        memcpy(block + i * 8, str, 8);\n    }\n\n    std::shared_ptr<fds_service> _service = std::make_shared<fds_service>();\n    std::vector<std::string> init_str = {server_address, access_key, access_secret, bucket_name};\n\n    if (server_address == example_server_address) {\n        // user don't specify the server-address, we just return true\n        return;\n    }\n\n    _service->initialize(init_str);\n\n    int 
total_files = dsn_config_get_value_uint64(\"fds_concurrent_test\", \"total_files\", 64, \"\");\n    unsigned long min_size = dsn_config_get_value_uint64(\"fds_concurrent_test\", \"min_size\", 64, \"\");\n    unsigned long max_size = dsn_config_get_value_uint64(\"fds_concurrent_test\", \"min_size\", 64, \"\");\n\n    std::vector<std::string> filenames;\n    filenames.reserve(total_files);\n    std::vector<unsigned long> filesize;\n    filesize.reserve(total_files);\n    std::vector<std::string> md5;\n    md5.reserve(total_files);\n\n    for (int i = 0; i < total_files; ++i) {\n        char index[64];\n        snprintf(index, 64, \"%04d\", i);\n        unsigned long random_size = rand::next_u64(min_size, max_size);\n        std::string filename = \"randomfile\" + std::string(index);\n        filenames.push_back(filename);\n        filesize.push_back(random_size);\n        generate_file(filename.c_str(), random_size, block, 1024);\n\n        std::string md5result;\n        dsn::utils::filesystem::md5sum(filename, md5result);\n        md5.push_back(md5result);\n    }\n\n    printf(\"start delete phase\\n\");\n    {\n        for (unsigned int i = 0; i < total_files; ++i) {\n            _service\n                ->remove_path(remove_path_request{filenames[i], true},\n                              lpc_btest,\n                              [i, &filenames](const remove_path_response &resp) {\n                                  printf(\"file %s delete finished, err(%s)\\n\",\n                                         filenames[i].c_str(),\n                                         resp.err.to_string());\n                              },\n                              nullptr)\n                ->wait();\n        }\n    }\n\n    printf(\"start upload phase\\n\");\n    {\n        std::vector<block_file_ptr> block_files;\n        for (unsigned int i = 0; i < total_files; ++i) {\n            create_file_response cf_resp;\n            _service\n                
->create_file(create_file_request{filenames[i], true},\n                              lpc_btest,\n                              [&cf_resp](const create_file_response &r) { cf_resp = r; },\n                              nullptr)\n                ->wait();\n            ASSERT_EQ(dsn::ERR_OK, cf_resp.err);\n            ASSERT_NE(nullptr, cf_resp.file_handle.get());\n            block_files.push_back(cf_resp.file_handle);\n        }\n\n        std::vector<dsn::task_ptr> callbacks;\n        for (unsigned int i = 0; i < total_files; ++i) {\n            block_file_ptr p = block_files[i];\n            dsn::task_ptr t =\n                p->upload(upload_request{filenames[i]},\n                          lpc_btest,\n                          [p, &filenames, &filesize, &md5, i](const upload_response &ur) {\n                              printf(\"file %s upload finished\\n\", filenames[i].c_str());\n                              ASSERT_EQ(dsn::ERR_OK, ur.err);\n                              ASSERT_EQ(filesize[i], ur.uploaded_size);\n                              ASSERT_EQ(filesize[i], p->get_size());\n                              ASSERT_EQ(md5[i], p->get_md5sum());\n                          });\n            callbacks.push_back(t);\n        }\n\n        for (unsigned int i = 0; i < total_files; ++i) {\n            callbacks[i]->wait();\n        }\n    }\n\n    printf(\"start download phase\\n\");\n    {\n        std::vector<block_file_ptr> block_files;\n        for (unsigned int i = 0; i < total_files; ++i) {\n            create_file_response cf_resp;\n            _service\n                ->create_file(create_file_request{filenames[i], true},\n                              lpc_btest,\n                              [&cf_resp](const create_file_response &r) { cf_resp = r; },\n                              nullptr)\n                ->wait();\n            ASSERT_EQ(dsn::ERR_OK, cf_resp.err);\n            ASSERT_NE(nullptr, cf_resp.file_handle.get());\n            
block_files.push_back(cf_resp.file_handle);\n        }\n\n        std::vector<dsn::task_ptr> callbacks;\n        for (unsigned int i = 0; i < total_files; ++i) {\n            block_file_ptr p = block_files[i];\n            dsn::task_ptr t =\n                p->download(download_request{filenames[i] + \".b\", 0, -1},\n                            lpc_btest,\n                            [&filenames, &filesize, &md5, i, p](const download_response &dr) {\n                                printf(\"file %s download finished\\n\", filenames[i].c_str());\n                                ASSERT_EQ(dsn::ERR_OK, dr.err);\n                                ASSERT_EQ(filesize[i], dr.downloaded_size);\n                                ASSERT_EQ(filesize[i], p->get_size());\n                                ASSERT_EQ(md5[i], p->get_md5sum());\n                            });\n            callbacks.push_back(t);\n        }\n\n        for (unsigned int i = 0; i < total_files; ++i) {\n            callbacks[i]->wait();\n        }\n    }\n\n    printf(\"start cleanup phase\\n\");\n    {\n        for (unsigned int i = 0; i < total_files; ++i) {\n            _service\n                ->remove_path(remove_path_request{filenames[i], true},\n                              lpc_btest,\n                              [i, &filenames](const remove_path_response &resp) {\n                                  printf(\"file %s delete finished, err(%s)\\n\",\n                                         filenames[i].c_str(),\n                                         resp.err.to_string());\n                              },\n                              nullptr)\n                ->wait();\n        }\n    }\n}\n"
  },
  {
    "path": "src/block_service/test/hdfs_service_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/dist/block_service.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/flags.h>\n#include <dsn/utility/rand.h>\n#include <fstream>\n#include <gtest/gtest.h>\n#include <memory>\n\n#include \"block_service/hdfs/hdfs_service.h\"\n\nusing namespace dsn;\nusing namespace dsn::dist::block_service;\n\nstatic std::string example_name_node = \"<hdfs_name_none>\";\nstatic std::string example_backup_path = \"<hdfs_path>\";\n// Please modify following paras in 'config-test.ini' to enable hdfs_service_test,\n// or hdfs_service_test will be skipped and return true.\nDSN_DEFINE_string(\"hdfs_test\", test_name_node, \"<hdfs_name_none>\", \"hdfs name node\");\nDSN_DEFINE_string(\"hdfs_test\",\n                  test_backup_path,\n                  \"<hdfs_path>\",\n                  \"path for uploading and downloading test files\");\n\nDSN_DEFINE_uint32(\"hdfs_test\", num_test_file_lines, 4096, \"number of lines in test file\");\nDSN_DEFINE_uint32(\"hdfs_test\",\n                  num_total_files_for_hdfs_concurrent_test,\n                  64,\n                  \"number of total files for hdfs concurrent 
test\");\n\nDEFINE_TASK_CODE(LPC_TEST_HDFS, TASK_PRIORITY_HIGH, dsn::THREAD_POOL_DEFAULT)\n\nclass HDFSClientTest : public testing::Test\n{\nprotected:\n    virtual void SetUp() override;\n    virtual void TearDown() override;\n    void generate_test_file(const char *filename);\n    void write_test_files_async(task_tracker *tracker);\n    std::string name_node;\n    std::string backup_path;\n    std::string local_test_dir;\n    std::string test_data_str;\n};\n\nvoid HDFSClientTest::SetUp()\n{\n    name_node = FLAGS_test_name_node;\n    backup_path = FLAGS_test_backup_path;\n    local_test_dir = \"test_dir\";\n    test_data_str = \"\";\n    for (int i = 0; i < FLAGS_num_test_file_lines; ++i) {\n        test_data_str += \"test\";\n    }\n}\n\nvoid HDFSClientTest::TearDown() {}\n\nvoid HDFSClientTest::generate_test_file(const char *filename)\n{\n    // generate a local test file.\n    int lines = FLAGS_num_test_file_lines;\n    FILE *fp = fopen(filename, \"wb\");\n    for (int i = 0; i < lines; ++i) {\n        fprintf(fp, \"%04d_this_is_a_simple_test_file\\n\", i);\n    }\n    fclose(fp);\n}\n\nvoid HDFSClientTest::write_test_files_async(task_tracker *tracker)\n{\n    dsn::utils::filesystem::create_directory(local_test_dir);\n    for (int i = 0; i < 100; ++i) {\n        tasking::enqueue(LPC_TEST_HDFS, tracker, [this, i]() {\n            // mock the writing process in hdfs_file_object::download().\n            std::string test_file_name = local_test_dir + \"/test_file_\" + std::to_string(i);\n            std::ofstream out(test_file_name, std::ios::binary | std::ios::out | std::ios::trunc);\n            if (out.is_open()) {\n                out.write(test_data_str.c_str(), test_data_str.length());\n            }\n            out.close();\n        });\n    }\n}\n\nTEST_F(HDFSClientTest, test_basic_operation)\n{\n    if (name_node == example_name_node || backup_path == example_backup_path) {\n        return;\n    }\n\n    std::vector<std::string> args = {name_node, 
backup_path};\n    std::shared_ptr<hdfs_service> s = std::make_shared<hdfs_service>();\n    ASSERT_EQ(dsn::ERR_OK, s->initialize(args));\n\n    std::string local_test_file = \"test_file\";\n    std::string remote_test_file = \"hdfs_client_test/test_file\";\n    int64_t test_file_size = 0;\n\n    generate_test_file(local_test_file.c_str());\n    dsn::utils::filesystem::file_size(local_test_file, test_file_size);\n\n    // fisrt clean up all old file in test directory.\n    printf(\"clean up all old files.\\n\");\n    remove_path_response rem_resp;\n    s->remove_path(remove_path_request{\"hdfs_client_test\", true},\n                   LPC_TEST_HDFS,\n                   [&rem_resp](const remove_path_response &resp) { rem_resp = resp; },\n                   nullptr)\n        ->wait();\n    ASSERT_TRUE(dsn::ERR_OK == rem_resp.err || dsn::ERR_OBJECT_NOT_FOUND == rem_resp.err);\n\n    // test upload file.\n    printf(\"create and upload: %s.\\n\", remote_test_file.c_str());\n    create_file_response cf_resp;\n    s->create_file(create_file_request{remote_test_file, true},\n                   LPC_TEST_HDFS,\n                   [&cf_resp](const create_file_response &r) { cf_resp = r; },\n                   nullptr)\n        ->wait();\n    ASSERT_EQ(cf_resp.err, dsn::ERR_OK);\n    upload_response u_resp;\n    cf_resp.file_handle\n        ->upload(upload_request{local_test_file},\n                 LPC_TEST_HDFS,\n                 [&u_resp](const upload_response &r) { u_resp = r; },\n                 nullptr)\n        ->wait();\n    ASSERT_EQ(dsn::ERR_OK, u_resp.err);\n    ASSERT_EQ(test_file_size, cf_resp.file_handle->get_size());\n\n    // test list directory.\n    ls_response l_resp;\n    s->list_dir(ls_request{\"hdfs_client_test\"},\n                LPC_TEST_HDFS,\n                [&l_resp](const ls_response &resp) { l_resp = resp; },\n                nullptr)\n        ->wait();\n    ASSERT_EQ(dsn::ERR_OK, l_resp.err);\n    ASSERT_EQ(1, l_resp.entries->size());\n    
ASSERT_EQ(\"test_file\", l_resp.entries->at(0).entry_name);\n    ASSERT_EQ(false, l_resp.entries->at(0).is_directory);\n\n    // test download file.\n    download_response d_resp;\n    printf(\"test download %s.\\n\", remote_test_file.c_str());\n    s->create_file(create_file_request{remote_test_file, false},\n                   LPC_TEST_HDFS,\n                   [&cf_resp](const create_file_response &resp) { cf_resp = resp; },\n                   nullptr)\n        ->wait();\n    ASSERT_EQ(dsn::ERR_OK, cf_resp.err);\n    ASSERT_EQ(test_file_size, cf_resp.file_handle->get_size());\n    std::string local_file_for_download = \"test_file_d\";\n    cf_resp.file_handle\n        ->download(download_request{local_file_for_download, 0, -1},\n                   LPC_TEST_HDFS,\n                   [&d_resp](const download_response &resp) { d_resp = resp; },\n                   nullptr)\n        ->wait();\n    ASSERT_EQ(dsn::ERR_OK, d_resp.err);\n    ASSERT_EQ(test_file_size, d_resp.downloaded_size);\n\n    // compare local_test_file and local_file_for_download.\n    int64_t file_size = 0;\n    dsn::utils::filesystem::file_size(local_file_for_download, file_size);\n    ASSERT_EQ(test_file_size, file_size);\n    std::string test_file_md5sum;\n    dsn::utils::filesystem::md5sum(local_test_file, test_file_md5sum);\n    std::string downloaded_file_md5sum;\n    dsn::utils::filesystem::md5sum(local_file_for_download, downloaded_file_md5sum);\n    ASSERT_EQ(test_file_md5sum, downloaded_file_md5sum);\n\n    // test read and write.\n    printf(\"test read write operation.\\n\");\n    std::string test_write_file = \"hdfs_client_test/test_write_file\";\n    s->create_file(create_file_request{test_write_file, false},\n                   LPC_TEST_HDFS,\n                   [&cf_resp](const create_file_response &r) { cf_resp = r; },\n                   nullptr)\n        ->wait();\n    ASSERT_EQ(dsn::ERR_OK, cf_resp.err);\n    const char *test_buffer = \"write_hello_world_for_test\";\n    int 
length = strlen(test_buffer);\n    dsn::blob bb(test_buffer, 0, length);\n    write_response w_resp;\n    cf_resp.file_handle\n        ->write(write_request{bb},\n                LPC_TEST_HDFS,\n                [&w_resp](const write_response &w) { w_resp = w; },\n                nullptr)\n        ->wait();\n    ASSERT_EQ(dsn::ERR_OK, w_resp.err);\n    ASSERT_EQ(length, w_resp.written_size);\n    ASSERT_EQ(length, cf_resp.file_handle->get_size());\n    printf(\"test read just written contents.\\n\");\n    read_response r_resp;\n    cf_resp.file_handle\n        ->read(read_request{0, -1},\n               LPC_TEST_HDFS,\n               [&r_resp](const read_response &r) { r_resp = r; },\n               nullptr)\n        ->wait();\n    ASSERT_EQ(dsn::ERR_OK, r_resp.err);\n    ASSERT_EQ(length, r_resp.buffer.length());\n    ASSERT_EQ(0, memcmp(r_resp.buffer.data(), test_buffer, length));\n\n    // test partitial read.\n    cf_resp.file_handle\n        ->read(read_request{5, 10},\n               LPC_TEST_HDFS,\n               [&r_resp](const read_response &r) { r_resp = r; },\n               nullptr)\n        ->wait();\n    ASSERT_EQ(dsn::ERR_OK, r_resp.err);\n    ASSERT_EQ(10, r_resp.buffer.length());\n    ASSERT_EQ(0, memcmp(r_resp.buffer.data(), test_buffer + 5, 10));\n\n    // clean up local test files.\n    utils::filesystem::remove_path(local_test_file);\n    utils::filesystem::remove_path(local_file_for_download);\n}\n\nTEST_F(HDFSClientTest, test_concurrent_upload_download)\n{\n    if (name_node == example_name_node || backup_path == example_backup_path) {\n        return;\n    }\n\n    std::vector<std::string> args = {name_node, backup_path};\n    std::shared_ptr<hdfs_service> s = std::make_shared<hdfs_service>();\n    ASSERT_EQ(dsn::ERR_OK, s->initialize(args));\n\n    int total_files = FLAGS_num_total_files_for_hdfs_concurrent_test;\n    std::vector<std::string> local_file_names;\n    std::vector<std::string> remote_file_names;\n    std::vector<std::string> 
downloaded_file_names;\n\n    std::vector<int64_t> files_size;\n    std::vector<std::string> files_md5sum;\n    local_file_names.reserve(total_files);\n    remote_file_names.reserve(total_files);\n    downloaded_file_names.reserve(total_files);\n    files_size.reserve(total_files);\n    files_md5sum.reserve(total_files);\n\n    // generate test files.\n    for (int i = 0; i < total_files; ++i) {\n        std::string file_name = \"randomfile\" + std::to_string(i);\n        generate_test_file(file_name.c_str());\n        int64_t file_size = 0;\n        dsn::utils::filesystem::file_size(file_name, file_size);\n        std::string md5sum;\n        dsn::utils::filesystem::md5sum(file_name, md5sum);\n\n        local_file_names.emplace_back(file_name);\n        remote_file_names.emplace_back(\"hdfs_concurrent_test/\" + file_name);\n        downloaded_file_names.emplace_back(file_name + \"_d\");\n        files_size.emplace_back(file_size);\n        files_md5sum.emplace_back(md5sum);\n    }\n\n    printf(\"clean up all old files.\\n\");\n    remove_path_response rem_resp;\n    s->remove_path(remove_path_request{\"hdfs_concurrent_test\", true},\n                   LPC_TEST_HDFS,\n                   [&rem_resp](const remove_path_response &resp) { rem_resp = resp; },\n                   nullptr)\n        ->wait();\n    ASSERT_TRUE(dsn::ERR_OK == rem_resp.err || dsn::ERR_OBJECT_NOT_FOUND == rem_resp.err);\n\n    printf(\"test concurrent upload files.\\n\");\n    {\n        std::vector<block_file_ptr> block_files;\n        for (int i = 0; i < total_files; ++i) {\n            create_file_response cf_resp;\n            s->create_file(create_file_request{remote_file_names[i], true},\n                           LPC_TEST_HDFS,\n                           [&cf_resp](const create_file_response &resp) { cf_resp = resp; },\n                           nullptr)\n                ->wait();\n            ASSERT_EQ(dsn::ERR_OK, cf_resp.err);\n            ASSERT_NE(nullptr, 
cf_resp.file_handle.get());\n            block_files.push_back(cf_resp.file_handle);\n        }\n\n        std::vector<dsn::task_ptr> callbacks;\n        for (int i = 0; i < total_files; ++i) {\n            block_file_ptr p = block_files[i];\n            dsn::task_ptr t =\n                p->upload(upload_request{local_file_names[i]},\n                          LPC_TEST_HDFS,\n                          [p, &local_file_names, &files_size, i](const upload_response &resp) {\n                              printf(\"file %s upload finished.\\n\", local_file_names[i].c_str());\n                              ASSERT_EQ(dsn::ERR_OK, resp.err);\n                              ASSERT_EQ(files_size[i], resp.uploaded_size);\n                              ASSERT_EQ(files_size[i], p->get_size());\n                          });\n            callbacks.push_back(t);\n        }\n\n        for (int i = 0; i < total_files; ++i) {\n            callbacks[i]->wait();\n        }\n    }\n\n    printf(\"test concurrent download files.\\n\");\n    {\n        std::vector<block_file_ptr> block_files;\n        for (int i = 0; i < total_files; ++i) {\n            create_file_response cf_resp;\n            s->create_file(create_file_request{remote_file_names[i], true},\n                           LPC_TEST_HDFS,\n                           [&cf_resp](const create_file_response &r) { cf_resp = r; },\n                           nullptr)\n                ->wait();\n            ASSERT_EQ(dsn::ERR_OK, cf_resp.err);\n            ASSERT_NE(nullptr, cf_resp.file_handle.get());\n            block_files.push_back(cf_resp.file_handle);\n        }\n\n        std::vector<dsn::task_ptr> callbacks;\n        for (int i = 0; i < total_files; ++i) {\n            block_file_ptr p = block_files[i];\n            dsn::task_ptr t = p->download(\n                download_request{downloaded_file_names[i], 0, -1},\n                LPC_TEST_HDFS,\n                [&files_md5sum, &downloaded_file_names, &files_size, i, p](\n    
                const download_response &dr) {\n                    printf(\"file %s download finished\\n\", downloaded_file_names[i].c_str());\n                    ASSERT_EQ(dsn::ERR_OK, dr.err);\n                    ASSERT_EQ(files_size[i], dr.downloaded_size);\n                    ASSERT_EQ(files_size[i], p->get_size());\n                    std::string md5;\n                    dsn::utils::filesystem::md5sum(downloaded_file_names[i], md5);\n                    ASSERT_EQ(files_md5sum[i], md5);\n                });\n            callbacks.push_back(t);\n        }\n\n        for (int i = 0; i < total_files; ++i) {\n            callbacks[i]->wait();\n        }\n    }\n\n    // clean up local test files.\n    for (int i = 0; i < total_files; ++i) {\n        utils::filesystem::remove_path(local_file_names[i]);\n        utils::filesystem::remove_path(downloaded_file_names[i]);\n    }\n}\n\nTEST_F(HDFSClientTest, test_rename_path_while_writing)\n{\n    task_tracker tracker;\n    write_test_files_async(&tracker);\n    usleep(100);\n    std::string rename_dir = \"rename_dir.\" + std::to_string(dsn_now_ms());\n    // rename succeed but writing failed.\n    ASSERT_TRUE(dsn::utils::filesystem::rename_path(local_test_dir, rename_dir));\n    tracker.cancel_outstanding_tasks();\n}\n\n#ifndef SANITIZER\nTEST_F(HDFSClientTest, test_remove_path_while_writing)\n{\n    task_tracker tracker;\n    write_test_files_async(&tracker);\n    usleep(100);\n    // couldn't remove the directory while writing files in it.\n    ASSERT_FALSE(dsn::utils::filesystem::remove_path(local_test_dir));\n    tracker.cancel_outstanding_tasks();\n}\n#endif\n"
  },
  {
    "path": "src/block_service/test/local_service_test.cpp",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#include <gtest/gtest.h>\n#include <boost/filesystem.hpp>\n#include <nlohmann/json.hpp>\n\n#include \"block_service/local/local_service.h\"\n\nnamespace dsn {\nnamespace dist {\nnamespace block_service {\n\n// Simple tests for nlohmann::json serialization, via NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE.\n\nTEST(local_service, store_metadata)\n{\n    local_file_object file(\"a.txt\");\n    error_code ec = file.store_metadata();\n    ASSERT_EQ(ec, ERR_OK);\n\n    auto meta_file_path = local_service::get_metafile(file.file_name());\n    ASSERT_TRUE(boost::filesystem::exists(meta_file_path));\n\n    std::ifstream ifs(meta_file_path);\n    nlohmann::json j;\n    ifs >> j;\n    ASSERT_EQ(j[\"md5\"], \"\");\n    ASSERT_EQ(j[\"size\"], 0);\n}\n\nTEST(local_service, load_metadata)\n{\n    local_file_object file(\"a.txt\");\n    auto meta_file_path = local_service::get_metafile(file.file_name());\n\n    {\n        std::ofstream ofs(meta_file_path);\n        nlohmann::json j({{\"md5\", \"abcde\"}, {\"size\", 5}});\n        ofs << j;\n        ofs.close();\n\n        ASSERT_EQ(file.load_metadata(), ERR_OK);\n        ASSERT_EQ(file.get_md5sum(), 
\"abcde\");\n        ASSERT_EQ(file.get_size(), 5);\n    }\n\n    {\n        std::ofstream ofs(meta_file_path);\n        ofs << \"invalid json string\";\n        ofs.close();\n\n        local_file_object file2(\"a.txt\");\n        ASSERT_EQ(file2.load_metadata(), ERR_FS_INTERNAL);\n    }\n\n    {\n        std::ofstream ofs(meta_file_path);\n        nlohmann::json j({{\"md5\", \"abcde\"}, {\"no such key\", \"illegal\"}});\n        ofs << j;\n        ofs.close();\n\n        local_file_object file2(\"a.txt\");\n        ASSERT_EQ(file2.load_metadata(), ERR_FS_INTERNAL);\n    }\n}\n\n} // namespace block_service\n} // namespace dist\n} // namespace dsn\n"
  },
  {
    "path": "src/block_service/test/main.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n\n#include <dsn/service_api_cpp.h>\n\nint g_test_count = 0;\nint g_test_ret = 0;\n\nclass gtest_app : public dsn::service_app\n{\npublic:\n    gtest_app(const dsn::service_app_info *info) : ::dsn::service_app(info) {}\n\n    dsn::error_code start(const std::vector<std::string> &args) override\n    {\n        g_test_ret = RUN_ALL_TESTS();\n        g_test_count = 1;\n        return dsn::ERR_OK;\n    }\n\n    dsn::error_code stop(bool) override { return dsn::ERR_OK; }\n};\n\nGTEST_API_ int main(int argc, char **argv)\n{\n    testing::InitGoogleTest(&argc, argv);\n\n    dsn::service_app::register_factory<gtest_app>(\"replica\");\n\n    dsn_run_config(\"config-test.ini\", false);\n    while (g_test_count == 0) {\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n    }\n\n    dsn_exit(g_test_ret);\n}\n"
  },
  {
    "path": "src/block_service/test/run.sh",
    "content": "#!/bin/sh\n\n##############################################################################\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n##############################################################################\n\nif [ -z \"${REPORT_DIR}\" ]; then\n    REPORT_DIR=\".\"\nfi\n\n./clear.sh\noutput_xml=\"${REPORT_DIR}/dsn_block_service_test.xml\"\nGTEST_OUTPUT=\"xml:${output_xml}\" ./dsn_block_service_test\n"
  },
  {
    "path": "src/client/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_client)\n\n# Source files under CURRENT project directory will be automatically included.\n# You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"\")\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS \"\")\n\n# Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_static_library()\n"
  },
  {
    "path": "src/client/partition_resolver.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/tool-api/group_address.h>\n#include <dsn/dist/replication/partition_resolver.h>\n#include \"partition_resolver_simple.h\"\n#include \"partition_resolver_manager.h\"\n\nnamespace dsn {\nnamespace replication {\n/*static*/\npartition_resolver_ptr partition_resolver::get_resolver(const char *cluster_name,\n                                                        const std::vector<rpc_address> &meta_list,\n                                                        const char *app_name)\n{\n    return partition_resolver_manager::instance().find_or_create(cluster_name, meta_list, app_name);\n}\n\nDEFINE_TASK_CODE(LPC_RPC_DELAY_CALL, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\n\nstatic inline bool 
error_retry(error_code err)\n{\n    return (err != ERR_HANDLER_NOT_FOUND && err != ERR_APP_NOT_EXIST &&\n            err != ERR_OPERATION_DISABLED && err != ERR_BUSY && err != ERR_SPLITTING &&\n            err != ERR_DISK_INSUFFICIENT);\n}\n\nvoid partition_resolver::call_task(const rpc_response_task_ptr &t)\n{\n    auto &hdr = *(t->get_request()->header);\n    uint64_t deadline_ms = dsn_now_ms() + hdr.client.timeout_ms;\n\n    rpc_response_handler old_callback;\n    t->fetch_current_handler(old_callback);\n    auto new_callback = [ this, deadline_ms, oc = std::move(old_callback) ](\n        dsn::error_code err, dsn::message_ex * req, dsn::message_ex * resp)\n    {\n        if (req->header->gpid.value() != 0 && err != ERR_OK && error_retry(err)) {\n            on_access_failure(req->header->gpid.get_partition_index(), err);\n            // still got time, retry\n            uint64_t nms = dsn_now_ms();\n            uint64_t gap = 8 << req->send_retry_count;\n            if (gap > 1000)\n                gap = 1000;\n            if (nms + gap < deadline_ms) {\n                req->send_retry_count++;\n                req->header->client.timeout_ms = static_cast<int>(deadline_ms - nms - gap);\n\n                rpc_response_task_ptr ctask =\n                    dynamic_cast<rpc_response_task *>(task::get_current_task());\n                partition_resolver_ptr r(this);\n\n                dassert(ctask != nullptr, \"current task must be rpc_response_task\");\n                ctask->replace_callback(std::move(oc));\n                dassert(ctask->set_retry(false),\n                        \"rpc_response_task set retry failed, state = %s\",\n                        enum_to_string(ctask->state()));\n\n                // sleep gap milliseconds before retry\n                tasking::enqueue(LPC_RPC_DELAY_CALL,\n                                 nullptr,\n                                 [r, ctask]() { r->call_task(ctask); },\n                                 0,\n             
                    std::chrono::milliseconds(gap));\n                return;\n            } else {\n                derror(\"service access failed (%s), no more time for further \"\n                       \"tries, set error = ERR_TIMEOUT, trace_id = %016\" PRIx64,\n                       err.to_string(),\n                       req->header->trace_id);\n                err = ERR_TIMEOUT;\n            }\n        }\n\n        if (oc)\n            oc(err, req, resp);\n    };\n    t->replace_callback(std::move(new_callback));\n\n    resolve(hdr.client.partition_hash,\n            [t](resolve_result &&result) mutable {\n                if (result.err != ERR_OK) {\n                    t->enqueue(result.err, nullptr);\n                    return;\n                }\n\n                // update gpid when necessary\n                auto &hdr = *(t->get_request()->header);\n                if (hdr.gpid.value() != result.pid.value()) {\n                    if (hdr.client.thread_hash == 0 // thread_hash is not assigned by applications\n                        ||\n                        hdr.gpid.value() != 0 // requests set to child redirect to parent\n                        ) {\n                        hdr.client.thread_hash = result.pid.thread_hash();\n                    }\n                    hdr.gpid = result.pid;\n                }\n                dsn_rpc_call(result.address, t.get());\n            },\n            hdr.client.timeout_ms);\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/client/partition_resolver_manager.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <algorithm>\n#include <dsn/tool-api/group_address.h>\n#include <dsn/dist/replication/partition_resolver.h>\n#include \"partition_resolver_manager.h\"\n#include \"partition_resolver_simple.h\"\n\nnamespace dsn {\nnamespace replication {\n\ntemplate <typename T>\nbool vector_equal(const std::vector<T> &a, const std::vector<T> &b)\n{\n    if (a.size() != b.size())\n        return false;\n    for (const T &item : a) {\n        if (std::find(b.begin(), b.end(), item) == b.end())\n            return false;\n    }\n    for (const T &item : b) {\n        if (std::find(a.begin(), a.end(), item) == a.end())\n            return false;\n    }\n    return true;\n}\n\npartition_resolver_ptr partition_resolver_manager::find_or_create(\n    
const char *cluster_name, const std::vector<rpc_address> &meta_list, const char *app_name)\n{\n    dsn::zauto_lock l(_lock);\n    std::map<std::string, partition_resolver_ptr> &app_map = _resolvers[cluster_name];\n    partition_resolver_ptr &ptr = app_map[app_name];\n\n    if (ptr == nullptr) {\n        dsn::rpc_address meta_group;\n        meta_group.assign_group(cluster_name);\n        meta_group.group_address()->add_list(meta_list);\n        ptr = new partition_resolver_simple(meta_group, app_name);\n        return ptr;\n    } else {\n        dsn::rpc_address meta_group = ptr->get_meta_server();\n        const std::vector<dsn::rpc_address> &existing_list = meta_group.group_address()->members();\n        if (!vector_equal(meta_list, existing_list)) {\n            derror(\"meta list not match for cluster(%s)\", cluster_name);\n            return nullptr;\n        }\n        return ptr;\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/client/partition_resolver_manager.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <vector>\n#include <map>\n#include <dsn/utility/singleton.h>\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/tool-api/rpc_address.h>\n#include <dsn/dist/replication/partition_resolver.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass partition_resolver_manager : public dsn::utils::singleton<partition_resolver_manager>\n{\npublic:\n    partition_resolver_ptr find_or_create(const char *cluster_name,\n                                          const std::vector<rpc_address> &meta_list,\n                                          const char *app_name);\n\nprivate:\n    dsn::zlock _lock;\n    // cluster_name -> <app_name, resolver>\n    std::map<std::string, std::map<std::string, partition_resolver_ptr>> 
_resolvers;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/client/partition_resolver_simple.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/utils.h>\n#include <dsn/utility/rand.h>\n#include <dsn/tool-api/async_calls.h>\n#include \"partition_resolver_simple.h\"\n\nnamespace dsn {\nnamespace replication {\n\npartition_resolver_simple::partition_resolver_simple(rpc_address meta_server, const char *app_name)\n    : partition_resolver(meta_server, app_name),\n      _app_id(-1),\n      _app_partition_count(-1),\n      _app_is_stateful(true)\n{\n}\n\nvoid partition_resolver_simple::resolve(uint64_t partition_hash,\n                                        std::function<void(resolve_result &&)> &&callback,\n                                        int timeout_ms)\n{\n    int idx = -1;\n    if (_app_partition_count != -1) {\n 
       idx = get_partition_index(_app_partition_count, partition_hash);\n        rpc_address target;\n        auto err = get_address(idx, target);\n        if (dsn_unlikely(err == ERR_CHILD_NOT_READY)) {\n            // child partition is not ready, its requests should be sent to parent partition\n            idx -= _app_partition_count / 2;\n            err = get_address(idx, target);\n        }\n        if (dsn_likely(err == ERR_OK)) {\n            callback(resolve_result{ERR_OK, target, {_app_id, idx}});\n            return;\n        }\n    }\n\n    auto rc = new request_context();\n    rc->partition_hash = partition_hash;\n    rc->callback = std::move(callback);\n    rc->partition_index = idx;\n    rc->timeout_timer = nullptr;\n    rc->timeout_ms = timeout_ms;\n    rc->timeout_ts_us = dsn_now_us() + timeout_ms * 1000;\n    rc->completed = false;\n\n    call(std::move(rc), false);\n}\n\nvoid partition_resolver_simple::on_access_failure(int partition_index, error_code err)\n{\n    // ERR_CAPACITY_EXCEEDED : no need for reconfiguration on primary\n    // ERR_NOT_ENOUGH_MEMBER : primary won't change and we only r/w on primary in this provider\n    if (-1 == partition_index || err == ERR_CAPACITY_EXCEEDED || err == ERR_NOT_ENOUGH_MEMBER) {\n        return;\n    }\n\n    zauto_write_lock l(_config_lock);\n    if (err == ERR_PARENT_PARTITION_MISUSED) {\n        ddebug_f(\"clear all partition configuration cache due to access failure {} at {}.{}\",\n                 err,\n                 _app_id,\n                 partition_index);\n        _app_partition_count = -1;\n    } else {\n        ddebug_f(\"clear partition configuration cache {}.{} due to access failure {}\",\n                 _app_id,\n                 partition_index,\n                 err);\n        _config_cache.erase(partition_index);\n    }\n}\n\npartition_resolver_simple::~partition_resolver_simple()\n{\n    _tracker.cancel_outstanding_tasks();\n    clear_all_pending_requests();\n}\n\nvoid 
partition_resolver_simple::clear_all_pending_requests()\n{\n    dinfo(\"%s.client: clear all pending tasks\", _app_name.c_str());\n    zauto_lock l(_requests_lock);\n    // clear _pending_requests\n    for (auto &pc : _pending_requests) {\n        if (pc.second->query_config_task != nullptr)\n            pc.second->query_config_task->cancel(true);\n\n        for (auto &rc : pc.second->requests) {\n            end_request(std::move(rc), ERR_TIMEOUT, rpc_address());\n        }\n        delete pc.second;\n    }\n    _pending_requests.clear();\n}\n\nvoid partition_resolver_simple::on_timeout(request_context_ptr &&rc) const\n{\n    end_request(std::move(rc), ERR_TIMEOUT, rpc_address(), true);\n}\n\nvoid partition_resolver_simple::end_request(request_context_ptr &&request,\n                                            error_code err,\n                                            rpc_address addr,\n                                            bool called_by_timer) const\n{\n    zauto_lock l(request->lock);\n    if (request->completed) {\n        return;\n    }\n\n    if (!called_by_timer && request->timeout_timer != nullptr)\n        request->timeout_timer->cancel(false);\n\n    request->callback(resolve_result{err, addr, {_app_id, request->partition_index}});\n    request->completed = true;\n}\n\nDEFINE_TASK_CODE(LPC_REPLICATION_CLIENT_REQUEST_TIMEOUT, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\nDEFINE_TASK_CODE(LPC_REPLICATION_DELAY_QUERY_CONFIG, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\n\nvoid partition_resolver_simple::call(request_context_ptr &&request, bool from_meta_ack)\n{\n    int pindex = request->partition_index;\n    if (-1 != pindex) {\n        // fill target address if possible\n        rpc_address addr;\n        auto err = get_address(pindex, addr);\n\n        // target address known\n        if (err == ERR_OK) {\n            end_request(std::move(request), ERR_OK, addr);\n            return;\n        }\n    }\n\n    auto nts = dsn_now_us();\n\n    // 
timeout will happen very soon, no way to get the rpc call done\n    if (nts + 100 >= request->timeout_ts_us) // within 100 us\n    {\n        end_request(std::move(request), ERR_TIMEOUT, rpc_address());\n        return;\n    }\n\n    // delay 1 second for further config query\n    if (from_meta_ack) {\n        tasking::enqueue(LPC_REPLICATION_DELAY_QUERY_CONFIG,\n                         &_tracker,\n                         [ =, req2 = request ]() mutable { call(std::move(req2), false); },\n                         0,\n                         std::chrono::seconds(1));\n        return;\n    }\n\n    // calculate timeout\n    int timeout_ms;\n    if (nts + 1000 >= request->timeout_ts_us)\n        timeout_ms = 1;\n    else\n        timeout_ms = static_cast<int>(request->timeout_ts_us - nts) / 1000;\n\n    // init timeout timer only when necessary\n    {\n        zauto_lock l(request->lock);\n        if (request->timeout_timer == nullptr) {\n            request->timeout_timer =\n                tasking::enqueue(LPC_REPLICATION_CLIENT_REQUEST_TIMEOUT,\n                                 &_tracker,\n                                 [ =, req2 = request ]() mutable { on_timeout(std::move(req2)); },\n                                 0,\n                                 std::chrono::milliseconds(timeout_ms));\n        }\n    }\n\n    {\n        zauto_lock l(_requests_lock);\n        if (-1 != pindex) {\n            // put into pending queue of querying target partition\n            auto it = _pending_requests.find(pindex);\n            if (it == _pending_requests.end()) {\n                auto pc = new partition_context();\n                it = _pending_requests.emplace(pindex, pc).first;\n            }\n            it->second->requests.push_back(std::move(request));\n\n            // init configuration query task if necessary\n            if (nullptr == it->second->query_config_task) {\n                it->second->query_config_task = query_config(pindex, timeout_ms);\n       
     }\n        } else {\n            _pending_requests_before_partition_count_unknown.push_back(std::move(request));\n            if (_pending_requests_before_partition_count_unknown.size() == 1) {\n                _query_config_task = query_config(pindex, timeout_ms);\n            }\n        }\n    }\n}\n\n/*send rpc*/\nDEFINE_TASK_CODE_RPC(RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX,\n                     TASK_PRIORITY_COMMON,\n                     THREAD_POOL_DEFAULT)\n\ntask_ptr partition_resolver_simple::query_config(int partition_index, int timeout_ms)\n{\n    dinfo(\"%s.client: start query config, gpid = %d.%d, timeout_ms = %d\",\n          _app_name.c_str(),\n          _app_id,\n          partition_index,\n          timeout_ms);\n    task_spec *sp = task_spec::get(RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX);\n    if (timeout_ms >= sp->rpc_timeout_milliseconds)\n        timeout_ms = 0;\n    auto msg = dsn::message_ex::create_request(RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX, timeout_ms);\n\n    configuration_query_by_index_request req;\n    req.app_name = _app_name;\n    if (partition_index != -1) {\n        req.partition_indices.push_back(partition_index);\n    }\n    marshall(msg, req);\n\n    return rpc::call(\n        _meta_server,\n        msg,\n        &_tracker,\n        [this, partition_index](error_code err, dsn::message_ex *req, dsn::message_ex *resp) {\n            query_config_reply(err, req, resp, partition_index);\n        });\n}\n\nvoid partition_resolver_simple::query_config_reply(error_code err,\n                                                   dsn::message_ex *request,\n                                                   dsn::message_ex *response,\n                                                   int partition_index)\n{\n    auto client_err = ERR_OK;\n\n    if (err == ERR_OK) {\n        configuration_query_by_index_response resp;\n        unmarshall(response, resp);\n        if (resp.err == ERR_OK) {\n            zauto_write_lock 
l(_config_lock);\n\n            if (_app_id != -1 && _app_id != resp.app_id) {\n                dwarn_f(\"app id is changed (mostly the app was removed and created with the same \"\n                        \"name), local Vs remote: %u vs %u \",\n                        _app_id,\n                        resp.app_id);\n            }\n            if (_app_partition_count != -1 && _app_partition_count != resp.partition_count &&\n                _app_partition_count * 2 != resp.partition_count &&\n                _app_partition_count != resp.partition_count * 2) {\n                dwarn_f(\"partition count is changed (mostly the app was removed and created with \"\n                        \"the same name), local Vs remote: %u vs %u \",\n                        _app_partition_count,\n                        resp.partition_count);\n            }\n            _app_id = resp.app_id;\n            _app_partition_count = resp.partition_count;\n            _app_is_stateful = resp.is_stateful;\n\n            for (auto it = resp.partitions.begin(); it != resp.partitions.end(); ++it) {\n                auto &new_config = *it;\n\n                dinfo(\"%s.client: query config reply, gpid = %d.%d, ballot = %\" PRId64\n                      \", primary = %s\",\n                      _app_name.c_str(),\n                      new_config.pid.get_app_id(),\n                      new_config.pid.get_partition_index(),\n                      new_config.ballot,\n                      new_config.primary.to_string());\n\n                auto it2 = _config_cache.find(new_config.pid.get_partition_index());\n                if (it2 == _config_cache.end()) {\n                    std::unique_ptr<partition_info> pi(new partition_info);\n                    pi->timeout_count = 0;\n                    pi->config = new_config;\n                    _config_cache.emplace(new_config.pid.get_partition_index(), std::move(pi));\n                } else if (_app_is_stateful && it2->second->config.ballot < 
new_config.ballot) {\n                    it2->second->timeout_count = 0;\n                    it2->second->config = new_config;\n                } else if (!_app_is_stateful) {\n                    it2->second->timeout_count = 0;\n                    it2->second->config = new_config;\n                } else {\n                    // nothing to do\n                }\n            }\n        } else if (resp.err == ERR_OBJECT_NOT_FOUND) {\n            derror(\"%s.client: query config reply, gpid = %d.%d, err = %s\",\n                   _app_name.c_str(),\n                   _app_id,\n                   partition_index,\n                   resp.err.to_string());\n\n            client_err = ERR_APP_NOT_EXIST;\n        } else {\n            derror(\"%s.client: query config reply, gpid = %d.%d, err = %s\",\n                   _app_name.c_str(),\n                   _app_id,\n                   partition_index,\n                   resp.err.to_string());\n\n            client_err = resp.err;\n        }\n    } else {\n        derror(\"%s.client: query config reply, gpid = %d.%d, err = %s\",\n               _app_name.c_str(),\n               _app_id,\n               partition_index,\n               err.to_string());\n    }\n\n    // get specific or all partition update\n    if (partition_index != -1) {\n        partition_context *pc = nullptr;\n        {\n            zauto_lock l(_requests_lock);\n            auto it = _pending_requests.find(partition_index);\n            if (it != _pending_requests.end()) {\n                pc = it->second;\n                _pending_requests.erase(partition_index);\n            }\n        }\n\n        if (pc) {\n            handle_pending_requests(pc->requests, client_err);\n            delete pc;\n        }\n    }\n\n    // get all partition update\n    else {\n        pending_replica_requests reqs;\n        std::deque<request_context_ptr> reqs2;\n        {\n            zauto_lock l(_requests_lock);\n            
reqs.swap(_pending_requests);\n            reqs2.swap(_pending_requests_before_partition_count_unknown);\n        }\n\n        if (!reqs2.empty()) {\n            if (_app_partition_count != -1) {\n                for (auto &req : reqs2) {\n                    dassert(req->partition_index == -1,\n                            \"invalid partition_index, index = %d\",\n                            req->partition_index);\n                    req->partition_index =\n                        get_partition_index(_app_partition_count, req->partition_hash);\n                }\n            }\n            handle_pending_requests(reqs2, client_err);\n        }\n\n        for (auto &r : reqs) {\n            if (r.second) {\n                handle_pending_requests(r.second->requests, client_err);\n                delete r.second;\n            }\n        }\n    }\n}\n\nvoid partition_resolver_simple::handle_pending_requests(std::deque<request_context_ptr> &reqs,\n                                                        error_code err)\n{\n    for (auto &req : reqs) {\n        if (err == ERR_OK) {\n            rpc_address addr;\n            err = get_address(req->partition_index, addr);\n            if (err == ERR_OK) {\n                end_request(std::move(req), err, addr);\n            } else {\n                call(std::move(req), true);\n            }\n        } else if (err == ERR_HANDLER_NOT_FOUND || err == ERR_APP_NOT_EXIST ||\n                   err == ERR_OPERATION_DISABLED) {\n            end_request(std::move(req), err, rpc_address());\n        } else {\n            call(std::move(req), true);\n        }\n    }\n    reqs.clear();\n}\n\n/*search in cache*/\nrpc_address partition_resolver_simple::get_address(const partition_configuration &config) const\n{\n    if (_app_is_stateful) {\n        return config.primary;\n    } else {\n        if (config.last_drops.size() == 0) {\n            return rpc_address();\n        } else {\n            return 
config.last_drops[rand::next_u32(0, config.last_drops.size() - 1)];\n        }\n    }\n}\n\nerror_code partition_resolver_simple::get_address(int partition_index, /*out*/ rpc_address &addr)\n{\n    // partition_configuration config;\n    {\n        zauto_read_lock l(_config_lock);\n        auto it = _config_cache.find(partition_index);\n        if (it != _config_cache.end()) {\n            // config = it->second->config;\n            if (it->second->config.ballot < 0) {\n                // client query config for splitting app, child partition is not ready\n                return ERR_CHILD_NOT_READY;\n            }\n            addr = get_address(it->second->config);\n            if (addr.is_invalid()) {\n                return ERR_IO_PENDING;\n            } else {\n                return ERR_OK;\n            }\n        } else {\n            return ERR_OBJECT_NOT_FOUND;\n        }\n    }\n}\n\nint partition_resolver_simple::get_partition_index(int partition_count, uint64_t partition_hash)\n{\n    return partition_hash % static_cast<uint64_t>(partition_count);\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/client/partition_resolver_simple.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/tool-api/task_tracker.h>\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/service_api_c.h>\n#include <dsn/cpp/serialization_helper/dsn.layer2_types.h>\n#include <dsn/dist/replication/partition_resolver.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass partition_resolver_simple : public partition_resolver\n{\npublic:\n    partition_resolver_simple(rpc_address meta_server, const char *app_name);\n\n    virtual ~partition_resolver_simple();\n\n    virtual void resolve(uint64_t partition_hash,\n                         std::function<void(resolve_result &&)> &&callback,\n                         int timeout_ms) override;\n\n    virtual void on_access_failure(int partition_index, error_code err) 
override;\n\n    virtual int get_partition_index(int partition_count, uint64_t partition_hash) override;\n\n    int get_partition_count() const { return _app_partition_count; }\n\nprivate:\n    struct partition_info\n    {\n        int timeout_count;\n        ::dsn::partition_configuration config;\n    };\n    mutable dsn::zrwlock_nr _config_lock;\n    std::unordered_map<int, std::unique_ptr<partition_info>> _config_cache;\n\n    int _app_id;\n    int _app_partition_count;\n    bool _app_is_stateful;\n\n    typedef std::function<void(resolve_result &&)> callback_t;\n    struct request_context : ref_counter\n    {\n        int partition_index;\n        uint64_t partition_hash;\n        callback_t callback;\n        int timeout_ms;         // init timeout\n        uint64_t timeout_ts_us; // timeout at this timing point\n\n        zlock lock;             // [\n        task_ptr timeout_timer; // when partition config is unknown at the first place\n        bool completed;\n        // ]\n    };\n    typedef ref_ptr<request_context> request_context_ptr;\n\n    struct partition_context\n    {\n        task_ptr query_config_task;\n        std::deque<request_context_ptr> requests;\n    };\n\n    typedef std::unordered_map<int, partition_context *> pending_replica_requests;\n\n    mutable zlock _requests_lock;\n    pending_replica_requests _pending_requests;\n    std::deque<request_context_ptr> _pending_requests_before_partition_count_unknown;\n    task_ptr _query_config_task;\n\n    dsn::task_tracker _tracker;\n\nprivate:\n    // local routines\n    rpc_address get_address(const partition_configuration &config) const;\n    error_code get_address(int partition_index, /*out*/ rpc_address &addr);\n    void handle_pending_requests(std::deque<request_context_ptr> &reqs, error_code err);\n    void clear_all_pending_requests();\n\n    // with replica\n    void call(request_context_ptr &&request, bool from_meta_ack = false);\n    // void replica_rw_reply(error_code err, 
dsn::message_ex* request, dsn::message_ex* response,\n    // request_context_ptr rc);\n    void end_request(request_context_ptr &&request,\n                     error_code err,\n                     rpc_address addr,\n                     bool called_by_timer = false) const;\n    void on_timeout(request_context_ptr &&rc) const;\n\n    // with meta server\n    task_ptr query_config(int partition_index, int timeout_ms);\n    void query_config_reply(error_code err,\n                            dsn::message_ex *request,\n                            dsn::message_ex *response,\n                            int partition_index);\n};\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/client/replication_ddl_client.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/dist/replication/replication_ddl_client.h>\n\n#include <arpa/inet.h>\n#include <netdb.h>\n#include <sys/socket.h>\n\n#include <fstream>\n#include <iomanip>\n#include <iostream>\n\n#include <boost/lexical_cast.hpp>\n#include <dsn/dist/replication/duplication_common.h>\n#include <dsn/dist/replication/replication_other_types.h>\n#include <dsn/tool-api/group_address.h>\n#include <dsn/utility/error_code.h>\n#include <dsn/utility/output_utils.h>\n#include <fmt/format.h>\n#include <dsn/utils/time_utils.h>\n\n#include \"common/replication_common.h\"\n#include \"common/bulk_load_common.h\"\n#include \"common/partition_split_common.h\"\n#include \"common/manual_compact.h\"\n#include \"meta/meta_rpc_types.h\"\n\nnamespace dsn 
{\nnamespace replication {\n\nusing tp_output_format = ::dsn::utils::table_printer::output_format;\n\nreplication_ddl_client::replication_ddl_client(const std::vector<dsn::rpc_address> &meta_servers)\n{\n    _meta_server.assign_group(\"meta-servers\");\n    for (auto &m : meta_servers) {\n        _meta_server.group_address()->add(m);\n    }\n}\n\nreplication_ddl_client::~replication_ddl_client() { _tracker.cancel_outstanding_tasks(); }\n\ndsn::error_code replication_ddl_client::wait_app_ready(const std::string &app_name,\n                                                       int partition_count,\n                                                       int max_replica_count)\n{\n    int sleep_sec = 2;\n    while (true) {\n        std::this_thread::sleep_for(std::chrono::seconds(sleep_sec));\n\n        std::shared_ptr<configuration_query_by_index_request> query_req(\n            new configuration_query_by_index_request());\n        query_req->app_name = app_name;\n\n        auto query_task = request_meta<configuration_query_by_index_request>(\n            RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX, query_req);\n        query_task->wait();\n        if (query_task->error() == ERR_INVALID_STATE) {\n            std::cout << app_name << \" not ready yet, still waiting...\" << std::endl;\n            continue;\n        }\n\n        if (query_task->error() != dsn::ERR_OK) {\n            std::cout << \"create app \" << app_name\n                      << \" failed: [query] call server error: \" << query_task->error().to_string()\n                      << std::endl;\n            return query_task->error();\n        }\n\n        dsn::configuration_query_by_index_response query_resp;\n        ::dsn::unmarshall(query_task->get_response(), query_resp);\n        if (query_resp.err != dsn::ERR_OK) {\n            std::cout << \"create app \" << app_name\n                      << \" failed: [query] received server error: \" << query_resp.err.to_string()\n                      << 
std::endl;\n            return query_resp.err;\n        }\n        dassert(partition_count == query_resp.partition_count, \"partition count not equal\");\n        int ready_count = 0;\n        for (int i = 0; i < partition_count; i++) {\n            const partition_configuration &pc = query_resp.partitions[i];\n            if (!pc.primary.is_invalid() && (pc.secondaries.size() + 1 >= max_replica_count)) {\n                ready_count++;\n            }\n        }\n        if (ready_count == partition_count) {\n            std::cout << app_name << \" is ready now: (\" << ready_count << \"/\" << partition_count\n                      << \")\" << std::endl;\n            break;\n        }\n        std::cout << app_name << \" not ready yet, still waiting... (\" << ready_count << \"/\"\n                  << partition_count << \")\" << std::endl;\n    }\n    return dsn::ERR_OK;\n}\n\ndsn::error_code replication_ddl_client::create_app(const std::string &app_name,\n                                                   const std::string &app_type,\n                                                   int partition_count,\n                                                   int replica_count,\n                                                   const std::map<std::string, std::string> &envs,\n                                                   bool is_stateless)\n{\n    if (partition_count < 1) {\n        std::cout << \"create app \" << app_name << \" failed: partition_count should >= 1\"\n                  << std::endl;\n        return ERR_INVALID_PARAMETERS;\n    }\n\n    if (replica_count < 1) {\n        std::cout << \"create app \" << app_name << \" failed: replica_count should >= 1\" << std::endl;\n        return ERR_INVALID_PARAMETERS;\n    }\n\n    if (app_name.empty() ||\n        !std::all_of(app_name.cbegin(),\n                     app_name.cend(),\n                     (bool (*)(int))replication_ddl_client::valid_app_char)) {\n        std::cout << \"create app \" << app_name 
<< \" failed: invalid app_name\" << std::endl;\n        return ERR_INVALID_PARAMETERS;\n    }\n\n    if (app_type.empty() ||\n        !std::all_of(app_type.cbegin(),\n                     app_type.cend(),\n                     (bool (*)(int))replication_ddl_client::valid_app_char)) {\n        std::cout << \"create app \" << app_name << \" failed: invalid app_type\" << std::endl;\n        return ERR_INVALID_PARAMETERS;\n    }\n\n    std::shared_ptr<configuration_create_app_request> req(new configuration_create_app_request());\n    req->app_name = app_name;\n    req->options.partition_count = partition_count;\n    req->options.replica_count = replica_count;\n    req->options.success_if_exist = true;\n    req->options.app_type = app_type;\n    req->options.envs = envs;\n    req->options.is_stateful = !is_stateless;\n\n    auto resp_task = request_meta<configuration_create_app_request>(RPC_CM_CREATE_APP, req);\n    resp_task->wait();\n    if (resp_task->error() != dsn::ERR_OK) {\n        std::cout << \"create app \" << app_name\n                  << \" failed: [create] call server error: \" << resp_task->error().to_string()\n                  << std::endl;\n        return resp_task->error();\n    }\n\n    dsn::replication::configuration_create_app_response resp;\n    ::dsn::unmarshall(resp_task->get_response(), resp);\n    if (resp.err != dsn::ERR_OK) {\n        std::cout << \"create app \" << app_name\n                  << \" failed: [create] received server error: \" << resp.err.to_string()\n                  << std::endl;\n        return resp.err;\n    }\n\n    std::cout << \"create app \" << app_name << \" succeed, waiting for app ready\" << std::endl;\n\n    dsn::error_code error = wait_app_ready(app_name, partition_count, replica_count);\n    if (error == dsn::ERR_OK)\n        std::cout << app_name << \" is ready now!\" << std::endl;\n    return error;\n}\n\ndsn::error_code replication_ddl_client::drop_app(const std::string &app_name, int reserve_seconds)\n{\n    
if (app_name.empty() ||\n        !std::all_of(app_name.cbegin(),\n                     app_name.cend(),\n                     (bool (*)(int))replication_ddl_client::valid_app_char))\n        return ERR_INVALID_PARAMETERS;\n\n    std::shared_ptr<configuration_drop_app_request> req(new configuration_drop_app_request());\n    req->app_name = app_name;\n    req->options.success_if_not_exist = true;\n    req->options.__set_reserve_seconds(reserve_seconds);\n\n    auto resp_task = request_meta<configuration_drop_app_request>(RPC_CM_DROP_APP, req);\n    resp_task->wait();\n    if (resp_task->error() != dsn::ERR_OK) {\n        return resp_task->error();\n    }\n\n    dsn::replication::configuration_drop_app_response resp;\n    ::dsn::unmarshall(resp_task->get_response(), resp);\n    if (resp.err != dsn::ERR_OK) {\n        return resp.err;\n    }\n    return dsn::ERR_OK;\n}\n\ndsn::error_code replication_ddl_client::recall_app(int32_t app_id, const std::string &new_app_name)\n{\n    if (!std::all_of(new_app_name.cbegin(),\n                     new_app_name.cend(),\n                     (bool (*)(int))replication_ddl_client::valid_app_char))\n        return ERR_INVALID_PARAMETERS;\n\n    std::shared_ptr<configuration_recall_app_request> req =\n        std::make_shared<configuration_recall_app_request>();\n    req->app_id = app_id;\n    req->new_app_name = new_app_name;\n\n    auto resp_task = request_meta<configuration_recall_app_request>(RPC_CM_RECALL_APP, req);\n    resp_task->wait();\n    if (resp_task->error() != dsn::ERR_OK)\n        return resp_task->error();\n\n    dsn::replication::configuration_recall_app_response resp;\n    dsn::unmarshall(resp_task->get_response(), resp);\n    if (resp.err != dsn::ERR_OK)\n        return resp.err;\n    std::cout << \"recall app ok, id(\" << resp.info.app_id << \"), \"\n              << \"name(\" << resp.info.app_name << \"), \"\n              << \"partition_count(\" << resp.info.partition_count << \"), wait it ready\" << 
std::endl;\n    return wait_app_ready(\n        resp.info.app_name, resp.info.partition_count, resp.info.max_replica_count);\n}\n\ndsn::error_code replication_ddl_client::list_apps(const dsn::app_status::type status,\n                                                  std::vector<::dsn::app_info> &apps)\n{\n    std::shared_ptr<configuration_list_apps_request> req(new configuration_list_apps_request());\n    req->status = status;\n\n    auto resp_task = request_meta<configuration_list_apps_request>(RPC_CM_LIST_APPS, req);\n    resp_task->wait();\n    if (resp_task->error() != dsn::ERR_OK) {\n        return resp_task->error();\n    }\n\n    dsn::replication::configuration_list_apps_response resp;\n    ::dsn::unmarshall(resp_task->get_response(), resp);\n    if (resp.err != dsn::ERR_OK) {\n        return resp.err;\n    }\n\n    apps = resp.infos;\n\n    return dsn::ERR_OK;\n}\n\ndsn::error_code replication_ddl_client::list_apps(const dsn::app_status::type status,\n                                                  bool show_all,\n                                                  bool detailed,\n                                                  bool json,\n                                                  const std::string &file_name)\n{\n    std::vector<::dsn::app_info> apps;\n    auto r = list_apps(status, apps);\n    if (r != dsn::ERR_OK) {\n        return r;\n    }\n\n    // print configuration_list_apps_response\n    std::streambuf *buf;\n    std::ofstream of;\n\n    if (!file_name.empty()) {\n        of.open(file_name);\n        buf = of.rdbuf();\n    } else {\n        buf = std::cout.rdbuf();\n    }\n    std::ostream out(buf);\n\n    size_t max_app_name_size = 20;\n    for (int i = 0; i < apps.size(); i++) {\n        dsn::app_info info = apps[i];\n        if (!show_all && info.status != app_status::AS_AVAILABLE) {\n            continue;\n        }\n        max_app_name_size = std::max(max_app_name_size, info.app_name.size() + 2);\n    }\n\n    
dsn::utils::multi_table_printer mtp;\n    dsn::utils::table_printer tp_general(\"general_info\");\n    tp_general.add_title(\"app_id\");\n    tp_general.add_column(\"status\");\n    tp_general.add_column(\"app_name\");\n    tp_general.add_column(\"app_type\");\n    tp_general.add_column(\"partition_count\");\n    tp_general.add_column(\"replica_count\");\n    tp_general.add_column(\"is_stateful\");\n    tp_general.add_column(\"create_time\");\n    tp_general.add_column(\"drop_time\");\n    tp_general.add_column(\"drop_expire\");\n    tp_general.add_column(\"envs_count\");\n\n    int available_app_count = 0;\n    for (int i = 0; i < apps.size(); i++) {\n        dsn::app_info info = apps[i];\n        if (!show_all && info.status != app_status::AS_AVAILABLE) {\n            continue;\n        }\n        std::string status_str = enum_to_string(info.status);\n        status_str = status_str.substr(status_str.find(\"AS_\") + 3);\n        std::string create_time = \"-\";\n        if (info.create_second > 0) {\n            char buf[20];\n            dsn::utils::time_ms_to_date_time((uint64_t)info.create_second * 1000, buf, 20);\n            buf[10] = '_';\n            create_time = buf;\n        }\n        std::string drop_time = \"-\";\n        std::string drop_expire_time = \"-\";\n        if (info.status == app_status::AS_AVAILABLE) {\n            available_app_count++;\n        } else if (info.status == app_status::AS_DROPPED && info.expire_second > 0) {\n            if (info.drop_second > 0) {\n                char buf[20];\n                dsn::utils::time_ms_to_date_time((uint64_t)info.drop_second * 1000, buf, 20);\n                buf[10] = '_';\n                drop_time = buf;\n            }\n            if (info.expire_second > 0) {\n                char buf[20];\n                dsn::utils::time_ms_to_date_time((uint64_t)info.expire_second * 1000, buf, 20);\n                buf[10] = '_';\n                drop_expire_time = buf;\n            }\n        }\n       
 tp_general.add_row(info.app_id);\n        tp_general.append_data(status_str);\n        tp_general.append_data(info.app_name);\n        tp_general.append_data(info.app_type);\n        tp_general.append_data(info.partition_count);\n        tp_general.append_data(info.max_replica_count);\n        tp_general.append_data(info.is_stateful);\n        tp_general.append_data(create_time);\n        tp_general.append_data(drop_time);\n        tp_general.append_data(drop_expire_time);\n        tp_general.append_data(info.envs.size());\n    }\n    mtp.add(std::move(tp_general));\n\n    int total_fully_healthy_app_count = 0;\n    int total_unhealthy_app_count = 0;\n    int total_write_unhealthy_app_count = 0;\n    int total_read_unhealthy_app_count = 0;\n    if (detailed && available_app_count > 0) {\n        dsn::utils::table_printer tp_health(\"healthy_info\");\n        tp_health.add_title(\"app_id\");\n        tp_health.add_column(\"app_name\");\n        tp_health.add_column(\"partition_count\");\n        tp_health.add_column(\"fully_healthy\");\n        tp_health.add_column(\"unhealthy\");\n        tp_health.add_column(\"write_unhealthy\");\n        tp_health.add_column(\"read_unhealthy\");\n        for (auto &info : apps) {\n            if (info.status != app_status::AS_AVAILABLE) {\n                continue;\n            }\n            int32_t app_id;\n            int32_t partition_count;\n            std::vector<partition_configuration> partitions;\n            r = list_app(info.app_name, app_id, partition_count, partitions);\n            if (r != dsn::ERR_OK) {\n                derror(\"list app(%s) failed, err = %s\", info.app_name.c_str(), r.to_string());\n                return r;\n            }\n            dassert(info.app_id == app_id, \"invalid app_id, %d VS %d\", info.app_id, app_id);\n            dassert(info.partition_count == partition_count,\n                    \"invalid partition_count, %d VS %d\",\n                    info.partition_count,\n               
     partition_count);\n            int fully_healthy = 0;\n            int write_unhealthy = 0;\n            int read_unhealthy = 0;\n            for (int i = 0; i < partitions.size(); i++) {\n                const dsn::partition_configuration &p = partitions[i];\n                int replica_count = 0;\n                if (!p.primary.is_invalid()) {\n                    replica_count++;\n                }\n                replica_count += p.secondaries.size();\n                if (!p.primary.is_invalid()) {\n                    if (replica_count >= p.max_replica_count)\n                        fully_healthy++;\n                    else if (replica_count < 2)\n                        write_unhealthy++;\n                } else {\n                    write_unhealthy++;\n                    read_unhealthy++;\n                }\n            }\n            tp_health.add_row(info.app_id);\n            tp_health.append_data(info.app_name);\n            tp_health.append_data(info.partition_count);\n            tp_health.append_data(fully_healthy);\n            tp_health.append_data(info.partition_count - fully_healthy);\n            tp_health.append_data(write_unhealthy);\n            tp_health.append_data(read_unhealthy);\n\n            if (fully_healthy == info.partition_count)\n                total_fully_healthy_app_count++;\n            else\n                total_unhealthy_app_count++;\n            if (write_unhealthy > 0)\n                total_write_unhealthy_app_count++;\n            if (read_unhealthy > 0)\n                total_read_unhealthy_app_count++;\n        }\n        mtp.add(std::move(tp_health));\n    }\n\n    dsn::utils::table_printer tp_count(\"summary\");\n    tp_count.add_row_name_and_data(\"total_app_count\", available_app_count);\n    if (detailed && available_app_count > 0) {\n        tp_count.add_row_name_and_data(\"fully_healthy_app_count\", total_fully_healthy_app_count);\n        tp_count.add_row_name_and_data(\"unhealthy_app_count\", 
total_unhealthy_app_count);\n        tp_count.add_row_name_and_data(\"write_unhealthy_app_count\",\n                                       total_write_unhealthy_app_count);\n        tp_count.add_row_name_and_data(\"read_unhealthy_app_count\", total_read_unhealthy_app_count);\n    }\n    mtp.add(std::move(tp_count));\n\n    mtp.output(out, json ? tp_output_format::kJsonPretty : tp_output_format::kTabular);\n\n    return dsn::ERR_OK;\n}\n\ndsn::error_code replication_ddl_client::list_nodes(\n    const dsn::replication::node_status::type status,\n    std::map<dsn::rpc_address, dsn::replication::node_status::type> &nodes)\n{\n    std::shared_ptr<configuration_list_nodes_request> req(new configuration_list_nodes_request());\n    req->status = status;\n    auto resp_task = request_meta<configuration_list_nodes_request>(RPC_CM_LIST_NODES, req);\n    resp_task->wait();\n    if (resp_task->error() != dsn::ERR_OK) {\n        return resp_task->error();\n    }\n\n    dsn::replication::configuration_list_nodes_response resp;\n    ::dsn::unmarshall(resp_task->get_response(), resp);\n    if (resp.err != dsn::ERR_OK) {\n        return resp.err;\n    }\n\n    for (const dsn::replication::node_info &n : resp.infos) {\n        nodes[n.address] = n.status;\n    }\n\n    return dsn::ERR_OK;\n}\n\nstruct list_nodes_helper\n{\n    std::string node_name;\n    std::string node_status;\n    int primary_count;\n    int secondary_count;\n    list_nodes_helper(const std::string &n, const std::string &s)\n        : node_name(n), node_status(s), primary_count(0), secondary_count(0)\n    {\n    }\n};\n\nstd::string host_name_resolve(bool resolve_ip, std::string value)\n{\n    if (resolve_ip) {\n        std::string temp;\n        if (dsn::utils::hostname_from_ip_port(value.c_str(), &temp))\n            return temp;\n    }\n    return value;\n}\n\ndsn::error_code replication_ddl_client::list_nodes(const dsn::replication::node_status::type status,\n                                                   
bool detailed,\n                                                   const std::string &file_name,\n                                                   bool resolve_ip)\n{\n    std::map<dsn::rpc_address, dsn::replication::node_status::type> nodes;\n    auto r = list_nodes(status, nodes);\n    if (r != dsn::ERR_OK) {\n        return r;\n    }\n\n    std::map<dsn::rpc_address, list_nodes_helper> tmp_map;\n    int alive_node_count = 0;\n    for (auto &kv : nodes) {\n        if (kv.second == dsn::replication::node_status::NS_ALIVE)\n            alive_node_count++;\n        std::string status_str = enum_to_string(kv.second);\n        status_str = status_str.substr(status_str.find(\"NS_\") + 3);\n        tmp_map.emplace(\n            kv.first,\n            list_nodes_helper(host_name_resolve(resolve_ip, kv.first.to_std_string()), status_str));\n    }\n\n    if (detailed) {\n        std::vector<::dsn::app_info> apps;\n        r = list_apps(dsn::app_status::AS_AVAILABLE, apps);\n        if (r != dsn::ERR_OK) {\n            return r;\n        }\n\n        for (auto &app : apps) {\n            int32_t app_id;\n            int32_t partition_count;\n            std::vector<partition_configuration> partitions;\n            r = list_app(app.app_name, app_id, partition_count, partitions);\n            if (r != dsn::ERR_OK) {\n                return r;\n            }\n\n            for (int i = 0; i < partitions.size(); i++) {\n                const dsn::partition_configuration &p = partitions[i];\n                if (!p.primary.is_invalid()) {\n                    auto find = tmp_map.find(p.primary);\n                    if (find != tmp_map.end()) {\n                        find->second.primary_count++;\n                    }\n                }\n                for (int j = 0; j < p.secondaries.size(); j++) {\n                    auto find = tmp_map.find(p.secondaries[j]);\n                    if (find != tmp_map.end()) {\n                        find->second.secondary_count++;\n    
                }\n                }\n            }\n        }\n    }\n\n    // print configuration_list_nodes_response\n    std::streambuf *buf;\n    std::ofstream of;\n\n    if (!file_name.empty()) {\n        of.open(file_name);\n        buf = of.rdbuf();\n    } else {\n        buf = std::cout.rdbuf();\n    }\n    std::ostream out(buf);\n\n    dsn::utils::table_printer tp;\n    tp.add_title(\"address\");\n    tp.add_column(\"status\");\n    if (detailed) {\n        tp.add_column(\"replica_count\");\n        tp.add_column(\"primary_count\");\n        tp.add_column(\"secondary_count\");\n    }\n    for (auto &kv : tmp_map) {\n        tp.add_row(kv.second.node_name);\n        tp.append_data(kv.second.node_status);\n        if (detailed) {\n            tp.append_data(kv.second.primary_count + kv.second.secondary_count);\n            tp.append_data(kv.second.primary_count);\n            tp.append_data(kv.second.secondary_count);\n        }\n    }\n    tp.output(out);\n    out << std::endl;\n\n    dsn::utils::table_printer tp_count;\n    tp_count.add_row_name_and_data(\"total_node_count\", nodes.size());\n    tp_count.add_row_name_and_data(\"alive_node_count\", alive_node_count);\n    tp_count.add_row_name_and_data(\"unalive_node_count\", nodes.size() - alive_node_count);\n    tp_count.output(out);\n    out << std::endl;\n\n    return dsn::ERR_OK;\n#undef RESOLVE\n}\n\ndsn::error_code replication_ddl_client::cluster_name(int64_t timeout_ms, std::string &cluster_name)\n{\n    std::shared_ptr<configuration_cluster_info_request> req(\n        new configuration_cluster_info_request());\n\n    auto resp_task =\n        request_meta<configuration_cluster_info_request>(RPC_CM_CLUSTER_INFO, req, timeout_ms);\n    resp_task->wait();\n    if (resp_task->error() != dsn::ERR_OK) {\n        return resp_task->error();\n    }\n\n    configuration_cluster_info_response resp;\n    ::dsn::unmarshall(resp_task->get_response(), resp);\n    if (resp.err != dsn::ERR_OK) {\n        return 
resp.err;\n    }\n\n    cluster_name.clear();\n    for (int i = 0; i < resp.keys.size(); ++i) {\n        if (resp.keys[i] == \"cluster_name\") {\n            cluster_name = resp.values[i];\n        }\n    }\n\n    return cluster_name.empty() ? dsn::ERR_UNKNOWN : dsn::ERR_OK;\n}\n\ndsn::error_code\nreplication_ddl_client::cluster_info(const std::string &file_name, bool resolve_ip, bool json)\n{\n    std::shared_ptr<configuration_cluster_info_request> req(\n        new configuration_cluster_info_request());\n\n    auto resp_task = request_meta<configuration_cluster_info_request>(RPC_CM_CLUSTER_INFO, req);\n    resp_task->wait();\n    if (resp_task->error() != dsn::ERR_OK) {\n        return resp_task->error();\n    }\n\n    configuration_cluster_info_response resp;\n    ::dsn::unmarshall(resp_task->get_response(), resp);\n    if (resp.err != dsn::ERR_OK) {\n        return resp.err;\n    }\n\n    // print configuration_cluster_info_response\n    std::streambuf *buf;\n    std::ofstream of;\n\n    if (!file_name.empty()) {\n        of.open(file_name);\n        buf = of.rdbuf();\n    } else {\n        buf = std::cout.rdbuf();\n    }\n    std::ostream out(buf);\n\n    if (resolve_ip) {\n        for (int i = 0; i < resp.keys.size(); ++i) {\n            if (resp.keys[i] == \"meta_servers\") {\n                dsn::utils::list_hostname_from_ip_port(resp.values[i].c_str(), &resp.values[i]);\n            } else if (resp.keys[i] == \"primary_meta_server\") {\n                dsn::utils::hostname_from_ip_port(resp.values[i].c_str(), &resp.values[i]);\n            }\n        }\n    }\n\n    dsn::utils::table_printer tp(\"cluster_info\");\n    for (int i = 0; i < resp.keys.size(); i++) {\n        tp.add_row_name_and_data(resp.keys[i], resp.values[i]);\n    }\n    tp.output(out, json ? 
tp_output_format::kJsonPretty : tp_output_format::kTabular);\n    return dsn::ERR_OK;\n}\n\ndsn::error_code replication_ddl_client::list_app(const std::string &app_name,\n                                                 bool detailed,\n                                                 bool json,\n                                                 const std::string &file_name,\n                                                 bool resolve_ip)\n{\n    dsn::utils::multi_table_printer mtp;\n    dsn::utils::table_printer tp_params(\"parameters\");\n    if (!(app_name.empty() && file_name.empty())) {\n        if (!app_name.empty())\n            tp_params.add_row_name_and_data(\"app_name\", app_name);\n        if (!file_name.empty())\n            tp_params.add_row_name_and_data(\"out_file\", file_name);\n    }\n    tp_params.add_row_name_and_data(\"detailed\", detailed);\n    mtp.add(std::move(tp_params));\n    int32_t app_id = 0;\n    int32_t partition_count = 0;\n    int32_t max_replica_count = 0;\n    std::vector<partition_configuration> partitions;\n    dsn::error_code err = list_app(app_name, app_id, partition_count, partitions);\n    if (err != dsn::ERR_OK) {\n        return err;\n    }\n    if (!partitions.empty()) {\n        max_replica_count = partitions[0].max_replica_count;\n    }\n\n    // print configuration_query_by_index_response\n    std::streambuf *buf;\n    std::ofstream of;\n\n    if (!file_name.empty()) {\n        of.open(file_name);\n        buf = of.rdbuf();\n    } else {\n        buf = std::cout.rdbuf();\n    }\n    std::ostream out(buf);\n\n    dsn::utils::table_printer tp_general(\"general\");\n    tp_general.add_row_name_and_data(\"app_name\", app_name);\n    tp_general.add_row_name_and_data(\"app_id\", app_id);\n    tp_general.add_row_name_and_data(\"partition_count\", partition_count);\n    tp_general.add_row_name_and_data(\"max_replica_count\", max_replica_count);\n    mtp.add(std::move(tp_general));\n\n    if (detailed) {\n        
dsn::utils::table_printer tp_details(\"replicas\");\n        tp_details.add_title(\"pidx\");\n        tp_details.add_column(\"ballot\");\n        tp_details.add_column(\"replica_count\");\n        tp_details.add_column(\"primary\");\n        tp_details.add_column(\"secondaries\");\n        std::map<rpc_address, std::pair<int, int>> node_stat;\n\n        int total_prim_count = 0;\n        int total_sec_count = 0;\n        int fully_healthy = 0;\n        int write_unhealthy = 0;\n        int read_unhealthy = 0;\n        for (const auto &p : partitions) {\n            int replica_count = 0;\n            if (!p.primary.is_invalid()) {\n                replica_count++;\n                node_stat[p.primary].first++;\n                total_prim_count++;\n            }\n            replica_count += p.secondaries.size();\n            total_sec_count += p.secondaries.size();\n            if (!p.primary.is_invalid()) {\n                if (replica_count >= p.max_replica_count)\n                    fully_healthy++;\n                else if (replica_count < 2)\n                    write_unhealthy++;\n            } else {\n                write_unhealthy++;\n                read_unhealthy++;\n            }\n            tp_details.add_row(p.pid.get_partition_index());\n            tp_details.append_data(p.ballot);\n            std::stringstream oss;\n            oss << replica_count << \"/\" << p.max_replica_count;\n            tp_details.append_data(oss.str());\n            tp_details.append_data(\n                (p.primary.is_invalid() ? 
\"-\" : host_name_resolve(resolve_ip,\n                                                                  p.primary.to_std_string())));\n            oss.str(\"\");\n            oss << \"[\";\n            // TODO (yingchun) join\n            for (int j = 0; j < p.secondaries.size(); j++) {\n                if (j != 0)\n                    oss << \",\";\n                oss << host_name_resolve(resolve_ip, p.secondaries[j].to_std_string());\n                node_stat[p.secondaries[j]].second++;\n            }\n            oss << \"]\";\n            tp_details.append_data(oss.str());\n        }\n        mtp.add(std::move(tp_details));\n\n        // 'node' section.\n        dsn::utils::table_printer tp_nodes(\"nodes\");\n        tp_nodes.add_title(\"node\");\n        tp_nodes.add_column(\"primary\");\n        tp_nodes.add_column(\"secondary\");\n        tp_nodes.add_column(\"total\");\n        for (auto &kv : node_stat) {\n            tp_nodes.add_row(host_name_resolve(resolve_ip, kv.first.to_std_string()));\n            tp_nodes.append_data(kv.second.first);\n            tp_nodes.append_data(kv.second.second);\n            tp_nodes.append_data(kv.second.first + kv.second.second);\n        }\n        tp_nodes.add_row(\"\");\n        tp_nodes.append_data(total_prim_count);\n        tp_nodes.append_data(total_sec_count);\n        tp_nodes.append_data(total_prim_count + total_sec_count);\n        mtp.add(std::move(tp_nodes));\n\n        // healthy partition count section.\n        dsn::utils::table_printer tp_hpc(\"healthy\");\n        tp_hpc.add_row_name_and_data(\"fully_healthy_partition_count\", fully_healthy);\n        tp_hpc.add_row_name_and_data(\"unhealthy_partition_count\", partition_count - fully_healthy);\n        tp_hpc.add_row_name_and_data(\"write_unhealthy_partition_count\", write_unhealthy);\n        tp_hpc.add_row_name_and_data(\"read_unhealthy_partition_count\", read_unhealthy);\n        mtp.add(std::move(tp_hpc));\n    }\n    mtp.output(out, json ? 
tp_output_format::kJsonPretty : tp_output_format::kTabular);\n    return dsn::ERR_OK;\n#undef RESOLVE\n}\n\ndsn::error_code replication_ddl_client::list_app(const std::string &app_name,\n                                                 int32_t &app_id,\n                                                 int32_t &partition_count,\n                                                 std::vector<partition_configuration> &partitions)\n{\n    if (app_name.empty() ||\n        !std::all_of(app_name.cbegin(),\n                     app_name.cend(),\n                     (bool (*)(int))replication_ddl_client::valid_app_char))\n        return ERR_INVALID_PARAMETERS;\n\n    std::shared_ptr<configuration_query_by_index_request> req(\n        new configuration_query_by_index_request());\n    req->app_name = app_name;\n\n    auto resp_task = request_meta<configuration_query_by_index_request>(\n        RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX, req);\n\n    resp_task->wait();\n    if (resp_task->error() != dsn::ERR_OK) {\n        return resp_task->error();\n    }\n\n    dsn::configuration_query_by_index_response resp;\n    dsn::unmarshall(resp_task->get_response(), resp);\n    if (resp.err != dsn::ERR_OK) {\n        return resp.err;\n    }\n\n    app_id = resp.app_id;\n    partition_count = resp.partition_count;\n    partitions = resp.partitions;\n\n    return dsn::ERR_OK;\n}\n\ndsn::replication::configuration_meta_control_response\nreplication_ddl_client::control_meta_function_level(meta_function_level::type level)\n{\n    std::shared_ptr<configuration_meta_control_request> req =\n        std::make_shared<configuration_meta_control_request>();\n    req->level = level;\n\n    auto response_task = request_meta<configuration_meta_control_request>(RPC_CM_CONTROL_META, req);\n    response_task->wait();\n    configuration_meta_control_response resp;\n    if (response_task->error() != dsn::ERR_OK) {\n        resp.err = response_task->error();\n    } else {\n        
dsn::unmarshall(response_task->get_response(), resp);\n    }\n    return resp;\n}\n\ndsn::error_code\nreplication_ddl_client::send_balancer_proposal(const configuration_balancer_request &request)\n{\n    std::shared_ptr<configuration_balancer_request> req =\n        std::make_shared<configuration_balancer_request>(request);\n\n    auto response_task = request_meta<configuration_balancer_request>(RPC_CM_PROPOSE_BALANCER, req);\n    response_task->wait();\n    if (response_task->error() != dsn::ERR_OK)\n        return response_task->error();\n    dsn::replication::configuration_balancer_response resp;\n    dsn::unmarshall(response_task->get_response(), resp);\n    return resp.err;\n}\n\ndsn::error_code replication_ddl_client::do_recovery(const std::vector<rpc_address> &replica_nodes,\n                                                    int wait_seconds,\n                                                    bool skip_bad_nodes,\n                                                    bool skip_lost_partitions,\n                                                    const std::string &outfile)\n{\n    std::streambuf *buf;\n    std::ofstream of;\n\n    if (!outfile.empty()) {\n        of.open(outfile);\n        buf = of.rdbuf();\n    } else {\n        buf = std::cout.rdbuf();\n    }\n    std::ostream out(buf);\n\n    std::shared_ptr<configuration_recovery_request> req =\n        std::make_shared<configuration_recovery_request>();\n    req->recovery_set.clear();\n    for (const dsn::rpc_address &node : replica_nodes) {\n        if (std::find(req->recovery_set.begin(), req->recovery_set.end(), node) !=\n            req->recovery_set.end()) {\n            out << \"duplicate replica node \" << node.to_string() << \", just ingore it\" << std::endl;\n        } else {\n            req->recovery_set.push_back(node);\n        }\n    }\n    if (req->recovery_set.empty()) {\n        out << \"node set for recovery it empty\" << std::endl;\n        return ERR_INVALID_PARAMETERS;\n    }\n    
req->skip_bad_nodes = skip_bad_nodes;\n    req->skip_lost_partitions = skip_lost_partitions;\n\n    out << \"Wait seconds: \" << wait_seconds << std::endl;\n    out << \"Skip bad nodes: \" << (skip_bad_nodes ? \"true\" : \"false\") << std::endl;\n    out << \"Skip lost partitions: \" << (skip_lost_partitions ? \"true\" : \"false\") << std::endl;\n    out << \"Node list:\" << std::endl;\n    out << \"=============================\" << std::endl;\n    for (auto &node : req->recovery_set) {\n        out << node.to_string() << std::endl;\n    }\n    out << \"=============================\" << std::endl;\n\n    auto response_task = request_meta<configuration_recovery_request>(\n        RPC_CM_START_RECOVERY, req, wait_seconds * 1000);\n    bool wait_done = false;\n    for (int i = 0; i < wait_seconds; ++i) {\n        wait_done = response_task->wait(1000);\n        if (wait_done)\n            break;\n        else\n            out << \"Wait recovery for \" << i << \" seconds\" << std::endl;\n    }\n\n    if (!wait_done || response_task->get_response() == NULL) {\n        out << \"Wait recovery failed, administrator should check the meta for progress\"\n            << std::endl;\n        return dsn::ERR_TIMEOUT;\n    } else {\n        configuration_recovery_response resp;\n        dsn::unmarshall(response_task->get_response(), resp);\n        out << \"Recover result: \" << resp.err.to_string() << std::endl;\n        if (!resp.hint_message.empty()) {\n            out << \"=============================\" << std::endl;\n            out << resp.hint_message;\n            out << \"=============================\" << std::endl;\n        }\n        return resp.err;\n    }\n}\n\ndsn::error_code replication_ddl_client::do_restore(const std::string &backup_provider_name,\n                                                   const std::string &cluster_name,\n                                                   const std::string &policy_name,\n                                               
    int64_t timestamp,\n                                                   const std::string &old_app_name,\n                                                   int32_t old_app_id,\n                                                   const std::string &new_app_name,\n                                                   bool skip_bad_partition,\n                                                   const std::string &restore_path)\n{\n    if (old_app_name.empty() ||\n        !std::all_of(old_app_name.cbegin(),\n                     old_app_name.cend(),\n                     (bool (*)(int))replication_ddl_client::valid_app_char)) {\n        std::cout << \"restore app \" << old_app_name << \" failed: invalid old_app_name\" << std::endl;\n        return ERR_INVALID_PARAMETERS;\n    }\n\n    if (new_app_name.empty() ||\n        !std::all_of(new_app_name.cbegin(),\n                     new_app_name.cend(),\n                     (bool (*)(int))replication_ddl_client::valid_app_char)) {\n        std::cout << \"restore app \" << new_app_name << \" failed: invalid new_app_name\" << std::endl;\n        return ERR_INVALID_PARAMETERS;\n    }\n\n    std::shared_ptr<configuration_restore_request> req =\n        std::make_shared<configuration_restore_request>();\n\n    req->cluster_name = cluster_name;\n    req->policy_name = policy_name;\n    req->app_name = old_app_name;\n    req->app_id = old_app_id;\n    req->new_app_name = new_app_name;\n    req->backup_provider_name = backup_provider_name;\n    req->time_stamp = timestamp;\n    req->skip_bad_partition = skip_bad_partition;\n    if (!restore_path.empty()) {\n        req->__set_restore_path(restore_path);\n        std::cout << \"restore app from the specified path : \" << restore_path << std::endl;\n    }\n\n    auto resp_task = request_meta<configuration_restore_request>(RPC_CM_START_RESTORE, req);\n    bool finish = false;\n    while (!finish) {\n        std::cout << \"sleep 1 second to wait complete...\" << std::endl;\n        
finish = resp_task->wait(1000);\n    }\n\n    if (resp_task->error() != ERR_OK) {\n        return resp_task->error();\n    } else {\n        configuration_create_app_response resp;\n        dsn::unmarshall(resp_task->get_response(), resp);\n        if (resp.err == ERR_OBJECT_NOT_FOUND) {\n            std::cout << \"restore app failed: couldn't find valid app metadata\" << std::endl;\n        } else if (resp.err == ERR_OK) {\n            std::cout << \"\\t\"\n                      << \"new app_id = \" << resp.appid << std::endl;\n        }\n        return resp.err;\n    }\n}\n\ndsn::error_code replication_ddl_client::add_backup_policy(const std::string &policy_name,\n                                                          const std::string &backup_provider_type,\n                                                          const std::vector<int32_t> &app_ids,\n                                                          int64_t backup_interval_seconds,\n                                                          int32_t backup_history_cnt,\n                                                          const std::string &start_time)\n{\n    std::shared_ptr<configuration_add_backup_policy_request> req =\n        std::make_shared<configuration_add_backup_policy_request>();\n    req->policy_name = policy_name;\n    req->backup_provider_type = backup_provider_type;\n    req->app_ids = app_ids;\n    req->backup_interval_seconds = backup_interval_seconds;\n    req->backup_history_count_to_keep = backup_history_cnt;\n    req->start_time = start_time;\n    auto resp_task =\n        request_meta<configuration_add_backup_policy_request>(RPC_CM_ADD_BACKUP_POLICY, req);\n    resp_task->wait();\n\n    if (resp_task->error() != ERR_OK) {\n        return resp_task->error();\n    }\n\n    configuration_add_backup_policy_response resp;\n    ::dsn::unmarshall(resp_task->get_response(), resp);\n\n    if (resp.err != ERR_OK) {\n        return resp.err;\n    } else {\n        std::cout << \"add 
backup policy succeed, policy_name = \" << policy_name << std::endl;\n    }\n    return ERR_OK;\n}\n\nerror_with<start_backup_app_response> replication_ddl_client::backup_app(\n    int32_t app_id, const std::string &backup_provider_type, const std::string &backup_path)\n{\n    auto req = make_unique<start_backup_app_request>();\n    req->app_id = app_id;\n    req->backup_provider_type = backup_provider_type;\n    if (!backup_path.empty()) {\n        req->__set_backup_path(backup_path);\n    }\n    return call_rpc_sync(start_backup_app_rpc(std::move(req), RPC_CM_START_BACKUP_APP));\n}\n\nerror_with<query_backup_status_response> replication_ddl_client::query_backup(int32_t app_id,\n                                                                              int64_t backup_id)\n{\n    auto req = make_unique<query_backup_status_request>();\n    req->app_id = app_id;\n\n    if (backup_id > 0) {\n        req->__set_backup_id(backup_id);\n    }\n    return call_rpc_sync(query_backup_status_rpc(std::move(req), RPC_CM_QUERY_BACKUP_STATUS));\n}\n\ndsn::error_code replication_ddl_client::disable_backup_policy(const std::string &policy_name)\n{\n    std::shared_ptr<configuration_modify_backup_policy_request> req =\n        std::make_shared<configuration_modify_backup_policy_request>();\n    req->policy_name = policy_name;\n    req->__set_is_disable(true);\n\n    auto resp_task =\n        request_meta<configuration_modify_backup_policy_request>(RPC_CM_MODIFY_BACKUP_POLICY, req);\n\n    resp_task->wait();\n    if (resp_task->error() != ERR_OK) {\n        return resp_task->error();\n    }\n\n    configuration_modify_backup_policy_response resp;\n    ::dsn::unmarshall(resp_task->get_response(), resp);\n    if (resp.err != ERR_OK) {\n        return resp.err;\n    } else {\n        std::cout << \"disable policy result: \" << resp.err.to_string() << std::endl;\n        if (!resp.hint_message.empty()) {\n            std::cout << \"=============================\" << std::endl;\n       
     std::cout << resp.hint_message << std::endl;\n            std::cout << \"=============================\" << std::endl;\n        }\n        return resp.err;\n    }\n}\n\ndsn::error_code replication_ddl_client::enable_backup_policy(const std::string &policy_name)\n{\n    std::shared_ptr<configuration_modify_backup_policy_request> req =\n        std::make_shared<configuration_modify_backup_policy_request>();\n    req->policy_name = policy_name;\n    req->__set_is_disable(false);\n\n    auto resp_task =\n        request_meta<configuration_modify_backup_policy_request>(RPC_CM_MODIFY_BACKUP_POLICY, req);\n\n    resp_task->wait();\n    if (resp_task->error() != ERR_OK) {\n        return resp_task->error();\n    }\n\n    configuration_modify_backup_policy_response resp;\n    ::dsn::unmarshall(resp_task->get_response(), resp);\n    if (resp.err != ERR_OK) {\n        return resp.err;\n    } else if (resp.err == ERR_BUSY) {\n        std::cout << \"policy is under backup, please try disable later\" << std::endl;\n        return ERR_OK;\n    } else {\n        std::cout << \"enable policy result: \" << resp.err.to_string() << std::endl;\n        if (!resp.hint_message.empty()) {\n            std::cout << \"=============================\" << std::endl;\n            std::cout << resp.hint_message << std::endl;\n            std::cout << \"=============================\" << std::endl;\n        }\n        return resp.err;\n    }\n}\n\n// help functions\n\n// TODO (yingchun) use join\ntemplate <typename T>\n// make sure T support cout << T;\nstd::string print_set(const std::set<T> &set)\n{\n    std::stringstream ss;\n    ss << \"{\";\n    auto begin = set.begin();\n    auto end = set.end();\n    for (auto it = begin; it != end; it++) {\n        if (it != begin) {\n            ss << \", \";\n        }\n        ss << *it;\n    }\n    ss << \"}\";\n    return ss.str();\n}\n\nstatic void print_policy_entry(const policy_entry &entry)\n{\n    dsn::utils::table_printer tp;\n    
tp.add_row_name_and_data(\"    name\", entry.policy_name);\n    tp.add_row_name_and_data(\"    backup_provider_type\", entry.backup_provider_type);\n    tp.add_row_name_and_data(\"    backup_interval\", entry.backup_interval_seconds + \"s\");\n    tp.add_row_name_and_data(\"    app_ids\", print_set(entry.app_ids));\n    tp.add_row_name_and_data(\"    start_time\", entry.start_time);\n    tp.add_row_name_and_data(\"    status\", entry.is_disable ? \"disabled\" : \"enabled\");\n    tp.add_row_name_and_data(\"    backup_history_count\", entry.backup_history_count_to_keep);\n    tp.output(std::cout);\n}\n\nstatic void print_backup_entry(const backup_entry &bentry)\n{\n    char start_time[30] = {'\\0'};\n    char end_time[30] = {'\\0'};\n    ::dsn::utils::time_ms_to_date_time(bentry.start_time_ms, start_time, 30);\n    if (bentry.end_time_ms == 0) {\n        end_time[0] = '-';\n        end_time[1] = '\\0';\n    } else {\n        ::dsn::utils::time_ms_to_date_time(bentry.end_time_ms, end_time, 30);\n    }\n\n    dsn::utils::table_printer tp;\n    tp.add_row_name_and_data(\"    id\", bentry.backup_id);\n    tp.add_row_name_and_data(\"    start_time\", start_time);\n    tp.add_row_name_and_data(\"    end_time\", end_time);\n    tp.add_row_name_and_data(\"    app_ids\", print_set(bentry.app_ids));\n    tp.output(std::cout);\n}\n\ndsn::error_code replication_ddl_client::ls_backup_policy()\n{\n    std::shared_ptr<configuration_query_backup_policy_request> req =\n        std::make_shared<configuration_query_backup_policy_request>();\n    req->policy_names.clear();\n    req->backup_info_count = 0;\n\n    auto resp_task =\n        request_meta<configuration_query_backup_policy_request>(RPC_CM_QUERY_BACKUP_POLICY, req);\n    resp_task->wait();\n\n    if (resp_task->error() != ERR_OK) {\n        return resp_task->error();\n    }\n    configuration_query_backup_policy_response resp;\n    ::dsn::unmarshall(resp_task->get_response(), resp);\n\n    if (resp.err != ERR_OK) {\n        
return resp.err;\n    } else {\n        for (int32_t idx = 0; idx < resp.policys.size(); idx++) {\n            std::cout << \"[\" << idx + 1 << \"]\" << std::endl;\n            print_policy_entry(resp.policys[idx]);\n            std::cout << std::endl;\n        }\n    }\n    return ERR_OK;\n}\n\ndsn::error_code\nreplication_ddl_client::query_backup_policy(const std::vector<std::string> &policy_names,\n                                            int backup_info_cnt)\n{\n    std::shared_ptr<configuration_query_backup_policy_request> req =\n        std::make_shared<configuration_query_backup_policy_request>();\n    req->policy_names = policy_names;\n    req->backup_info_count = backup_info_cnt;\n\n    auto resp_task =\n        request_meta<configuration_query_backup_policy_request>(RPC_CM_QUERY_BACKUP_POLICY, req);\n    resp_task->wait();\n\n    if (resp_task->error() != ERR_OK) {\n        return resp_task->error();\n    }\n\n    configuration_query_backup_policy_response resp;\n    ::dsn::unmarshall(resp_task->get_response(), resp);\n\n    if (resp.err != ERR_OK) {\n        return resp.err;\n    } else {\n        for (int32_t idx = 0; idx < resp.policys.size(); idx++) {\n            if (idx != 0) {\n                std::cout << \"************************\" << std::endl;\n            }\n            const policy_entry &pentry = resp.policys[idx];\n            std::cout << \"policy_info:\" << std::endl;\n            print_policy_entry(pentry);\n            std::cout << std::endl << \"backup_infos:\" << std::endl;\n            const std::vector<backup_entry> &backup_infos = resp.backup_infos[idx];\n            for (int idx = 0; idx < backup_infos.size(); idx++) {\n                std::cout << \"[\" << (idx + 1) << \"]\" << std::endl;\n                print_backup_entry(backup_infos[idx]);\n            }\n        }\n    }\n    return ERR_OK;\n}\n\ndsn::error_code\nreplication_ddl_client::update_backup_policy(const std::string &policy_name,\n                                
             const std::vector<int32_t> &add_appids,\n                                             const std::vector<int32_t> &removal_appids,\n                                             int64_t new_backup_interval_sec,\n                                             int32_t backup_history_count_to_keep,\n                                             const std::string &start_time)\n{\n    std::shared_ptr<configuration_modify_backup_policy_request> req =\n        std::make_shared<configuration_modify_backup_policy_request>();\n    req->policy_name = policy_name;\n    if (!add_appids.empty()) {\n        req->__set_add_appids(add_appids);\n    }\n    if (!removal_appids.empty()) {\n        req->__set_removal_appids(removal_appids);\n    }\n    if (new_backup_interval_sec > 0) {\n        req->__set_new_backup_interval_sec(new_backup_interval_sec);\n    }\n\n    if (backup_history_count_to_keep > 0) {\n        req->__set_backup_history_count_to_keep(backup_history_count_to_keep);\n    }\n\n    if (!start_time.empty()) {\n        req->__set_start_time(start_time);\n    }\n    auto resp_task =\n        request_meta<configuration_modify_backup_policy_request>(RPC_CM_MODIFY_BACKUP_POLICY, req);\n    resp_task->wait();\n\n    if (resp_task->error() != ERR_OK) {\n        return resp_task->error();\n    }\n\n    configuration_modify_backup_policy_response resp;\n    ::dsn::unmarshall(resp_task->get_response(), resp);\n    if (resp.err != ERR_OK) {\n        return resp.err;\n    } else {\n        std::cout << \"Modify policy result: \" << resp.err.to_string() << std::endl;\n        if (!resp.hint_message.empty()) {\n            std::cout << \"=============================\" << std::endl;\n            std::cout << resp.hint_message << std::endl;\n            std::cout << \"=============================\" << std::endl;\n        }\n        return resp.err;\n    }\n}\n\ndsn::error_code replication_ddl_client::query_restore(int32_t restore_app_id, bool detailed)\n{\n    if 
(restore_app_id <= 0) {\n        return ERR_INVALID_PARAMETERS;\n    }\n    std::shared_ptr<configuration_query_restore_request> req =\n        std::make_shared<configuration_query_restore_request>();\n    req->restore_app_id = restore_app_id;\n\n    auto resp_task =\n        request_meta<configuration_query_restore_request>(RPC_CM_QUERY_RESTORE_STATUS, req);\n\n    resp_task->wait();\n\n    if (resp_task->error() != ERR_OK) {\n        return resp_task->error();\n    }\n\n    configuration_query_restore_response response;\n    ::dsn::unmarshall(resp_task->get_response(), response);\n    if (response.err == ERR_OK) {\n        int overall_progress = 0;\n        for (const auto &p : response.restore_progress) {\n            overall_progress += p;\n        }\n        overall_progress = overall_progress / response.restore_progress.size();\n        overall_progress = overall_progress / 10;\n\n        if (detailed) {\n            int width = strlen(\"restore_status\");\n            std::cout << std::setw(width) << std::left << \"pid\" << std::setw(width) << std::left\n                      << \"progress(%)\" << std::setw(width) << std::left << \"restore_status\"\n                      << std::endl;\n            for (int idx = 0; idx < response.restore_status.size(); idx++) {\n                std::string restore_status = std::string(\"unknown\");\n                if (response.restore_status[idx] == ::dsn::ERR_OK) {\n                    restore_status = (response.restore_progress[idx] == 1000) ? 
\"ok\" : \"ongoing\";\n                } else if (response.restore_status[idx] == ERR_IGNORE_BAD_DATA) {\n                    restore_status = \"skip\";\n                }\n                int progress = response.restore_progress[idx] / 10;\n                std::cout << std::setw(width) << std::left << idx << std::setw(width) << std::left\n                          << progress << std::setw(width) << std::left << restore_status\n                          << std::endl;\n            }\n\n            std::cout << std::endl\n                      << \"the overall progress of restore is \" << overall_progress << \"%\"\n                      << std::endl;\n\n            std::cout << std::endl << \"annotations:\" << std::endl;\n            std::cout << \"    ok : mean restore complete\" << std::endl;\n            std::cout << \"    ongoing : mean restore is under going\" << std::endl;\n            std::cout\n                << \"    skip : data on cold backup media is damaged, but skip the damaged partition\"\n                << std::endl;\n            std::cout << \"    unknown : invalid, should login server and check it\" << std::endl;\n        } else {\n            std::cout << \"the overall progress of restore is \" << overall_progress << \"%\"\n                      << std::endl;\n        }\n    } else if (response.err == ERR_APP_NOT_EXIST) {\n        std::cout << \"invalid restore_app_id(\" << restore_app_id << \")\" << std::endl;\n    } else if (response.err == ERR_APP_DROPPED) {\n        std::cout << \"restore failed, because some partition's data is damaged on cold backup media\"\n                  << std::endl;\n    }\n    return ERR_OK;\n}\n\nerror_with<duplication_add_response> replication_ddl_client::add_dup(\n    std::string app_name, std::string remote_cluster_name, bool is_duplicating_checkpoint)\n{\n    auto req = make_unique<duplication_add_request>();\n    req->app_name = std::move(app_name);\n    req->remote_cluster_name = 
std::move(remote_cluster_name);\n    req->is_duplicating_checkpoint = is_duplicating_checkpoint;\n    return call_rpc_sync(duplication_add_rpc(std::move(req), RPC_CM_ADD_DUPLICATION));\n}\n\nerror_with<duplication_modify_response> replication_ddl_client::change_dup_status(\n    std::string app_name, int dupid, duplication_status::type status)\n{\n    auto req = make_unique<duplication_modify_request>();\n    req->app_name = std::move(app_name);\n    req->dupid = dupid;\n    req->__set_status(status);\n    return call_rpc_sync(duplication_modify_rpc(std::move(req), RPC_CM_MODIFY_DUPLICATION));\n}\n\nerror_with<duplication_modify_response> replication_ddl_client::update_dup_fail_mode(\n    std::string app_name, int dupid, duplication_fail_mode::type fmode)\n{\n    if (_duplication_fail_mode_VALUES_TO_NAMES.find(fmode) ==\n        _duplication_fail_mode_VALUES_TO_NAMES.end()) {\n        return FMT_ERR(ERR_INVALID_PARAMETERS, \"unexpected duplication_fail_mode {}\", fmode);\n    }\n    auto req = make_unique<duplication_modify_request>();\n    req->app_name = std::move(app_name);\n    req->dupid = dupid;\n    req->__set_fail_mode(fmode);\n    return call_rpc_sync(duplication_modify_rpc(std::move(req), RPC_CM_MODIFY_DUPLICATION));\n}\n\nerror_with<duplication_query_response> replication_ddl_client::query_dup(std::string app_name)\n{\n    auto req = make_unique<duplication_query_request>();\n    req->app_name = std::move(app_name);\n    return call_rpc_sync(duplication_query_rpc(std::move(req), RPC_CM_QUERY_DUPLICATION));\n}\n\nbool replication_ddl_client::valid_app_char(int c)\n{\n    return (bool)std::isalnum(c) || c == '_' || c == '.' 
|| c == ':';\n}\n\nvoid replication_ddl_client::end_meta_request(const rpc_response_task_ptr &callback,\n                                              int retry_times,\n                                              error_code err,\n                                              dsn::message_ex *request,\n                                              dsn::message_ex *resp)\n{\n    if (err != dsn::ERR_OK && retry_times < 2) {\n        rpc::call(_meta_server,\n                  request,\n                  &_tracker,\n                  [this, retry_times, callback](\n                      error_code err, dsn::message_ex *request, dsn::message_ex *response) mutable {\n                      end_meta_request(callback, retry_times + 1, err, request, response);\n                  });\n    } else {\n        callback->enqueue(err, (message_ex *)resp);\n    }\n}\n\ndsn::error_code replication_ddl_client::get_app_envs(const std::string &app_name,\n                                                     std::map<std::string, std::string> &envs)\n{\n    std::vector<::dsn::app_info> apps;\n    auto r = list_apps(dsn::app_status::AS_AVAILABLE, apps);\n    if (r != dsn::ERR_OK) {\n        return r;\n    }\n\n    for (auto &app : apps) {\n        if (app.app_name == app_name) {\n            envs = app.envs;\n            return dsn::ERR_OK;\n        }\n    }\n\n    return dsn::ERR_OBJECT_NOT_FOUND;\n}\n\nerror_with<configuration_update_app_env_response>\nreplication_ddl_client::set_app_envs(const std::string &app_name,\n                                     const std::vector<std::string> &keys,\n                                     const std::vector<std::string> &values)\n{\n    auto req = make_unique<configuration_update_app_env_request>();\n    req->__set_app_name(app_name);\n    req->__set_keys(keys);\n    req->__set_values(values);\n    req->__set_op(app_env_operation::type::APP_ENV_OP_SET);\n    return call_rpc_sync(update_app_env_rpc(std::move(req), 
RPC_CM_UPDATE_APP_ENV));\n}\n\n::dsn::error_code replication_ddl_client::del_app_envs(const std::string &app_name,\n                                                       const std::vector<std::string> &keys)\n{\n    std::shared_ptr<configuration_update_app_env_request> req =\n        std::make_shared<configuration_update_app_env_request>();\n    req->__set_app_name(app_name);\n    req->__set_op(app_env_operation::type::APP_ENV_OP_DEL);\n    req->__set_keys(keys);\n\n    auto resp_task = request_meta<configuration_update_app_env_request>(RPC_CM_UPDATE_APP_ENV, req);\n    resp_task->wait();\n\n    if (resp_task->error() != ERR_OK) {\n        return resp_task->error();\n    }\n    configuration_update_app_env_response response;\n    ::dsn::unmarshall(resp_task->get_response(), response);\n    if (response.err != ERR_OK) {\n        return response.err;\n    } else {\n        std::cout << \"del app envs succeed\" << std::endl;\n        if (!response.hint_message.empty()) {\n            std::cout << \"=============================\" << std::endl;\n            std::cout << response.hint_message << std::endl;\n            std::cout << \"=============================\" << std::endl;\n        }\n    }\n    return ERR_OK;\n}\n\n::dsn::error_code replication_ddl_client::clear_app_envs(const std::string &app_name,\n                                                         bool clear_all,\n                                                         const std::string &prefix)\n{\n    std::shared_ptr<configuration_update_app_env_request> req =\n        std::make_shared<configuration_update_app_env_request>();\n    req->__set_app_name(app_name);\n    req->__set_op(app_env_operation::type::APP_ENV_OP_CLEAR);\n    if (clear_all) {\n        req->__set_clear_prefix(\"\");\n    } else {\n        dassert(!prefix.empty(), \"prefix can not be empty\");\n        req->__set_clear_prefix(prefix);\n    }\n\n    auto resp_task = 
request_meta<configuration_update_app_env_request>(RPC_CM_UPDATE_APP_ENV, req);\n    resp_task->wait();\n\n    if (resp_task->error() != ERR_OK) {\n        return resp_task->error();\n    }\n    configuration_update_app_env_response response;\n    ::dsn::unmarshall(resp_task->get_response(), response);\n    if (response.err != ERR_OK) {\n        return response.err;\n    } else {\n        std::cout << \"clear app envs succeed\" << std::endl;\n        if (!response.hint_message.empty()) {\n            std::cout << \"=============================\" << std::endl;\n            std::cout << response.hint_message << std::endl;\n            std::cout << \"=============================\" << std::endl;\n        }\n    }\n    return ERR_OK;\n}\n\ndsn::error_code\nreplication_ddl_client::ddd_diagnose(gpid pid, std::vector<ddd_partition_info> &ddd_partitions)\n{\n    std::shared_ptr<ddd_diagnose_request> req(new ddd_diagnose_request());\n    req->pid = pid;\n\n    auto resp_task = request_meta<ddd_diagnose_request>(RPC_CM_DDD_DIAGNOSE, req);\n\n    resp_task->wait();\n    if (resp_task->error() != dsn::ERR_OK) {\n        return resp_task->error();\n    }\n\n    ddd_diagnose_response resp;\n    dsn::unmarshall(resp_task->get_response(), resp);\n    if (resp.err != dsn::ERR_OK) {\n        return resp.err;\n    }\n\n    ddd_partitions = std::move(resp.partitions);\n\n    return dsn::ERR_OK;\n}\n\nvoid replication_ddl_client::query_disk_info(\n    const std::vector<dsn::rpc_address> &targets,\n    const std::string &app_name,\n    /*out*/ std::map<dsn::rpc_address, error_with<query_disk_info_response>> &resps)\n{\n    std::map<dsn::rpc_address, query_disk_info_rpc> query_disk_info_rpcs;\n    for (const auto &target : targets) {\n        auto request = make_unique<query_disk_info_request>();\n        request->node = target;\n        request->app_name = app_name;\n        query_disk_info_rpcs.emplace(target,\n                                     
query_disk_info_rpc(std::move(request), RPC_QUERY_DISK_INFO));\n    }\n    call_rpcs_sync(query_disk_info_rpcs, resps);\n}\n\nerror_with<start_bulk_load_response>\nreplication_ddl_client::start_bulk_load(const std::string &app_name,\n                                        const std::string &cluster_name,\n                                        const std::string &file_provider_type,\n                                        const std::string &remote_root_path,\n                                        const bool ingest_behind)\n{\n    auto req = make_unique<start_bulk_load_request>();\n    req->app_name = app_name;\n    req->cluster_name = cluster_name;\n    req->file_provider_type = file_provider_type;\n    req->remote_root_path = remote_root_path;\n    req->ingest_behind = ingest_behind;\n    return call_rpc_sync(start_bulk_load_rpc(std::move(req), RPC_CM_START_BULK_LOAD));\n}\n\nerror_with<control_bulk_load_response>\nreplication_ddl_client::control_bulk_load(const std::string &app_name,\n                                          const bulk_load_control_type::type control_type)\n{\n    auto req = make_unique<control_bulk_load_request>();\n    req->app_name = app_name;\n    req->type = control_type;\n    return call_rpc_sync(control_bulk_load_rpc(std::move(req), RPC_CM_CONTROL_BULK_LOAD));\n}\n\nerror_with<query_bulk_load_response>\nreplication_ddl_client::query_bulk_load(const std::string &app_name)\n{\n\n    auto req = make_unique<query_bulk_load_request>();\n    req->app_name = app_name;\n    return call_rpc_sync(query_bulk_load_rpc(std::move(req), RPC_CM_QUERY_BULK_LOAD_STATUS));\n}\n\nerror_with<clear_bulk_load_state_response>\nreplication_ddl_client::clear_bulk_load(const std::string &app_name)\n{\n    auto req = make_unique<clear_bulk_load_state_request>();\n    req->app_name = app_name;\n    return call_rpc_sync(clear_bulk_load_rpc(std::move(req), RPC_CM_CLEAR_BULK_LOAD));\n}\n\nerror_code replication_ddl_client::detect_hotkey(const dsn::rpc_address 
&target,\n                                                 detect_hotkey_request &req,\n                                                 detect_hotkey_response &resp)\n{\n    std::map<dsn::rpc_address, detect_hotkey_rpc> detect_hotkey_rpcs;\n    auto request = make_unique<detect_hotkey_request>(req);\n    detect_hotkey_rpcs.emplace(target, detect_hotkey_rpc(std::move(request), RPC_DETECT_HOTKEY));\n    std::map<dsn::rpc_address, error_with<detect_hotkey_response>> resps;\n    call_rpcs_sync(detect_hotkey_rpcs, resps);\n    resp = resps.begin()->second.get_value();\n    return resps.begin()->second.get_error().code();\n}\n\nerror_with<start_partition_split_response>\nreplication_ddl_client::start_partition_split(const std::string &app_name, int new_partition_count)\n{\n    auto req = make_unique<start_partition_split_request>();\n    req->__set_app_name(app_name);\n    req->__set_new_partition_count(new_partition_count);\n    return call_rpc_sync(start_split_rpc(std::move(req), RPC_CM_START_PARTITION_SPLIT));\n}\n\nerror_with<control_split_response>\nreplication_ddl_client::pause_partition_split(const std::string &app_name,\n                                              const int32_t parent_pidx)\n{\n    return control_partition_split(app_name, split_control_type::PAUSE, parent_pidx, 0);\n}\n\nerror_with<control_split_response>\nreplication_ddl_client::restart_partition_split(const std::string &app_name,\n                                                const int32_t parent_pidx)\n{\n    return control_partition_split(app_name, split_control_type::RESTART, parent_pidx, 0);\n}\n\nerror_with<control_split_response>\nreplication_ddl_client::cancel_partition_split(const std::string &app_name,\n                                               const int32_t old_partition_count)\n{\n    return control_partition_split(app_name, split_control_type::CANCEL, -1, old_partition_count);\n}\n\nerror_with<control_split_response>\nreplication_ddl_client::control_partition_split(const 
std::string &app_name,\n                                                split_control_type::type control_type,\n                                                const int32_t parent_pidx,\n                                                const int32_t old_partition_count)\n{\n    auto req = make_unique<control_split_request>();\n    req->__set_app_name(app_name);\n    req->__set_control_type(control_type);\n    req->__set_parent_pidx(parent_pidx);\n    req->__set_old_partition_count(old_partition_count);\n    return call_rpc_sync(control_split_rpc(std::move(req), RPC_CM_CONTROL_PARTITION_SPLIT));\n}\n\nerror_with<query_split_response>\nreplication_ddl_client::query_partition_split(const std::string &app_name)\n{\n    auto req = make_unique<query_split_request>();\n    req->__set_app_name(app_name);\n    return call_rpc_sync(query_split_rpc(std::move(req), RPC_CM_QUERY_PARTITION_SPLIT));\n}\n\nerror_with<add_new_disk_response>\nreplication_ddl_client::add_new_disk(const rpc_address &target_node, const std::string &disk_str)\n{\n    auto req = make_unique<add_new_disk_request>();\n    req->disk_str = disk_str;\n\n    std::map<rpc_address, add_new_disk_rpc> add_new_disk_rpcs;\n    add_new_disk_rpcs.emplace(target_node, add_new_disk_rpc(std::move(req), RPC_ADD_NEW_DISK));\n\n    std::map<rpc_address, error_with<add_new_disk_response>> resps;\n    call_rpcs_sync(add_new_disk_rpcs, resps);\n    return resps.begin()->second.get_value();\n}\n\nerror_with<start_app_manual_compact_response> replication_ddl_client::start_app_manual_compact(\n    const std::string &app_name, bool bottommost, const int32_t level, const int32_t max_count)\n{\n    auto req = make_unique<start_app_manual_compact_request>();\n    req->app_name = app_name;\n    req->__set_trigger_time(dsn_now_s());\n    req->__set_target_level(level);\n    req->__set_bottommost(bottommost);\n    if (max_count > 0) {\n        req->__set_max_running_count(max_count);\n    }\n    return 
call_rpc_sync(start_manual_compact_rpc(std::move(req), RPC_CM_START_MANUAL_COMPACT));\n}\n\nerror_with<query_app_manual_compact_response>\nreplication_ddl_client::query_app_manual_compact(const std::string &app_name)\n{\n    auto req = make_unique<query_app_manual_compact_request>();\n    req->app_name = app_name;\n    return call_rpc_sync(\n        query_manual_compact_rpc(std::move(req), RPC_CM_QUERY_MANUAL_COMPACT_STATUS));\n}\n\nerror_with<configuration_get_max_replica_count_response>\nreplication_ddl_client::get_max_replica_count(const std::string &app_name)\n{\n    auto req = make_unique<configuration_get_max_replica_count_request>();\n    req->__set_app_name(app_name);\n    return call_rpc_sync(\n        configuration_get_max_replica_count_rpc(std::move(req), RPC_CM_GET_MAX_REPLICA_COUNT));\n}\n\nerror_with<configuration_set_max_replica_count_response>\nreplication_ddl_client::set_max_replica_count(const std::string &app_name,\n                                              int32_t max_replica_count)\n{\n    auto req = make_unique<configuration_set_max_replica_count_request>();\n    req->__set_app_name(app_name);\n    req->__set_max_replica_count(max_replica_count);\n    return call_rpc_sync(\n        configuration_set_max_replica_count_rpc(std::move(req), RPC_CM_SET_MAX_REPLICA_COUNT));\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/common/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_replication_common)\n\nthrift_generate_cpp(\n    METADATA_THRIFT_SRCS\n    METADATA_THRIFT_HDRS\n    ${CMAKE_CURRENT_SOURCE_DIR}/metadata.thrift\n)\n\nthrift_generate_cpp(\n    CONSENSUS_THRIFT_SRCS\n    CONSENSUS_THRIFT_HDRS\n    ${CMAKE_CURRENT_SOURCE_DIR}/consensus.thrift\n)\n\nthrift_generate_cpp(\n    DUPLICATION_THRIFT_SRCS\n    DUPLICATION_THRIFT_HDRS\n    ${CMAKE_CURRENT_SOURCE_DIR}/duplication.thrift\n)\n\nthrift_generate_cpp(\n    BACKUP_THRIFT_SRCS\n    BACKUP_THRIFT_HDRS\n    ${CMAKE_CURRENT_SOURCE_DIR}/backup.thrift\n)\n\nthrift_generate_cpp(\n    META_ADMIN_THRIFT_SRCS\n    META_ADMIN_THRIFT_HDRS\n    ${CMAKE_CURRENT_SOURCE_DIR}/meta_admin.thrift\n)\n\nthrift_generate_cpp(\n    BULK_LOAD_THRIFT_SRCS\n    BULK_LOAD_THRIFT_HDRS\n    ${CMAKE_CURRENT_SOURCE_DIR}/bulk_load.thrift\n)\n\nthrift_generate_cpp(\n    PARTITION_SPLIT_THRIFT_SRCS\n    PARTITION_SPLIT_THRIFT_HDRS\n    ${CMAKE_CURRENT_SOURCE_DIR}/partition_split.thrift\n)\n\nthrift_generate_cpp(\n    REPLICA_ADMIN_THRIFT_SRCS\n    REPLICA_ADMIN_THRIFT_HDRS\n    ${CMAKE_CURRENT_SOURCE_DIR}/replica_admin.thrift\n)\n\nset(MY_PROJ_SRC\n    ${DUPLICATION_THRIFT_SRCS}\n    ${BACKUP_THRIFT_SRCS}\n    ${META_ADMIN_THRIFT_SRCS}\n    ${BULK_LOAD_THRIFT_SRCS}\n    ${PARTITION_SPLIT_THRIFT_SRCS}\n    ${REPLICA_ADMIN_THRIFT_SRCS}\n    ${METADATA_THRIFT_SRCS}\n    ${CONSENSUS_THRIFT_SRCS}\n)\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS \"\")\n\n# Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_static_library()\n\nadd_subdirectory(test)\n"
  },
  {
    "path": "src/common/backup.thrift",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\ninclude \"../dsn.thrift\"\ninclude \"../dsn.layer2.thrift\"\n\nnamespace cpp dsn.replication\n\nstruct policy_info\n{\n    1:string        policy_name;\n    2:string        backup_provider_type;\n}\n\n// using configuration_create_app_response to response\nstruct configuration_restore_request\n{\n    1:string            cluster_name;\n    2:string            policy_name;\n    3:i64               time_stamp;   // namely backup_id\n    4:string            app_name;\n    5:i32               app_id;\n    6:string            new_app_name;\n    7:string            backup_provider_name;\n    8:bool              skip_bad_partition;\n    9:optional string   restore_path;\n}\n\nstruct backup_request\n{\n    1:dsn.gpid              pid;\n    2:policy_info           policy;\n    3:string                app_name;\n    4:i64                   backup_id;\n    // user specified backup_path.\n    5:optional string       backup_path;\n}\n\nstruct backup_response\n{\n    1:dsn.error_code    err;\n    2:dsn.gpid          pid;\n    3:i32               progress;  // the progress of the cold_backup\n    4:string            policy_name;\n    5:i64               
backup_id;\n    6:i64               checkpoint_total_size;\n}\n\n// clear all backup resources (including backup contexts and checkpoint dirs) of this policy.\nstruct backup_clear_request\n{\n    1:dsn.gpid          pid;\n    2:string            policy_name;\n}\n\nstruct configuration_modify_backup_policy_request\n{\n    1:string                    policy_name;\n    2:optional list<i32>        add_appids;\n    3:optional list<i32>        removal_appids;\n    4:optional i64              new_backup_interval_sec;\n    5:optional i32              backup_history_count_to_keep;\n    6:optional bool             is_disable;\n    7:optional string           start_time; // restrict the start time of each backup, hour:minute\n}\n\nstruct configuration_modify_backup_policy_response\n{\n    1:dsn.error_code        err;\n    2:string                hint_message;\n}\n\nstruct configuration_add_backup_policy_request\n{\n    1:string            backup_provider_type;\n    2:string            policy_name;\n    3:list<i32>         app_ids;\n    4:i64               backup_interval_seconds;\n    5:i32               backup_history_count_to_keep;\n    6:string            start_time;\n}\n\nstruct configuration_add_backup_policy_response\n{\n    1:dsn.error_code        err;\n    2:string                hint_message;\n}\n\nstruct policy_entry\n{\n    1:string        policy_name;\n    2:string        backup_provider_type;\n    3:string        backup_interval_seconds;\n    4:set<i32>      app_ids;\n    5:i32           backup_history_count_to_keep;\n    6:string        start_time;\n    7:bool          is_disable;\n}\n\nstruct backup_entry\n{\n    1:i64           backup_id;\n    2:i64           start_time_ms;\n    3:i64           end_time_ms;\n    4:set<i32>      app_ids;\n}\n\nstruct configuration_query_backup_policy_request\n{\n    1:list<string>      policy_names;\n    2:i32               backup_info_count;\n}\n\nstruct configuration_query_backup_policy_response\n{\n    1:dsn.error_code       
     err;\n    2:list<policy_entry>        policys;\n    3:list<list<backup_entry>>  backup_infos;\n    4:optional string           hint_msg;\n}\n\nstruct configuration_report_restore_status_request\n{\n    1:dsn.gpid  pid;\n    2:dsn.error_code    restore_status;\n    3:i32        progress; //[0~1000]\n    4:optional string   reason;\n}\n\nstruct configuration_report_restore_status_response\n{\n    1:dsn.error_code    err;\n}\n\nstruct configuration_query_restore_request\n{\n    1:i32   restore_app_id;\n}\n\nstruct configuration_query_restore_response\n{\n    1:dsn.error_code        err;\n    2:list<dsn.error_code>  restore_status;\n    3:list<i32>             restore_progress;\n}\n\nstruct start_backup_app_request\n{\n    1:string             backup_provider_type;\n    2:i32                app_id;\n    // user specified backup_path.\n    3:optional string    backup_path;\n}\n\nstruct start_backup_app_response\n{\n    // Possible error:\n    // - ERR_INVALID_STATE: app is not available or is backing up\n    // - ERR_INVALID_PARAMETERS: backup provider type is invalid\n    // - ERR_SERVICE_NOT_ACTIVE: meta doesn't enable backup service\n    1:dsn.error_code    err;\n    2:string            hint_message;\n    3:optional i64      backup_id;\n}\n\nstruct backup_item\n{\n    1:i64           backup_id;\n    2:string        app_name;\n    3:string        backup_provider_type;\n    // user specified backup_path.\n    4:string        backup_path;\n    5:i64           start_time_ms;\n    6:i64           end_time_ms;\n    7:bool          is_backup_failed;\n}\n\nstruct query_backup_status_request\n{\n    1:i32                 app_id;\n    2:optional i64        backup_id;\n}\n\nstruct query_backup_status_response\n{\n    // Possible error:\n    // - ERR_INVALID_PARAMETERS: no available backup for requested app\n    // - ERR_SERVICE_NOT_ACTIVE: meta doesn't enable backup service\n    1:dsn.error_code                 err;\n    2:string                         hint_message;\n    
3:optional list<backup_item>     backup_items;\n}\n"
  },
  {
    "path": "src/common/backup_common.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"backup_common.h\"\n#include \"replica/backup/cold_backup_context.h\"\n\nnamespace dsn {\nnamespace replication {\nconst std::string cold_backup_constant::APP_METADATA(\"app_metadata\");\nconst std::string cold_backup_constant::APP_BACKUP_STATUS(\"app_backup_status\");\nconst std::string cold_backup_constant::CURRENT_CHECKPOINT(\"current_checkpoint\");\nconst std::string cold_backup_constant::BACKUP_METADATA(\"backup_metadata\");\nconst std::string cold_backup_constant::BACKUP_INFO(\"backup_info\");\nconst int32_t cold_backup_constant::PROGRESS_FINISHED = 1000;\n\nconst std::string backup_restore_constant::FORCE_RESTORE(\"restore.force_restore\");\nconst std::string backup_restore_constant::BLOCK_SERVICE_PROVIDER(\"restore.block_service_provider\");\nconst std::string backup_restore_constant::CLUSTER_NAME(\"restore.cluster_name\");\nconst std::string backup_restore_constant::POLICY_NAME(\"restore.policy_name\");\nconst std::string backup_restore_constant::APP_NAME(\"restore.app_name\");\nconst std::string backup_restore_constant::APP_ID(\"restore.app_id\");\nconst std::string 
backup_restore_constant::BACKUP_ID(\"restore.backup_id\");\nconst std::string backup_restore_constant::SKIP_BAD_PARTITION(\"restore.skip_bad_partition\");\nconst std::string backup_restore_constant::RESTORE_PATH(\"restore.restore_path\");\n\nnamespace cold_backup {\n\nstd::string get_backup_path(const std::string &root, int64_t backup_id)\n{\n    return root + \"/\" + std::to_string(backup_id);\n}\n\nstd::string get_backup_info_file(const std::string &root, int64_t backup_id)\n{\n    return get_backup_path(root, backup_id) + \"/\" + cold_backup_constant::BACKUP_INFO;\n}\n\nstd::string get_replica_backup_path(const std::string &root,\n                                    const std::string &app_name,\n                                    gpid pid,\n                                    int64_t backup_id)\n{\n    std::string str_app = app_name + \"_\" + std::to_string(pid.get_app_id());\n    return get_backup_path(root, backup_id) + \"/\" + str_app + \"/\" +\n           std::to_string(pid.get_partition_index());\n}\n\nstd::string get_app_meta_backup_path(const std::string &root,\n                                     const std::string &app_name,\n                                     int32_t app_id,\n                                     int64_t backup_id)\n{\n    std::string str_app = app_name + \"_\" + std::to_string(app_id);\n    return get_backup_path(root, backup_id) + \"/\" + str_app + \"/meta\";\n}\n\nstd::string get_app_metadata_file(const std::string &root,\n                                  const std::string &app_name,\n                                  int32_t app_id,\n                                  int64_t backup_id)\n{\n    return get_app_meta_backup_path(root, app_name, app_id, backup_id) + \"/\" +\n           cold_backup_constant::APP_METADATA;\n}\n\nstd::string get_app_backup_status_file(const std::string &root,\n                                       const std::string &app_name,\n                                       int32_t app_id,\n                     
                  int64_t backup_id)\n{\n    return get_app_meta_backup_path(root, app_name, app_id, backup_id) + \"/\" +\n           cold_backup_constant::APP_BACKUP_STATUS;\n}\n\nstd::string get_current_chkpt_file(const std::string &root,\n                                   const std::string &app_name,\n                                   gpid pid,\n                                   int64_t backup_id)\n{\n    return get_replica_backup_path(root, app_name, pid, backup_id) + \"/\" +\n           cold_backup_constant::CURRENT_CHECKPOINT;\n}\n\nstd::string get_remote_chkpt_dirname()\n{\n    // here using server address as suffix of remote_chkpt_dirname\n    std::string local_address = dsn_primary_address().ipv4_str();\n    std::string port = std::to_string(dsn_primary_address().port());\n    return \"chkpt_\" + local_address + \"_\" + port;\n}\n\nstd::string get_remote_chkpt_dir(const std::string &root,\n                                 const std::string &app_name,\n                                 gpid pid,\n                                 int64_t backup_id)\n{\n    return get_replica_backup_path(root, app_name, pid, backup_id) + \"/\" +\n           get_remote_chkpt_dirname();\n}\n\nstd::string get_remote_chkpt_meta_file(const std::string &root,\n                                       const std::string &app_name,\n                                       gpid pid,\n                                       int64_t backup_id)\n{\n    return get_remote_chkpt_dir(root, app_name, pid, backup_id) + \"/\" +\n           cold_backup_constant::BACKUP_METADATA;\n}\n\n} // namespace cold_backup\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/common/backup_common.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <string>\n#include <dsn/tool-api/gpid.h>\n#include \"backup_types.h\"\n#include <dsn/cpp/rpc_holder.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass cold_backup_constant\n{\npublic:\n    static const std::string APP_METADATA;\n    static const std::string APP_BACKUP_STATUS;\n    static const std::string CURRENT_CHECKPOINT;\n    static const std::string BACKUP_METADATA;\n    static const std::string BACKUP_INFO;\n    static const int32_t PROGRESS_FINISHED;\n};\n\ntypedef rpc_holder<backup_request, backup_response> backup_rpc;\n\nclass backup_restore_constant\n{\npublic:\n    static const std::string FORCE_RESTORE;\n    static const std::string BLOCK_SERVICE_PROVIDER;\n    static const std::string CLUSTER_NAME;\n    static const std::string POLICY_NAME;\n    static const std::string APP_NAME;\n    static const std::string APP_ID;\n    static const std::string BACKUP_ID;\n    static const std::string SKIP_BAD_PARTITION;\n    static const std::string RESTORE_PATH;\n};\n\nnamespace cold_backup {\n\n//\n//  Attention: when compose the path on block service, we use appname_appid, because appname_appid\n//              
can identify the case below:\n//     -- case: you create one app with name A and its appid is 1, then after backup a time later,\n//              you drop the table, then create a new app with name A and with appid 3\n//              using appname_appid, we can identify which app the backup data belongs to\n\n// The directory structure on block service\n//\n//      <root>/<backup_id>/<appname_appid>/meta/app_metadata\n//                                        /meta/app_backup_status\n//                                        /partition_1/chkpt_<ip>_<port>/***.sst\n//                                        /partition_1/chkpt_<ip>_<port>/CURRENT\n//                                        /partition_1/chkpt_<ip>_<port>/backup_metadata\n//                                        /partition_1/current_checkpoint\n//      <root>/<backup_id>/<appname_appid>/meta/app_metadata\n//                                        /meta/app_backup_status\n//                                        /partition_1/chkpt_<ip>_<port>/***.sst\n//                                        /partition_1/chkpt_<ip>_<port>/CURRENT\n//                                        /partition_1/chkpt_<ip>_<port>/backup_metadata\n//                                        /partition_1/current_checkpoint\n//      <root>/<backup_id>/backup_info\n//\n\n//\n// the purpose of some file:\n//      1, app_metadata : the metadata of the app, the same with the app's app_info\n//      2, app_backup_status: the flag file, represent whether the app have finish backup, if this\n//         file exist on block filesystem, backup is finished, otherwise, app haven't finished\n//         backup, we ignore its context\n//      3, backup_metadata : the file to statistic the information of a checkpoint, include all the\n//         file's name, size and md5\n//      4, current_checkpoint : specifying which checkpoint directory is valid\n//      5, backup_info : recording the information of this backup\n//\n\n// compose the path for 
app on block service\n// input:\n//  -- root:  the prefix of path\n// return:\n//      the path: <root>/<backup_id>\nstd::string get_backup_path(const std::string &root, int64_t backup_id);\n\n// return: <root>/<backup_id>/backup_info\nstd::string get_backup_info_file(const std::string &root, int64_t backup_id);\n\n// compose the path for replica on block service\n// input:\n//  -- root:  the prefix of the path\n// return:\n//      the path: <root>/<backup_id>/<appname_appid>/<partition_index>\nstd::string get_replica_backup_path(const std::string &root,\n                                    const std::string &app_name,\n                                    gpid pid,\n                                    int64_t backup_id);\n\n// compose the path for meta on block service\n// input:\n//  -- root:  the prefix of the path\n// return:\n//      the path: <root>/<backup_id>/<appname_appid>/meta\nstd::string get_app_meta_backup_path(const std::string &root,\n                                     const std::string &app_name,\n                                     int32_t app_id,\n                                     int64_t backup_id);\n\n// compose the absolute path(AP) of app_metadata_file on block service\n// input:\n//  -- prefix:      the prefix of AP\n// return:\n//      the AP of app meta data file:\n//      <root>/<backup_id>/<appname_appid>/meta/app_metadata\nstd::string get_app_metadata_file(const std::string &root,\n                                  const std::string &app_name,\n                                  int32_t app_id,\n                                  int64_t backup_id);\n\n// compose the absolute path(AP) of app_backup_status file on block service\n// input:\n//  -- prefix:      the prefix of AP\n// return:\n//      the AP of flag-file, which represent whether the app have finished backup:\n//      <root>/<backup_id>/<appname_appid>/meta/app_backup_status\nstd::string get_app_backup_status_file(const std::string &root,\n                                   
    const std::string &app_name,\n                                       int32_t app_id,\n                                       int64_t backup_id);\n\n// compose the absolute path(AP) of current checkpoint file on block service\n// input:\n//  -- root:      the prefix of AP on block service\n//  -- pid:         gpid of replica\n// return:\n//      the AP of current checkpoint file:\n//      <root>/<backup_id>/<appname_appid>/<partition_index>/current_checkpoint\nstd::string get_current_chkpt_file(const std::string &root,\n                                   const std::string &app_name,\n                                   gpid pid,\n                                   int64_t backup_id);\n\n// compose the checkpoint directory name on block service\n// return:\n//      checkpoint directory name: chkpt_<ip>_<port>\nstd::string get_remote_chkpt_dirname();\n\n// compose the absolute path(AP) of checkpoint dir for replica on block service\n// input:\n//  -- root:       the prefix of the AP\n//  -- pid:          gpid of replica\n// return:\n//      the AP of the checkpoint dir:\n//      <root>/<backup_id>/<appname_appid>/<partition_index>/chkpt_<ip>_<port>\nstd::string get_remote_chkpt_dir(const std::string &root,\n                                 const std::string &app_name,\n                                 gpid pid,\n                                 int64_t backup_id);\n\n// compose the absolute path(AP) of checkpoint meta for replica on block service\n// input:\n//  -- root:       the prefix of the AP\n//  -- pid:          gpid of replica\n// return:\n//      the AP of the checkpoint file metadata:\n//      <root>/<backup_id>/<appname_appid>/<partition_index>/chkpt_<ip>_<port>/backup_metadata\nstd::string get_remote_chkpt_meta_file(const std::string &root,\n                                       const std::string &app_name,\n                                       gpid pid,\n                                       int64_t backup_id);\n\n} // namespace 
cold_backup\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/common/bulk_load.thrift",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\ninclude \"../dsn.thrift\"\ninclude \"../dsn.layer2.thrift\"\ninclude \"metadata.thrift\"\n\nnamespace cpp dsn.replication\n\n// app partition bulk load status\nenum bulk_load_status\n{\n    BLS_INVALID,\n    BLS_DOWNLOADING,\n    BLS_DOWNLOADED,\n    BLS_INGESTING,\n    BLS_SUCCEED,\n    BLS_FAILED,\n    BLS_PAUSING,\n    BLS_PAUSED,\n    BLS_CANCELED\n}\n\nenum ingestion_status\n{\n    IS_INVALID,\n    IS_RUNNING,\n    IS_SUCCEED,\n    IS_FAILED\n}\n\nstruct bulk_load_metadata\n{\n    1:list<metadata.file_meta>   files;\n    2:i64               file_total_size;\n}\n\n// client -> meta, start bulk load\nstruct start_bulk_load_request\n{\n    1:string    app_name;\n    2:string    cluster_name;\n    3:string    file_provider_type;\n    4:string    remote_root_path;\n    5:bool      ingest_behind = false;\n}\n\nstruct start_bulk_load_response\n{\n    // Possible error:\n    // - ERR_OK: start bulk load succeed\n    // - ERR_APP_NOT_EXIST: app not exist\n    // - ERR_APP_DROPPED: app has been dropped\n    // - ERR_BUSY: app is already executing bulk load\n    // - ERR_INVALID_PARAMETERS: wrong file_provider type\n    // - 
ERR_FILE_OPERATION_FAILED: remote file_provider error\n    // - ERR_OBJECT_NOT_FOUND: bulk_load_info not exist on file_provider\n    // - ERR_CORRUPTION: bulk_load_info is damaged on file_provider\n    // - ERR_INCONSISTENT_STATE: app_id or partition_count inconsistent\n    1:dsn.error_code    err;\n    2:string            hint_msg;\n}\n\nstruct partition_bulk_load_state\n{\n    1:optional i32              download_progress = 0;\n    2:optional dsn.error_code   download_status;\n    3:optional ingestion_status ingest_status = ingestion_status.IS_INVALID;\n    4:optional bool             is_cleaned_up = false;\n    5:optional bool             is_paused = false;\n}\n\n// meta server -> replica server\nstruct bulk_load_request\n{\n    1:dsn.gpid          pid;\n    2:string            app_name;\n    3:dsn.rpc_address   primary_addr;\n    4:string            remote_provider_name;\n    5:string            cluster_name;\n    6:i64               ballot;\n    7:bulk_load_status  meta_bulk_load_status;\n    8:bool              query_bulk_load_metadata;\n    9:string            remote_root_path;\n}\n\nstruct bulk_load_response\n{\n    // Possible error:\n    // - ERR_OBJECT_NOT_FOUND: replica not found\n    // - ERR_INVALID_STATE: replica has invalid state\n    // - ERR_BUSY: node has enough replica executing bulk load downloading\n    // - ERR_FILE_OPERATION_FAILED: local file system error during bulk load downloading\n    // - ERR_FS_INTERNAL: remote file provider error during bulk load downloading\n    // - ERR_CORRUPTION: metadata corruption during bulk load downloading\n    1:dsn.error_code                                    err;\n    2:dsn.gpid                                          pid;\n    3:string                                            app_name;\n    4:bulk_load_status                                  primary_bulk_load_status;\n    5:map<dsn.rpc_address, partition_bulk_load_state>   group_bulk_load_state;\n    6:optional bulk_load_metadata                      
 metadata;\n    7:optional i32                                      total_download_progress;\n    8:optional bool                                     is_group_ingestion_finished;\n    9:optional bool                                     is_group_bulk_load_context_cleaned_up;\n    10:optional bool                                    is_group_bulk_load_paused;\n}\n\n// primary -> secondary\nstruct group_bulk_load_request\n{\n    1:string                        app_name;\n    2:dsn.rpc_address               target_address;\n    3:metadata.replica_configuration         config;\n    4:string                        provider_name;\n    5:string                        cluster_name;\n    6:bulk_load_status              meta_bulk_load_status;\n    7:string                        remote_root_path;\n}\n\nstruct group_bulk_load_response\n{\n    // Possible error:\n    // - ERR_OBJECT_NOT_FOUND: replica not found\n    // - ERR_VERSION_OUTDATED: request out-dated\n    // - ERR_INVALID_STATE: replica has invalid state\n    // - ERR_BUSY: node has enough replica executing bulk load downloading\n    // - ERR_FILE_OPERATION_FAILED: local file system error during bulk load downloading\n    // - ERR_FS_INTERNAL: remote file provider error during bulk load downloading\n    // - ERR_CORRUPTION: metadata corruption during bulk load downloading\n    1:dsn.error_code            err;\n    2:bulk_load_status          status;\n    3:partition_bulk_load_state bulk_load_state;\n}\n\n// meta server -> replica server\nstruct ingestion_request\n{\n    1:string                app_name;\n    2:bulk_load_metadata    metadata;\n    3:bool                  ingest_behind;\n    4:i64                   ballot;\n    5:bool                  verify_before_ingest;\n}\n\nstruct ingestion_response\n{\n    // Possible errors:\n    // - ERR_TRY_AGAIN: retry ingestion\n    1:dsn.error_code    err;\n    // rocksdb ingestion error code\n    2:i32               rocksdb_error;\n}\n\nenum bulk_load_control_type\n{\n    
BLC_PAUSE,\n    BLC_RESTART,\n    BLC_CANCEL,\n    BLC_FORCE_CANCEL\n}\n\n// client -> meta server, pause/restart/cancel/force_cancel bulk load\nstruct control_bulk_load_request\n{\n    1:string                    app_name;\n    2:bulk_load_control_type    type;\n}\n\nstruct control_bulk_load_response\n{\n    // Possible error:\n    // - ERR_APP_NOT_EXIST: app not exist\n    // - ERR_APP_DROPPED: app has been dropped\n    // - ERR_INACTIVE_STATE: app is not executing bulk load\n    // - ERR_INVALID_STATE: current bulk load process can not be paused/restarted/canceled\n    1:dsn.error_code    err;\n    2:optional string   hint_msg;\n}\n\nstruct query_bulk_load_request\n{\n    1:string   app_name;\n}\n\nstruct query_bulk_load_response\n{\n    // Possible error:\n    // - ERR_OK\n    // - ERR_APP_NOT_EXIST: app not exist\n    // - ERR_APP_DROPPED: app has been dropped\n    // - ERR_FILE_OPERATION_FAILED: local file system error\n    // - ERR_FS_INTERNAL: remote file system error\n    // - ERR_CORRUPTION: file not exist or damaged\n    // - ERR_INGESTION_FAILED: ingest failed\n    // - ERR_RETRY_EXHAUSTED: retry too many times\n    1:dsn.error_code                                        err;\n    2:string                                                app_name;\n    3:bulk_load_status                                      app_status;\n    4:list<bulk_load_status>                                partitions_status;\n    5:i32                                                   max_replica_count;\n    // detailed bulk load state for each replica\n    6:list<map<dsn.rpc_address, partition_bulk_load_state>> bulk_load_states;\n    7:optional string                                       hint_msg;\n    8:optional bool                                         is_bulk_loading;\n}\n\nstruct clear_bulk_load_state_request\n{\n    1:string                    app_name;\n}\n\nstruct clear_bulk_load_state_response\n{\n    // Possible error:\n    // - ERR_APP_NOT_EXIST: app not exist\n    // 
- ERR_APP_DROPPED: app has been dropped\n    // - ERR_INVALID_STATE: app is executing bulk load\n    1:dsn.error_code    err;\n    2:string            hint_msg;\n}\n"
  },
  {
    "path": "src/common/bulk_load_common.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"bulk_load_common.h\"\n\nnamespace dsn {\nnamespace replication {\nconst std::string bulk_load_constant::BULK_LOAD_INFO(\"bulk_load_info\");\nconst int32_t bulk_load_constant::BULK_LOAD_REQUEST_INTERVAL = 10;\nconst std::string bulk_load_constant::BULK_LOAD_METADATA(\"bulk_load_metadata\");\nconst std::string bulk_load_constant::BULK_LOAD_LOCAL_ROOT_DIR(\"bulk_load\");\nconst int32_t bulk_load_constant::PROGRESS_FINISHED = 100;\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/common/bulk_load_common.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"bulk_load_types.h\"\n#include <dsn/cpp/rpc_holder.h>\n\nnamespace dsn {\nnamespace replication {\n\ntypedef rpc_holder<start_bulk_load_request, start_bulk_load_response> start_bulk_load_rpc;\ntypedef rpc_holder<bulk_load_request, bulk_load_response> bulk_load_rpc;\ntypedef rpc_holder<control_bulk_load_request, control_bulk_load_response> control_bulk_load_rpc;\ntypedef rpc_holder<query_bulk_load_request, query_bulk_load_response> query_bulk_load_rpc;\ntypedef rpc_holder<clear_bulk_load_state_request, clear_bulk_load_state_response>\n    clear_bulk_load_rpc;\n\nclass bulk_load_constant\n{\npublic:\n    static const std::string BULK_LOAD_INFO;\n    static const int32_t BULK_LOAD_REQUEST_INTERVAL;\n    static const std::string BULK_LOAD_METADATA;\n    static const std::string BULK_LOAD_LOCAL_ROOT_DIR;\n    static const int32_t PROGRESS_FINISHED;\n};\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/common/common.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/dist/common.h>\n#include <dsn/utility/flags.h>\n\nnamespace dsn {\nDSN_DEFINE_string(\"replication\", cluster_name, \"\", \"name of this cluster\");\n\n/*extern*/ const char *get_current_cluster_name()\n{\n    dassert(strlen(FLAGS_cluster_name) != 0, \"cluster_name is not set\");\n    return FLAGS_cluster_name;\n}\n} // namespace dsn\n"
  },
  {
    "path": "src/common/consensus.thrift",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\ninclude \"../dsn.thrift\"\ninclude \"../dsn.layer2.thrift\"\ninclude \"metadata.thrift\"\n\nnamespace cpp dsn.replication\n\nstruct mutation_header\n{\n    1:dsn.gpid             pid;\n    2:i64                  ballot;\n    3:i64                  decree;\n    4:i64                  log_offset;\n    5:i64                  last_committed_decree;\n    6:i64                  timestamp;\n}\n\nstruct mutation_update\n{\n    1:dsn.task_code  code;\n\n    //the serialization type of data, this need to store in log and replicate to secondaries by primary\n    2:i32            serialization_type;\n    3:dsn.blob       data;\n    4:optional i64   start_time_ns;\n}\n\nstruct mutation_data\n{\n    1:mutation_header        header;\n    
2:list<mutation_update>  updates;\n}\n\nstruct prepare_msg\n{\n    1:metadata.replica_configuration config;\n    2:mutation_data         mu;\n}\n\nenum read_semantic\n{\n    ReadInvalid,\n    ReadLastUpdate,\n    ReadOutdated,\n    ReadSnapshot,\n}\n\nstruct read_request_header\n{\n    1:dsn.gpid pid;\n    2:dsn.task_code       code;\n    3:read_semantic       semantic = read_semantic.ReadLastUpdate;\n    4:i64                 version_decree = -1;\n}\n\nstruct write_request_header\n{\n    1:dsn.gpid pid;\n    2:dsn.task_code       code;\n}\n\nstruct rw_response_header\n{\n    1:dsn.error_code      err;\n}\n\nstruct prepare_ack\n{\n    1:dsn.gpid pid;\n    2:dsn.error_code      err;\n    3:i64                 ballot;\n    4:i64                 decree;\n    5:i64                 last_committed_decree_in_app;\n    6:i64                 last_committed_decree_in_prepare_list;\n    7:optional i64        receive_timestamp;\n    8:optional i64        response_timestamp;\n}\n\nenum learn_type\n{\n    LT_INVALID,\n    LT_CACHE,\n    LT_APP,\n    LT_LOG,\n}\n\nstruct learn_state\n{\n    1:i64            from_decree_excluded;\n    2:i64            to_decree_included;\n    3:dsn.blob       meta;\n    4:list<string>   files;\n\n    // Used by duplication. 
Holds the start_decree of this round of learn.\n    5:optional i64   learn_start_decree;\n}\n\nenum learner_status\n{\n    LearningInvalid,\n    LearningWithoutPrepare,\n    LearningWithPrepareTransient,\n    LearningWithPrepare,\n    LearningSucceeded,\n    LearningFailed,\n}\n\nstruct learn_request\n{\n    1:dsn.gpid pid;\n    2:dsn.rpc_address     learner; // learner's address\n    3:i64                 signature; // learning signature\n    4:i64                 last_committed_decree_in_app; // last committed decree of learner's app\n    5:i64                 last_committed_decree_in_prepare_list; // last committed decree of learner's prepare list\n    6:dsn.blob            app_specific_learn_request; // learning request data by app.prepare_learn_request()\n\n    // Used by duplication to determine if learner has enough logs on disk to\n    // be duplicated (ie. max_gced_decree < confirmed_decree), if not,\n    // learnee will copy the missing logs.\n    7:optional i64        max_gced_decree;\n}\n\nstruct learn_response\n{\n    1:dsn.error_code        err; // error code\n    2:metadata.replica_configuration config; // learner's replica config\n    3:i64                   last_committed_decree; // learnee's last committed decree\n    4:i64                   prepare_start_decree; // prepare start decree\n    5:learn_type            type = learn_type.LT_INVALID; // learning type: CACHE, LOG, APP\n    6:learn_state           state; // learning data, including memory data and files\n    7:dsn.rpc_address       address; // learnee's address\n    8:string                base_local_dir; // base dir of files on learnee\n    9:optional string replica_disk_tag; // the disk tag of learnee located\n}\n\nstruct learn_notify_response\n{\n    1:dsn.gpid pid;\n    2:dsn.error_code        err; // error code\n    3:i64                   signature; // learning signature\n}\n\nstruct group_check_request\n{\n    1:dsn.layer2.app_info   app;\n    2:dsn.rpc_address       node;\n    
3:metadata.replica_configuration config;\n    4:i64                   last_committed_decree;\n\n    // Used to sync duplication progress between primaries\n    // and secondaries, so that secondaries can be allowed to GC\n    // their WALs after this decree.\n    5:optional i64          confirmed_decree;\n\n    // Used to deliver child gpid and meta_split_status during partition split\n    6:optional dsn.gpid     child_gpid;\n    7:optional metadata.split_status meta_split_status;\n}\n\nstruct group_check_response\n{\n    1:dsn.gpid pid;\n    2:dsn.error_code      err;\n    3:i64                 last_committed_decree_in_app;\n    4:i64                 last_committed_decree_in_prepare_list;\n    5:learner_status      learner_status_ = learner_status.LearningInvalid;\n    6:i64                 learner_signature;\n    7:dsn.rpc_address     node;\n    // Used for pause or cancel partition split\n    // if secondary pause or cancel split succeed, is_split_stopped = true\n    8:optional bool       is_split_stopped;\n    9:optional metadata.disk_status disk_status = metadata.disk_status.NORMAL;\n}\n\n"
  },
  {
    "path": "src/common/duplication.thrift",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\ninclude \"../dsn.thrift\"\ninclude \"../dsn.layer2.thrift\"\n\nnamespace cpp dsn.replication\n\n//  - INIT  -> PREPARE\n//  - PREPARE -> APP\n//  - APP -> LOG\n//  NOTE: Just LOG and PAUSE can be transferred states to each other\n//  - LOG -> PAUSE\n//  - PAUSE -> LOG\n//  - ALL -> REMOVED\nenum duplication_status\n{\n    DS_INIT = 0,\n    DS_PREPARE,// replica prepare latest checkpoint for follower\n    DS_APP,// follower start duplicate checkpoint\n    DS_LOG,// master start batch send plog to follower\n    DS_PAUSE,\n    DS_REMOVED,\n}\n\n// How duplication reacts on permanent failure.\nenum duplication_fail_mode\n{\n    // The default mode. 
If some permanent failure occurred that makes duplication\n    // blocked, it will retry forever until external interference.\n    FAIL_SLOW = 0,\n\n    // Skip the writes that failed to duplicate, which means minor data loss on the remote cluster.\n    // This will certainly achieve better stability of the system.\n    FAIL_SKIP,\n\n    // Stop immediately after it ensures itself unable to duplicate.\n    // WARN: this mode kills the server process, replicas on the server will all be affected.\n    FAIL_FAST\n}\n\n// This request is sent from client to meta.\nstruct duplication_add_request\n{\n    1:string  app_name;\n    2:string  remote_cluster_name;\n    // whether to duplicate checkpoint.\n    // - if true, duplication start state=DS_PREPARE,\n    // server will use nfs duplicate checkpoint to follower cluster,\n    // - if false, duplication start state=DS_LOG,\n    // server will replay and send plog mutation to follower cluster directly\n    3:optional bool is_duplicating_checkpoint = true;\n}\n\nstruct duplication_add_response\n{\n    // Possible errors:\n    // - ERR_INVALID_PARAMETERS:\n    //   the address of remote cluster is not well configured in meta server.\n    1:dsn.error_code   err;\n    2:i32              appid;\n    3:i32              dupid;\n    4:optional string  hint;\n}\n\n// This request is sent from client to meta.\nstruct duplication_modify_request\n{\n    1:string                    app_name;\n    2:i32                       dupid;\n    3:optional duplication_status status;\n    4:optional duplication_fail_mode fail_mode;\n}\n\nstruct duplication_modify_response\n{\n    // Possible errors:\n    // - ERR_APP_NOT_EXIST: app is not found\n    // - ERR_OBJECT_NOT_FOUND: duplication is not found\n    // - ERR_BUSY: busy for updating state\n    // - ERR_INVALID_PARAMETERS: illegal request\n    1:dsn.error_code   err;\n    2:i32              appid;\n}\n\nstruct duplication_entry\n{\n    1:i32                  dupid;\n    2:duplication_status   
status;\n    3:string               remote;\n    4:i64                  create_ts;\n\n    // partition_index => confirmed decree\n    5:optional map<i32, i64> progress;\n\n    7:optional duplication_fail_mode fail_mode;\n}\n\n// This request is sent from client to meta.\nstruct duplication_query_request\n{\n    1:string                    app_name;\n}\n\nstruct duplication_query_response\n{\n    // Possible errors:\n    // - ERR_APP_NOT_EXIST: app is not found\n    1:dsn.error_code             err;\n    3:i32                        appid;\n    4:list<duplication_entry>    entry_list;\n}\n\nstruct duplication_confirm_entry\n{\n    1:i32       dupid;\n    2:i64       confirmed_decree;\n    3:optional bool checkpoint_prepared = false;\n}\n\n// This is an internal RPC sent from replica server to meta.\n// It's a server-level RPC.\n// After it starts up, the replica server periodically collects and uploads confirmed points\n// to meta server, so that clients can directly query through meta for the current progress\n// of a duplication.\n// Moreover, if a primary replica is detected to be crashed, the duplication will be restarted\n// on the new primary, continuing from the progress persisted on meta.\n// Another function of this rpc is that it synchronizes duplication metadata updates\n// (like addition or removal of duplication) between meta and replica.\nstruct duplication_sync_request\n{\n    // the address of the replica server who sends this request\n    // TODO(wutao1): remove this field and get the source address by dsn_msg_from_address\n    1:dsn.rpc_address                                   node;\n\n    2:map<dsn.gpid, list<duplication_confirm_entry>>    confirm_list;\n}\n\nstruct duplication_sync_response\n{\n    // Possible errors:\n    // - ERR_OBJECT_NOT_FOUND: node is not found\n    1:dsn.error_code                                   err;\n\n    // appid -> map<dupid, dup_entry>\n    // this rpc will not return the apps that were not assigned duplication.\n 
   2:map<i32, map<i32, duplication_entry>>            dup_map;\n}\n"
  },
  {
    "path": "src/common/duplication_common.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/dist/replication/replication_types.h>\n#include <dsn/dist/replication/duplication_common.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/singleton.h>\n#include <dsn/utils/time_utils.h>\n#include <nlohmann/json.hpp>\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DEFINE_uint32(\"replication\",\n                  duplicate_log_batch_bytes,\n                  4096,\n                  \"send mutation log batch bytes size per rpc\");\nDSN_TAG_VARIABLE(duplicate_log_batch_bytes, FT_MUTABLE);\n\nconst std::string duplication_constants::kDuplicationCheckpointRootDir /*NOLINT*/ = \"duplication\";\nconst std::string duplication_constants::kClustersSectionName /*NOLINT*/ = \"pegasus.clusters\";\nconst std::string duplication_constants::kDuplicationEnvMasterClusterKey /*NOLINT*/ =\n    \"duplication.master_cluster\";\nconst std::string duplication_constants::kDuplicationEnvMasterMetasKey /*NOLINT*/ =\n    \"duplication.master_metas\";\n\n/*extern*/ const char *duplication_status_to_string(duplication_status::type status)\n{\n    auto it = _duplication_status_VALUES_TO_NAMES.find(status);\n    dassert(it != 
_duplication_status_VALUES_TO_NAMES.end(),\n            \"unexpected type of duplication_status: %d\",\n            status);\n    return it->second;\n}\n\n/*extern*/ const char *duplication_fail_mode_to_string(duplication_fail_mode::type fmode)\n{\n    auto it = _duplication_fail_mode_VALUES_TO_NAMES.find(fmode);\n    dassert(it != _duplication_fail_mode_VALUES_TO_NAMES.end(),\n            \"unexpected type of duplication_fail_mode: %d\",\n            fmode);\n    return it->second;\n}\n\nnamespace internal {\n\nclass duplication_group_registry : public utils::singleton<duplication_group_registry>\n{\npublic:\n    error_with<uint8_t> get_cluster_id(const std::string &cluster_name) const\n    {\n        if (cluster_name.empty()) {\n            return error_s::make(ERR_INVALID_PARAMETERS, \"cluster_name is empty\");\n        }\n        if (_group.empty()) {\n            return error_s::make(ERR_OBJECT_NOT_FOUND, \"`duplication-group` is not configured\");\n        }\n\n        auto it = _group.find(cluster_name);\n        if (it == _group.end()) {\n            return error_s::make(ERR_OBJECT_NOT_FOUND, \"failed to get cluster id for \")\n                   << cluster_name.data();\n        }\n        return it->second;\n    }\n\n    const std::map<std::string, uint8_t> &get_duplication_group() { return _group; }\n    const std::set<uint8_t> &get_distinct_cluster_id_set() { return _distinct_cids; }\n\nprivate:\n    duplication_group_registry()\n    {\n        std::vector<std::string> clusters;\n        dsn_config_get_all_keys(\"duplication-group\", clusters);\n        for (std::string &cluster : clusters) {\n            int64_t cluster_id =\n                dsn_config_get_value_int64(\"duplication-group\", cluster.data(), 0, \"\");\n            dassert(cluster_id < 128 && cluster_id > 0,\n                    \"cluster_id(%zd) for %s should be in [1, 127]\",\n                    cluster_id,\n                    cluster.data());\n            _group.emplace(cluster, 
static_cast<uint8_t>(cluster_id));\n        }\n        dassert_f(clusters.size() == _group.size(),\n                  \"there might be duplicate cluster_name in configuration\");\n\n        for (const auto &kv : _group) {\n            _distinct_cids.insert(kv.second);\n        }\n        dassert_f(_distinct_cids.size() == _group.size(),\n                  \"there might be duplicate cluster_id in configuration\");\n    }\n    ~duplication_group_registry() = default;\n\n    std::map<std::string, uint8_t> _group;\n    std::set<uint8_t> _distinct_cids;\n\n    friend class utils::singleton<duplication_group_registry>;\n};\n\n} // namespace internal\n\n/*extern*/ error_with<uint8_t> get_duplication_cluster_id(const std::string &cluster_name)\n{\n    return internal::duplication_group_registry::instance().get_cluster_id(cluster_name);\n}\n\n// TODO(wutao1): implement our C++ version of `TSimpleJSONProtocol` if there're\n//               more cases for converting thrift to JSON\nstatic nlohmann::json duplication_entry_to_json(const duplication_entry &ent)\n{\n    char ts_buf[30];\n    utils::time_ms_to_date_time(static_cast<uint64_t>(ent.create_ts), ts_buf, sizeof(ts_buf));\n    nlohmann::json json{\n        {\"dupid\", ent.dupid},\n        {\"create_ts\", ts_buf},\n        {\"remote\", ent.remote},\n        {\"status\", duplication_status_to_string(ent.status)},\n        {\"fail_mode\", duplication_fail_mode_to_string(ent.fail_mode)},\n    };\n    if (ent.__isset.progress) {\n        nlohmann::json sub_json;\n        for (const auto &p : ent.progress) {\n            sub_json[std::to_string(p.first)] = p.second;\n        }\n        json[\"progress\"] = sub_json;\n    }\n    return json;\n}\n\n/*extern*/ std::string duplication_entry_to_string(const duplication_entry &ent)\n{\n    return duplication_entry_to_json(ent).dump();\n}\n\n/*extern*/ std::string duplication_query_response_to_string(const duplication_query_response &resp)\n{\n    nlohmann::json json;\n    int i = 
1;\n    for (const auto &ent : resp.entry_list) {\n        json[\"appid\"] = resp.appid;\n        json[std::to_string(i)] = duplication_entry_to_json(ent);\n        i++;\n    }\n    return json.dump();\n}\n\n/*extern*/ const std::map<std::string, uint8_t> &get_duplication_group()\n{\n    return internal::duplication_group_registry::instance().get_duplication_group();\n}\n\n/*extern*/ const std::set<uint8_t> &get_distinct_cluster_id_set()\n{\n    return internal::duplication_group_registry::instance().get_distinct_cluster_id_set();\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/common/fs_manager.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     fs_manager's implement: used to track the disk position for all the allocated replicas\n *\n * Revision history:\n *     2017-08-08: sunweijie@xiaomi.com, first draft\n */\n\n#include \"fs_manager.h\"\n#include <dsn/utility/utils.h>\n#include <dsn/utility/filesystem.h>\n#include <thread>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/fail_point.h>\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DEFINE_int32(\"replication\",\n                 disk_min_available_space_ratio,\n                 10,\n                 \"if disk available space ratio \"\n                 \"is below this value, this \"\n                 \"disk will be considered as \"\n                 \"space 
insufficient\");\nDSN_TAG_VARIABLE(disk_min_available_space_ratio, FT_MUTABLE);\n\nunsigned dir_node::replicas_count() const\n{\n    unsigned sum = 0;\n    for (const auto &s : holding_replicas) {\n        sum += s.second.size();\n    }\n    return sum;\n}\n\nunsigned dir_node::replicas_count(app_id id) const\n{\n    const auto iter = holding_replicas.find(id);\n    if (iter == holding_replicas.end())\n        return 0;\n    return iter->second.size();\n}\n\nbool dir_node::has(const gpid &pid) const\n{\n    auto iter = holding_replicas.find(pid.get_app_id());\n    if (iter == holding_replicas.end())\n        return false;\n    return iter->second.find(pid) != iter->second.end();\n}\n\nunsigned dir_node::remove(const gpid &pid)\n{\n    auto iter = holding_replicas.find(pid.get_app_id());\n    if (iter == holding_replicas.end())\n        return 0;\n    return iter->second.erase(pid);\n}\n\nbool dir_node::update_disk_stat(const bool update_disk_status)\n{\n    FAIL_POINT_INJECT_F(\"update_disk_stat\", [](string_view) { return false; });\n    dsn::utils::filesystem::disk_space_info info;\n    if (!dsn::utils::filesystem::get_disk_space_info(full_dir, info)) {\n        derror_f(\"update disk space failed: dir = {}\", full_dir);\n        return false;\n    }\n    // update disk space info\n    disk_capacity_mb = info.capacity / 1024 / 1024;\n    disk_available_mb = info.available / 1024 / 1024;\n    disk_available_ratio = static_cast<int>(\n        disk_capacity_mb == 0 ? 
0 : std::round(disk_available_mb * 100.0 / disk_capacity_mb));\n\n    if (!update_disk_status) {\n        ddebug_f(\"update disk space succeed: dir = {}, capacity_mb = {}, available_mb = {}, \"\n                 \"available_ratio = {}%\",\n                 full_dir,\n                 disk_capacity_mb,\n                 disk_available_mb,\n                 disk_available_ratio);\n        return false;\n    }\n    auto old_status = status;\n    auto new_status = disk_available_ratio < FLAGS_disk_min_available_space_ratio\n                          ? disk_status::SPACE_INSUFFICIENT\n                          : disk_status::NORMAL;\n    if (old_status != new_status) {\n        status = new_status;\n    }\n    ddebug_f(\"update disk space succeed: dir = {}, capacity_mb = {}, available_mb = {}, \"\n             \"available_ratio = {}%, disk_status = {}\",\n             full_dir,\n             disk_capacity_mb,\n             disk_available_mb,\n             disk_available_ratio,\n             enum_to_string(status));\n    return (old_status != new_status);\n}\n\nfs_manager::fs_manager(bool for_test)\n{\n    if (!for_test) {\n        _counter_total_capacity_mb.init_app_counter(\"eon.replica_stub\",\n                                                    \"disk.capacity.total(MB)\",\n                                                    COUNTER_TYPE_NUMBER,\n                                                    \"total disk capacity in MB\");\n        _counter_total_available_mb.init_app_counter(\"eon.replica_stub\",\n                                                     \"disk.available.total(MB)\",\n                                                     COUNTER_TYPE_NUMBER,\n                                                     \"total disk available in MB\");\n        _counter_total_available_ratio.init_app_counter(\"eon.replica_stub\",\n                                                        \"disk.available.total.ratio\",\n                                                        
COUNTER_TYPE_NUMBER,\n                                                        \"total disk available ratio\");\n        _counter_min_available_ratio.init_app_counter(\"eon.replica_stub\",\n                                                      \"disk.available.min.ratio\",\n                                                      COUNTER_TYPE_NUMBER,\n                                                      \"minimal disk available ratio in all disks\");\n        _counter_max_available_ratio.init_app_counter(\"eon.replica_stub\",\n                                                      \"disk.available.max.ratio\",\n                                                      COUNTER_TYPE_NUMBER,\n                                                      \"maximal disk available ratio in all disks\");\n    }\n}\n\ndir_node *fs_manager::get_dir_node(const std::string &subdir)\n{\n    std::string norm_subdir;\n    utils::filesystem::get_normalized_path(subdir, norm_subdir);\n    for (auto &n : _dir_nodes) {\n        // if input is a subdir of some dir_nodes\n        const std::string &d = n->full_dir;\n        if (norm_subdir.compare(0, d.size(), d) == 0 &&\n            (norm_subdir.size() == d.size() || norm_subdir[d.size()] == '/')) {\n            return n.get();\n        }\n    }\n    return nullptr;\n}\n\n// size of the two vectors should be equal\ndsn::error_code fs_manager::initialize(const std::vector<std::string> &data_dirs,\n                                       const std::vector<std::string> &tags,\n                                       bool for_test)\n{\n    // create all dir_nodes\n    dassert(data_dirs.size() == tags.size(),\n            \"data_dir size(%u) != tags size(%u)\",\n            data_dirs.size(),\n            tags.size());\n    for (unsigned i = 0; i < data_dirs.size(); ++i) {\n        std::string norm_path;\n        utils::filesystem::get_normalized_path(data_dirs[i], norm_path);\n        dir_node *n = new dir_node(tags[i], norm_path);\n        
_dir_nodes.emplace_back(n);\n        ddebug(\"%s: mark data dir(%s) as tag(%s)\",\n               dsn_primary_address().to_string(),\n               norm_path.c_str(),\n               tags[i].c_str());\n    }\n    _available_data_dirs = data_dirs;\n\n    if (!for_test) {\n        update_disk_stat(false);\n    }\n    return dsn::ERR_OK;\n}\n\ndsn::error_code fs_manager::get_disk_tag(const std::string &dir, std::string &tag)\n{\n    dir_node *n = get_dir_node(dir);\n    if (nullptr == n) {\n        return dsn::ERR_OBJECT_NOT_FOUND;\n    } else {\n        tag = n->tag;\n        return dsn::ERR_OK;\n    }\n}\n\nvoid fs_manager::add_replica(const gpid &pid, const std::string &pid_dir)\n{\n    dir_node *n = get_dir_node(pid_dir);\n    if (nullptr == n) {\n        derror(\"%s: dir(%s) of gpid(%d.%d) haven't registered\",\n               dsn_primary_address().to_string(),\n               pid_dir.c_str(),\n               pid.get_app_id(),\n               pid.get_partition_index());\n    } else {\n        zauto_write_lock l(_lock);\n        std::set<dsn::gpid> &replicas_for_app = n->holding_replicas[pid.get_app_id()];\n        auto result = replicas_for_app.emplace(pid);\n        if (!result.second) {\n            dwarn(\"%s: gpid(%d.%d) already in the dir_node(%s)\",\n                  dsn_primary_address().to_string(),\n                  pid.get_app_id(),\n                  pid.get_partition_index(),\n                  n->tag.c_str());\n        } else {\n            ddebug(\"%s: add gpid(%d.%d) to dir_node(%s)\",\n                   dsn_primary_address().to_string(),\n                   pid.get_app_id(),\n                   pid.get_partition_index(),\n                   n->tag.c_str());\n        }\n    }\n}\n\nvoid fs_manager::allocate_dir(const gpid &pid, const std::string &type, /*out*/ std::string &dir)\n{\n    char buffer[256];\n    sprintf(buffer, \"%d.%d.%s\", pid.get_app_id(), pid.get_partition_index(), type.c_str());\n\n    zauto_write_lock l(_lock);\n\n    
dir_node *selected = nullptr;\n\n    unsigned least_app_replicas_count = 0;\n    unsigned least_total_replicas_count = 0;\n\n    for (auto &n : _dir_nodes) {\n        dassert(!n->has(pid),\n                \"gpid(%d.%d) already in dir_node(%s)\",\n                pid.get_app_id(),\n                pid.get_partition_index(),\n                n->tag.c_str());\n        unsigned app_replicas = n->replicas_count(pid.get_app_id());\n        unsigned total_replicas = n->replicas_count();\n\n        if (selected == nullptr || least_app_replicas_count > app_replicas) {\n            least_app_replicas_count = app_replicas;\n            least_total_replicas_count = total_replicas;\n            selected = n.get();\n        } else if (least_app_replicas_count == app_replicas &&\n                   least_total_replicas_count > total_replicas) {\n            least_total_replicas_count = total_replicas;\n            selected = n.get();\n        }\n    }\n\n    ddebug(\n        \"%s: put pid(%d.%d) to dir(%s), which has %u replicas of current app, %u replicas totally\",\n        dsn_primary_address().to_string(),\n        pid.get_app_id(),\n        pid.get_partition_index(),\n        selected->tag.c_str(),\n        least_app_replicas_count,\n        least_total_replicas_count);\n\n    selected->holding_replicas[pid.get_app_id()].emplace(pid);\n    dir = utils::filesystem::path_combine(selected->full_dir, buffer);\n}\n\nvoid fs_manager::remove_replica(const gpid &pid)\n{\n    zauto_write_lock l(_lock);\n    unsigned remove_count = 0;\n    for (auto &n : _dir_nodes) {\n        unsigned r = n->remove(pid);\n        dassert(remove_count + r <= 1,\n                \"gpid(%d.%d) found in dir(%s), which was removed before\",\n                pid.get_app_id(),\n                pid.get_partition_index(),\n                n->tag.c_str());\n        if (r != 0) {\n            ddebug(\"%s: remove gpid(%d.%d) from dir(%s)\",\n                   dsn_primary_address().to_string(),\n                
   pid.get_app_id(),\n                   pid.get_partition_index(),\n                   n->tag.c_str());\n        }\n        remove_count += r;\n    }\n}\n\nbool fs_manager::for_each_dir_node(const std::function<bool(const dir_node &)> &func) const\n{\n    zauto_read_lock l(_lock);\n    for (auto &n : _dir_nodes) {\n        if (!func(*n))\n            return false;\n    }\n    return true;\n}\n\nvoid fs_manager::update_disk_stat(bool check_status_changed)\n{\n    reset_disk_stat();\n    for (auto &dir_node : _dir_nodes) {\n        if (dir_node->update_disk_stat(check_status_changed)) {\n            _status_updated_dir_nodes.emplace_back(dir_node);\n        }\n        _total_capacity_mb += dir_node->disk_capacity_mb;\n        _total_available_mb += dir_node->disk_available_mb;\n        _min_available_ratio = std::min(dir_node->disk_available_ratio, _min_available_ratio);\n        _max_available_ratio = std::max(dir_node->disk_available_ratio, _max_available_ratio);\n    }\n    _total_available_ratio = static_cast<int>(\n        _total_capacity_mb == 0 ? 
0 : std::round(_total_available_mb * 100.0 / _total_capacity_mb));\n\n    ddebug_f(\"update disk space succeed: disk_count = {}, total_capacity_mb = {}, \"\n             \"total_available_mb = {}, total_available_ratio = {}%, min_available_ratio = {}%, \"\n             \"max_available_ratio = {}%\",\n             _dir_nodes.size(),\n             _total_capacity_mb,\n             _total_available_mb,\n             _total_available_ratio,\n             _min_available_ratio,\n             _max_available_ratio);\n    _counter_total_capacity_mb->set(_total_capacity_mb);\n    _counter_total_available_mb->set(_total_available_mb);\n    _counter_total_available_ratio->set(_total_available_ratio);\n    _counter_min_available_ratio->set(_min_available_ratio);\n    _counter_max_available_ratio->set(_max_available_ratio);\n}\n\nvoid fs_manager::add_new_dir_node(const std::string &data_dir, const std::string &tag)\n{\n    zauto_write_lock l(_lock);\n    std::string norm_path;\n    utils::filesystem::get_normalized_path(data_dir, norm_path);\n    dir_node *n = new dir_node(tag, norm_path);\n    _dir_nodes.emplace_back(n);\n    _available_data_dirs.emplace_back(data_dir);\n    ddebug_f(\"{}: mark data dir({}) as tag({})\", dsn_primary_address().to_string(), norm_path, tag);\n}\n\nbool fs_manager::is_dir_node_available(const std::string &data_dir, const std::string &tag) const\n{\n    zauto_read_lock l(_lock);\n    for (const auto &dir_node : _dir_nodes) {\n        std::string norm_path;\n        utils::filesystem::get_normalized_path(data_dir, norm_path);\n        if (dir_node->full_dir == norm_path || dir_node->tag == tag) {\n            return true;\n        }\n    }\n    return false;\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/common/fs_manager.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <memory>\n\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n#include <dsn/service_api_cpp.h>\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/utility/flags.h>\n\n#include \"replication_common.h\"\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DECLARE_int32(disk_min_available_space_ratio);\n\nstruct dir_node\n{\npublic:\n    std::string tag;\n    std::string full_dir;\n    int64_t disk_capacity_mb;\n    int64_t disk_available_mb;\n    int disk_available_ratio;\n    disk_status::type status;\n    std::map<app_id, std::set<gpid>> holding_replicas;\n    std::map<app_id, std::set<gpid>> holding_primary_replicas;\n    std::map<app_id, std::set<gpid>> holding_secondary_replicas;\n\npublic:\n    dir_node(const std::string &tag_,\n             const std::string &dir_,\n             int64_t disk_capacity_mb_ = 0,\n             int64_t disk_available_mb_ = 0,\n             int disk_available_ratio_ = 0,\n             disk_status::type status_ = disk_status::NORMAL)\n        : tag(tag_),\n          full_dir(dir_),\n          disk_capacity_mb(disk_capacity_mb_),\n          disk_available_mb(disk_available_mb_),\n          
disk_available_ratio(disk_available_ratio_),\n          status(status_)\n    {\n    }\n    unsigned replicas_count(app_id id) const;\n    unsigned replicas_count() const;\n    bool has(const dsn::gpid &pid) const;\n    unsigned remove(const dsn::gpid &pid);\n    bool update_disk_stat(const bool update_disk_status);\n};\n\nclass fs_manager\n{\npublic:\n    fs_manager(bool for_test);\n    ~fs_manager() {}\n\n    // this should be called before open/load any replicas\n    dsn::error_code initialize(const replication_options &opts);\n    dsn::error_code initialize(const std::vector<std::string> &data_dirs,\n                               const std::vector<std::string> &tags,\n                               bool for_test);\n\n    dsn::error_code get_disk_tag(const std::string &dir, /*out*/ std::string &tag);\n    void allocate_dir(const dsn::gpid &pid,\n                      const std::string &type,\n                      /*out*/ std::string &dir);\n    void add_replica(const dsn::gpid &pid, const std::string &pid_dir);\n    void remove_replica(const dsn::gpid &pid);\n    bool for_each_dir_node(const std::function<bool(const dir_node &)> &func) const;\n    void update_disk_stat(bool check_status_changed = true);\n\n    void add_new_dir_node(const std::string &data_dir, const std::string &tag);\n    bool is_dir_node_available(const std::string &data_dir, const std::string &tag) const;\n    const std::vector<std::string> &get_available_data_dirs() const\n    {\n        zauto_read_lock l(_lock);\n        return _available_data_dirs;\n    }\n\nprivate:\n    void reset_disk_stat()\n    {\n        _total_capacity_mb = 0;\n        _total_available_mb = 0;\n        _total_available_ratio = 0;\n        _min_available_ratio = 100;\n        _max_available_ratio = 0;\n        _status_updated_dir_nodes.clear();\n    }\n\n    dir_node *get_dir_node(const std::string &subdir);\n\n    // when visit the tag/storage of the _dir_nodes map, there's no need to protect by the lock.\n    // 
but when visiting the holding_replicas, you must take care.\n    mutable zrwlock_nr _lock;\n\n    int64_t _total_capacity_mb = 0;\n    int64_t _total_available_mb = 0;\n    int _total_available_ratio = 0;\n    int _min_available_ratio = 100;\n    int _max_available_ratio = 0;\n\n    std::vector<std::shared_ptr<dir_node>> _dir_nodes;\n    std::vector<std::string> _available_data_dirs;\n\n    // Used for disk available space check\n    // disk status will be updated periodically, this vector records nodes whose disk_status changed\n    // in this round\n    std::vector<std::shared_ptr<dir_node>> _status_updated_dir_nodes;\n\n    perf_counter_wrapper _counter_total_capacity_mb;\n    perf_counter_wrapper _counter_total_available_mb;\n    perf_counter_wrapper _counter_total_available_ratio;\n    perf_counter_wrapper _counter_min_available_ratio;\n    perf_counter_wrapper _counter_max_available_ratio;\n\n    friend class replica_test;\n    friend class replica_stub;\n    friend class mock_replica_stub;\n    friend class replica_disk_migrator;\n    friend class replica_disk_test_base;\n    friend class open_replica_test;\n};\n} // replication\n} // dsn\n"
  },
  {
    "path": "src/common/manual_compact.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"meta_admin_types.h\"\n#include <dsn/cpp/rpc_holder.h>\n\nnamespace dsn {\nnamespace replication {\ntypedef rpc_holder<start_app_manual_compact_request, start_app_manual_compact_response>\n    start_manual_compact_rpc;\ntypedef rpc_holder<query_app_manual_compact_request, query_app_manual_compact_response>\n    query_manual_compact_rpc;\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/common/meta_admin.thrift",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\ninclude \"../dsn.thrift\"\ninclude \"../dsn.layer2.thrift\"\ninclude \"metadata.thrift\"\n\nnamespace cpp dsn.replication\n\n// This file contains the administration RPCs from client to MetaServer.\n\nenum config_type\n{\n    CT_INVALID,\n    CT_ASSIGN_PRIMARY,\n    CT_UPGRADE_TO_PRIMARY,\n    CT_ADD_SECONDARY,\n    CT_UPGRADE_TO_SECONDARY, // not used by meta server\n    CT_DOWNGRADE_TO_SECONDARY,\n    CT_DOWNGRADE_TO_INACTIVE,\n    CT_REMOVE,\n    CT_ADD_SECONDARY_FOR_LB,\n    CT_PRIMARY_FORCE_UPDATE_BALLOT,\n    CT_DROP_PARTITION,\n    CT_REGISTER_CHILD\n}\n\nenum node_status\n{\n    NS_INVALID,\n    NS_ALIVE,\n    NS_UNALIVE,\n}\n\n// primary | secondary(upgrading) (w/ new config) => meta server\n// also served as proposals from 
meta server to replica servers\nstruct configuration_update_request\n{\n    1:dsn.layer2.app_info                 info;\n    2:dsn.layer2.partition_configuration  config;\n    3:config_type              type = config_type.CT_INVALID;\n    4:dsn.rpc_address          node;\n    5:dsn.rpc_address          host_node; // deprecated, only used by stateless apps\n\n    // Used for partition split\n    // if replica is splitting (whose split_status is not NOT_SPLIT)\n    // the `meta_split_status` will be set\n    // only used when on_config_sync\n    6:optional metadata.split_status    meta_split_status;\n}\n\n// meta server (config mgr) => primary | secondary (downgrade) (w/ new config)\nstruct configuration_update_response\n{\n    1:dsn.error_code           err;\n    2:dsn.layer2.partition_configuration  config;\n}\n\n// client => meta server\nstruct replica_server_info\n{\n    // replica server can report its geo position\n    // possible tags may be:\n    // geo_tags[\"host\"] = hostid;\n    // geo_tags[\"rack\"] = rackid\n    // geo_tags[\"datacenter\"] = datacenterid\n    // geo_tags[\"city\"] = cityid\n    1:map<string, string> geo_tags;\n    2:i64 total_capacity_mb;\n}\n\nstruct configuration_query_by_node_request\n{\n    1:dsn.rpc_address  node;\n    2:optional list<metadata.replica_info> stored_replicas;\n    3:optional replica_server_info info;\n}\n\nstruct configuration_query_by_node_response\n{\n    1:dsn.error_code err;\n    2:list<configuration_update_request> partitions;\n    3:optional list<metadata.replica_info> gc_replicas;\n}\n\nstruct configuration_recovery_request\n{\n    1:list<dsn.rpc_address> recovery_set;\n    2:bool skip_bad_nodes;\n    3:bool skip_lost_partitions;\n}\n\nstruct configuration_recovery_response\n{\n    1:dsn.error_code err;\n    2:string hint_message;\n}\n\n/////////////////// Tables Management ////////////////////\n\nstruct create_app_options\n{\n    1:i32              partition_count;\n    2:i32              replica_count;\n    
3:bool             success_if_exist;\n    4:string           app_type;\n    5:bool             is_stateful;\n    6:map<string, string>  envs;\n}\n\nstruct configuration_create_app_request\n{\n    1:string                   app_name;\n    2:create_app_options       options;\n}\n\n// meta server => client\nstruct configuration_create_app_response\n{\n    1:dsn.error_code   err;\n    2:i32              appid;\n}\n\nstruct drop_app_options\n{\n    1:bool             success_if_not_exist;\n    2:optional i64     reserve_seconds;\n}\n\nstruct configuration_drop_app_request\n{\n    1:string                   app_name;\n    2:drop_app_options         options;\n}\n\nstruct configuration_drop_app_response\n{\n    1:dsn.error_code   err;\n}\n\nstruct configuration_recall_app_request\n{\n    1:i32 app_id;\n    2:string new_app_name;\n}\n\nstruct configuration_recall_app_response\n{\n    1:dsn.error_code err;\n    2:dsn.layer2.app_info info;\n}\n\nstruct configuration_list_apps_request\n{\n    1:dsn.layer2.app_status    status = app_status.AS_INVALID;\n}\n\nstruct configuration_list_apps_response\n{\n    1:dsn.error_code              err;\n    2:list<dsn.layer2.app_info>   infos;\n}\n\nstruct query_app_info_request\n{\n    1:dsn.rpc_address meta_server;\n}\n\nstruct query_app_info_response\n{\n    1:dsn.error_code err;\n    2:list<dsn.layer2.app_info> apps;\n}\n\nenum app_env_operation\n{\n    APP_ENV_OP_INVALID,\n    APP_ENV_OP_SET,\n    APP_ENV_OP_DEL,\n    APP_ENV_OP_CLEAR\n}\n\nstruct configuration_update_app_env_request\n{\n    1:string app_name;\n    2:app_env_operation op = app_env_operation.APP_ENV_OP_INVALID;\n    3:optional list<string> keys;           // used for set and del\n    4:optional list<string> values;         // only used for set\n    5:optional string clear_prefix;         // only used for clear\n                                            // if clear_prefix is empty then we clear all envs\n                                            // else clear the env 
that key = \"clear_prefix.xxx\"\n}\n\nstruct configuration_update_app_env_response\n{\n    1:dsn.error_code err;\n    2:string hint_message;\n}\n\nstruct start_app_manual_compact_request\n{\n    1:string        app_name;\n    2:optional i64  trigger_time;\n    3:optional i32  target_level;\n    4:optional bool bottommost;\n    5:optional i32  max_running_count;\n}\n\nstruct start_app_manual_compact_response\n{\n    // Possible error:\n    // - ERR_APP_NOT_EXIST: app not exist\n    // - ERR_APP_DROPPED: app has been dropped\n    // - ERR_OPERATION_DISABLED: app disable manual compaction\n    // - ERR_INVALID_PARAMETERS: invalid manual compaction parameters\n    1:dsn.error_code    err;\n    2:string            hint_msg;\n}\n\nstruct query_app_manual_compact_request\n{\n    1:string app_name;\n}\n\nstruct query_app_manual_compact_response\n{\n    // Possible error:\n    // - ERR_APP_NOT_EXIST: app not exist\n    // - ERR_APP_DROPPED: app has been dropped\n    // - ERR_INVALID_STATE: app is not executing manual compaction\n    1:dsn.error_code    err;\n    2:string            hint_msg;\n    3:optional i32      progress;\n}\n\n/////////////////// Nodes Management ////////////////////\n\nstruct node_info\n{\n    1:node_status      status = node_status.NS_INVALID;\n    2:dsn.rpc_address  address;\n}\n\nstruct configuration_list_nodes_request\n{\n    1:node_status              status = node_status.NS_INVALID;\n}\n\nstruct configuration_list_nodes_response\n{\n    1:dsn.error_code   err;\n    2:list<node_info>  infos;\n}\n\nstruct configuration_cluster_info_request\n{\n}\n\nstruct configuration_cluster_info_response\n{\n    1:dsn.error_code   err;\n    2:list<string>     keys;\n    3:list<string>     values;\n}\n\nenum meta_function_level\n{\n    // there are 4 ways to modify the meta-server's status:\n    // 0. DDL operation: create/drop/recall table\n    // 1. downgrade primary when detecting it is not alive\n    // 2. 
accept primary's update-request to kickoff some secondaries\n    // 3. make balancer proposal, which further triggers 2\n    // according to these ways, we give meta several active levels.\n\n    fl_stopped = 100, //we don't take any action to modify the meta's status, even the DDL operations are not responded to\n    fl_blind = 200, //only DDL operations are responded to, 1 2 3 are just ignored\n    fl_freezed = 300, //0 1 are responded to, 2 3 ignored\n    fl_steady = 400, //0 1 2 are responded to, don't do any balancer\n    fl_lively = 500, //fully functional\n    fl_invalid = 10000\n}\n\n// if the level is invalid, we just respond with the old level of meta without updating it\nstruct configuration_meta_control_request\n{\n    1:meta_function_level level;\n}\n\nstruct configuration_meta_control_response\n{\n    1:dsn.error_code err;\n    2:meta_function_level old_level;\n}\n\nenum balancer_request_type\n{\n    move_primary,\n    copy_primary,\n    copy_secondary,\n}\n\nstruct configuration_proposal_action\n{\n    1:dsn.rpc_address target;\n    2:dsn.rpc_address node;\n    3:config_type type;\n\n    // deprecated now\n    // new fields of this struct should start with 5\n    // 4:i64 period_ts;\n}\n\nstruct configuration_balancer_request\n{\n    1:dsn.gpid gpid;\n    2:list<configuration_proposal_action> action_list;\n    3:optional bool force = false;\n    4:optional balancer_request_type balance_type;\n}\n\nstruct configuration_balancer_response\n{\n    1:dsn.error_code err;\n}\n\nstruct ddd_diagnose_request\n{\n    // app_id == -1 means return all partitions of all apps\n    // app_id != -1 && partition_id == -1 means return all partitions of specified app\n    // app_id != -1 && partition_id != -1 means return specified partition\n    1:dsn.gpid pid;\n}\n\nstruct ddd_node_info\n{\n    1:dsn.rpc_address node;\n    2:i64             drop_time_ms;\n    3:bool            is_alive; // if the node is alive now\n    4:bool            is_collected; // if replicas has been collected 
from this node\n    5:i64             ballot; // collected && ballot == -1 means replica not exist on this node\n    6:i64             last_committed_decree;\n    7:i64             last_prepared_decree;\n}\n\nstruct ddd_partition_info\n{\n    1:dsn.layer2.partition_configuration config;\n    2:list<ddd_node_info>                dropped;\n    3:string                             reason;\n}\n\nstruct ddd_diagnose_response\n{\n    1:dsn.error_code           err;\n    2:list<ddd_partition_info> partitions;\n}\n\nstruct configuration_get_max_replica_count_request\n{\n    1:string                    app_name;\n}\n\nstruct configuration_get_max_replica_count_response\n{\n    1:dsn.error_code            err;\n    2:i32                       max_replica_count;\n    3:string                    hint_message;\n}\n\nstruct configuration_set_max_replica_count_request\n{\n    1:string                    app_name;\n    2:i32                       max_replica_count;\n}\n\nstruct configuration_set_max_replica_count_response\n{\n    1:dsn.error_code            err;\n    2:i32                       old_max_replica_count;\n    3:string                    hint_message;\n}\n"
  },
  {
    "path": "src/common/metadata.thrift",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\ninclude \"../dsn.thrift\"\ninclude \"../dsn.layer2.thrift\"\n\nnamespace cpp dsn.replication\n\nenum partition_status\n{\n    PS_INVALID,\n    PS_INACTIVE,\n    PS_ERROR,\n    PS_PRIMARY,\n    PS_SECONDARY,\n    PS_POTENTIAL_SECONDARY,\n    PS_PARTITION_SPLIT\n}\n\n// partition split status\nenum split_status\n{\n    // idle state\n    NOT_SPLIT,\n    // A replica is splitting into two replicas, original one called parent, new one called child\n    SPLITTING,\n    PAUSING,\n    PAUSED,\n    // After split is successfully cancelled, the state turns into NOT_SPLIT\n    CANCELING\n}\n\nenum disk_status\n{\n    NORMAL = 0,\n    SPACE_INSUFFICIENT\n}\n\nenum manual_compaction_status\n{\n    IDLE = 0,\n    QUEUING,\n    RUNNING,\n    
FINISHED\n}\n\n// Used for cold backup and bulk load\nstruct file_meta\n{\n    1:string    name;\n    2:i64       size;\n    3:string    md5;\n}\n\nstruct replica_configuration\n{\n    1:dsn.gpid            pid;\n    2:i64                 ballot;\n    3:dsn.rpc_address     primary;\n    4:partition_status    status = partition_status.PS_INVALID;\n    5:i64                 learner_signature;\n    // Used for bulk load\n    // secondary will pop all committed mutations even if buffer is not full\n    6:optional bool       pop_all = false;\n    // Used for partition split when primary send prepare message to secondary\n    // 1. true - secondary should copy mutation in this prepare message synchronously,\n    //           and _is_sync_to_child in mutation structure should set true\n    // 2. false - secondary copy mutation in this prepare message asynchronously\n    // NOTICE: it should always be false when update_local_configuration\n    7:optional bool       split_sync_to_child = false;\n}\n\nstruct replica_info\n{\n    1:dsn.gpid                          pid;\n    2:i64                               ballot;\n    3:partition_status                  status;\n    4:i64                               last_committed_decree;\n    5:i64                               last_prepared_decree;\n    6:i64                               last_durable_decree;\n    7:string                            app_type;\n    8:string                            disk_tag;\n    9:optional manual_compaction_status manual_compact_status;\n}\n"
  },
  {
    "path": "src/common/partition_split.thrift",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\ninclude \"../dsn.thrift\"\ninclude \"../dsn.layer2.thrift\"\ninclude \"metadata.thrift\"\n\nnamespace cpp dsn.replication\n\n// client to meta server to start partition split\nstruct start_partition_split_request\n{\n    1:string    app_name;\n    2:i32       new_partition_count;\n}\n\nstruct start_partition_split_response\n{\n    // Possible errors:\n    // - ERR_APP_NOT_EXIST: app not exist\n    // - ERR_APP_DROPPED: app has been dropped\n    // - ERR_INVALID_PARAMETERS: if the given new_partition_count != old_partition_count * 2\n    // - ERR_BUSY - if app is already executing partition split\n    1:dsn.error_code    err;\n    2:string            hint_msg;\n}\n\nenum split_control_type\n{\n    PAUSE,\n    RESTART,\n    CANCEL\n}\n\n// client to meta server to control partition split\n// support three control type: pause, restart, cancel\nstruct control_split_request\n{\n    1:string                app_name;\n    2:split_control_type    control_type\n    // for pause, parent_pidx >= 0, pause specific partition, parent_pidx = -1, pause all splitting partition\n    // for restart, parent_pidx >= 0, restart specific partition, parent_pidx = 
-1, restart all paused partition\n    // for cancel, parent_pidx will always be -1\n    3:i32                   parent_pidx;\n    // only used for cancel\n    4:optional i32          old_partition_count;\n}\n\nstruct control_split_response\n{\n    // Possible errors:\n    // - ERR_APP_NOT_EXIST: app not exist\n    // - ERR_APP_DROPPED: app has been dropped\n    // - ERR_INVALID_STATE: wrong partition split_status\n    // - ERR_INVALID_PARAMETERS: invalid parent_pidx or old_partition_count\n    // - ERR_CHILD_REGISTERED: child partition has been registered, pause partition split or cancel split failed\n    1:dsn.error_code    err;\n    2:optional string   hint_msg;\n}\n\n// client->meta server to query partition split status\nstruct query_split_request\n{\n    1:string    app_name;\n}\n\nstruct query_split_response\n{\n    // Possible errors:\n    // - ERR_APP_NOT_EXIST: app not exist\n    // - ERR_APP_DROPPED: app has been dropped\n    // - ERR_INVALID_STATE: app is not splitting\n    1:dsn.error_code            err;\n    2:i32                       new_partition_count;\n    3:map<i32,metadata.split_status>     status;\n    4:optional string           hint_msg;\n}\n\n// child to primary parent, notifying that itself has caught up with parent\nstruct notify_catch_up_request\n{\n    1:dsn.gpid          parent_gpid;\n    2:dsn.gpid          child_gpid;\n    3:i64               child_ballot;\n    4:dsn.rpc_address   child_address;\n}\n\nstruct notify_cacth_up_response\n{\n    // Possible errors:\n    // - ERR_OBJECT_NOT_FOUND: replica can not be found\n    // - ERR_INVALID_STATE: replica is not primary or ballot not match or child_gpid not match\n    1:dsn.error_code    err;\n}\n\n// primary parent -> child replicas to update partition count\nstruct update_child_group_partition_count_request\n{\n    1:dsn.rpc_address   target_address;\n    2:i32               new_partition_count;\n    3:dsn.gpid          child_pid;\n    4:i64               ballot;\n}\n\nstruct 
update_child_group_partition_count_response\n{\n    // Possible errors:\n    // - ERR_OBJECT_NOT_FOUND: replica can not be found\n    // - ERR_VERSION_OUTDATED: request is outdated\n    1:dsn.error_code    err;\n}\n\n// primary parent -> meta server, register child on meta_server\nstruct register_child_request\n{\n    1:dsn.layer2.app_info                   app;\n    2:dsn.layer2.partition_configuration    parent_config;\n    3:dsn.layer2.partition_configuration    child_config;\n    4:dsn.rpc_address                       primary_address;\n}\n\nstruct register_child_response\n{\n    // Possible errors:\n    // - ERR_INVALID_VERSION: request is out-dated\n    // - ERR_CHILD_REGISTERED: child has been registered\n    // - ERR_IO_PENDING: meta is executing another remote sync task\n    // - ERR_INVALID_STATE: parent partition is not splitting\n    1:dsn.error_code                        err;\n    2:dsn.layer2.app_info                   app;\n    3:dsn.layer2.partition_configuration    parent_config;\n    4:dsn.layer2.partition_configuration    child_config;\n}\n\n// primary -> meta to report pause or cancel split succeed\nstruct notify_stop_split_request\n{\n    1:string        app_name;\n    2:dsn.gpid      parent_gpid;\n    3:metadata.split_status  meta_split_status;\n    4:i32           partition_count;\n}\n\nstruct notify_stop_split_response\n{\n    // Possible errors:\n    // - ERR_INVALID_VERSION: request is out-dated\n    1:dsn.error_code    err;\n}\n\n// primary parent -> meta server, query child state on meta server\nstruct query_child_state_request\n{\n    1:string    app_name\n    2:dsn.gpid  pid;\n    3:i32       partition_count;\n}\n\nstruct query_child_state_response\n{\n    // Possible errors:\n    // - ERR_INVALID_STATE: app is not splitting or partition split has been canceled\n    1:dsn.error_code                                err;\n    2:optional i32                                  partition_count;\n    3:optional 
dsn.layer2.partition_configuration   child_config;\n}\n"
  },
  {
    "path": "src/common/partition_split_common.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"partition_split_types.h\"\n#include <dsn/cpp/rpc_holder.h>\n\nnamespace dsn {\nnamespace replication {\ntypedef rpc_holder<start_partition_split_request, start_partition_split_response> start_split_rpc;\ntypedef rpc_holder<control_split_request, control_split_response> control_split_rpc;\ntypedef rpc_holder<query_split_request, query_split_response> query_split_rpc;\ntypedef rpc_holder<register_child_request, register_child_response> register_child_rpc;\ntypedef rpc_holder<notify_stop_split_request, notify_stop_split_response> notify_stop_split_rpc;\ntypedef rpc_holder<query_child_state_request, query_child_state_response> query_child_state_rpc;\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/common/replica_admin.thrift",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\ninclude \"../dsn.thrift\"\ninclude \"../dsn.layer2.thrift\"\ninclude \"metadata.thrift\"\n\nnamespace cpp dsn.replication\n\nstruct query_replica_decree_request\n{\n    1:dsn.gpid pid;\n    2:dsn.rpc_address     node;\n}\n\nstruct query_replica_decree_response\n{\n    1:dsn.error_code      err;\n    2:i64                 last_decree;\n}\n\nstruct query_replica_info_request\n{\n    1:dsn.rpc_address     node;\n}\n\nstruct query_replica_info_response\n{\n    1:dsn.error_code      err;\n    2:list<metadata.replica_info>  replicas;\n}\n\nstruct disk_info\n{\n    1:string tag;\n    2:string full_dir;\n    3:i64 disk_capacity_mb;\n    4:i64 disk_available_mb;\n    // app_id=>set<gpid>\n    5:map<i32,set<dsn.gpid>> 
holding_primary_replicas;\n    6:map<i32,set<dsn.gpid>> holding_secondary_replicas;\n}\n\n// This request is sent from client to replica_server.\nstruct query_disk_info_request\n{\n    1:dsn.rpc_address node;\n    2:string          app_name;\n}\n\n// This response is from replica_server to client.\nstruct query_disk_info_response\n{\n    // app not existed will return \"ERR_OBJECT_NOT_FOUND\", otherwise \"ERR_OK\"\n    1:dsn.error_code err;\n    2:i64 total_capacity_mb;\n    3:i64 total_available_mb;\n    4:list<disk_info> disk_infos;\n}\n\n// This request is sent from client to replica_server.\nstruct replica_disk_migrate_request\n{\n    1:dsn.gpid pid\n    // disk tag, for example `ssd1`. `origin_disk` and `target_disk` must be specified in the config of [replication] data_dirs.\n    2:string origin_disk;\n    3:string target_disk;\n}\n\n// This response is from replica_server to client.\nstruct replica_disk_migrate_response\n{\n   // Possible error:\n   // -ERR_OK: start do replica disk migrate\n   // -ERR_BUSY: current replica migration is running\n   // -ERR_INVALID_STATE: current replica partition status isn't secondary\n   // -ERR_INVALID_PARAMETERS: origin disk is equal with target disk\n   // -ERR_OBJECT_NOT_FOUND: replica not found, origin or target disk isn't existed, origin disk doesn't exist current replica\n   // -ERR_PATH_ALREADY_EXIST: target disk has existed current replica\n   1:dsn.error_code err;\n   2:optional string hint;\n}\n\nenum disk_migration_status\n{\n    IDLE,\n    MOVING,\n    MOVED,\n    CLOSED\n}\n\nenum hotkey_type\n{\n    READ,\n    WRITE\n}\n\nenum detect_action\n{\n    START,\n    STOP,\n    QUERY\n}\n\nstruct detect_hotkey_request {\n    1: hotkey_type type\n    2: detect_action action\n    3: dsn.gpid pid;\n}\n\nstruct detect_hotkey_response {\n    // Possible error:\n    // - ERR_OK: start/stop hotkey detect succeed\n    // - ERR_OBJECT_NOT_FOUND: replica not found\n    // - ERR_SERVICE_ALREADY_EXIST: hotkey detection is 
running now\n    1: dsn.error_code err;\n    2: optional string err_hint;\n    3: optional string hotkey_result;\n}\n\nstruct add_new_disk_request {\n    // format is \"disk_tag:disk_dir,tag2:dir2\"\n    // for example: \"ssd1:/home/work/ssd1\"\n    1: string disk_str;\n}\n\nstruct add_new_disk_response {\n    // Possible error:\n    // - ERR_INVALID_PARAMETERS: invalid disk_str in request\n    // - ERR_NODE_ALREADY_EXIST: data_dir is already available\n    // - ERR_DIR_NOT_EMPTY: data_dir is not empty\n    // - ERR_FILE_OPERATION_FAILED: can't create data_dir or directory can't read/write\n    1: dsn.error_code err;\n    2: optional string err_hint;\n}\n"
  },
  {
    "path": "src/common/replication_common.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <fstream>\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication/replica_envs.h>\n#include <dsn/utility/flags.h>\n#include <dsn/utility/filesystem.h>\n\n#include \"replication_common.h\"\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DEFINE_int32(\"replication\",\n                 max_concurrent_bulk_load_downloading_count,\n                 5,\n                 \"concurrent bulk load downloading replica count\");\n\n/**\n * Empty write is used for flushing WAL log entry which is submit asynchronously.\n * Make sure it can work well if you diable it.\n */\nDSN_DEFINE_bool(\"replication\",\n                empty_write_disabled,\n                false,\n                \"whether to disable empty write, 
default is false\");\nDSN_TAG_VARIABLE(empty_write_disabled, FT_MUTABLE);\n\nreplication_options::replication_options()\n{\n    deny_client_on_start = false;\n    verbose_client_log_on_start = false;\n    verbose_commit_log_on_start = false;\n    delay_for_fd_timeout_on_start = false;\n    duplication_enabled = true;\n\n    prepare_timeout_ms_for_secondaries = 1000;\n    prepare_timeout_ms_for_potential_secondaries = 3000;\n    prepare_decree_gap_for_debug_logging = 10000;\n\n    batch_write_disabled = false;\n    staleness_for_commit = 10;\n    max_mutation_count_in_prepare_list = 110;\n    mutation_2pc_min_replica_count = 2;\n\n    group_check_disabled = false;\n    group_check_interval_ms = 10000;\n\n    checkpoint_disabled = false;\n    checkpoint_interval_seconds = 100;\n    checkpoint_min_decree_gap = 10000;\n    checkpoint_max_interval_hours = 2;\n\n    gc_disabled = false;\n    gc_interval_ms = 30 * 1000;                     // 30 seconds\n    gc_memory_replica_interval_ms = 10 * 60 * 1000; // 10 minutes\n\n    disk_stat_disabled = false;\n    disk_stat_interval_seconds = 600;\n\n    fd_disabled = false;\n    fd_check_interval_seconds = 2;\n    fd_beacon_interval_seconds = 3;\n    fd_lease_seconds = 9;\n    fd_grace_seconds = 10;\n\n    log_private_file_size_mb = 32;\n    log_private_reserve_max_size_mb = 0;\n    log_private_reserve_max_time_seconds = 0;\n\n    log_shared_file_size_mb = 32;\n    log_shared_file_count_limit = 100;\n    log_shared_batch_buffer_kb = 0;\n    log_shared_force_flush = false;\n    log_shared_pending_size_throttling_threshold_kb = 0;\n    log_shared_pending_size_throttling_delay_ms = 0;\n\n    config_sync_disabled = false;\n    config_sync_interval_ms = 30000;\n\n    mem_release_enabled = true;\n    mem_release_check_interval_ms = 3600000;\n    mem_release_max_reserved_mem_percentage = 10;\n\n    lb_interval_ms = 10000;\n\n    learn_app_max_concurrent_count = 5;\n\n    cold_backup_checkpoint_reserve_minutes = 
10;\n}\n\nreplication_options::~replication_options() {}\n\nvoid replication_options::initialize()\n{\n    const service_app_info &info = service_app::current_service_app_info();\n    app_name = info.full_name;\n    app_dir = info.data_dir;\n\n    // slog_dir:\n    // - if config[slog_dir] is empty: \"app_dir/slog\"\n    // - else: \"config[slog_dir]/app_name/slog\"\n    slog_dir = dsn_config_get_value_string(\"replication\", \"slog_dir\", \"\", \"shared log directory\");\n    if (slog_dir.empty()) {\n        slog_dir = app_dir;\n    } else {\n        slog_dir = utils::filesystem::path_combine(slog_dir, app_name);\n    }\n    slog_dir = utils::filesystem::path_combine(slog_dir, \"slog\");\n\n    // get config_data_dirs and config_data_dir_tags from config\n    const std::string &dirs_str =\n        dsn_config_get_value_string(\"replication\", \"data_dirs\", \"\", \"replica directory list\");\n    std::vector<std::string> config_data_dirs;\n    std::vector<std::string> config_data_dir_tags;\n    std::string error_msg = \"\";\n    bool flag = get_data_dir_and_tag(\n        dirs_str, app_dir, app_name, config_data_dirs, config_data_dir_tags, error_msg);\n    dassert_f(flag, error_msg);\n\n    // check if data_dir in black list, data_dirs doesn't contain dir in black list\n    std::string black_list_file =\n        dsn_config_get_value_string(\"replication\",\n                                    \"data_dirs_black_list_file\",\n                                    \"/home/work/.pegasus_data_dirs_black_list\",\n                                    \"replica directory black list file\");\n    std::vector<std::string> black_list_dirs;\n    get_data_dirs_in_black_list(black_list_file, black_list_dirs);\n    for (auto i = 0; i < config_data_dirs.size(); ++i) {\n        if (check_if_in_black_list(black_list_dirs, config_data_dirs[i])) {\n            continue;\n        }\n        data_dirs.emplace_back(config_data_dirs[i]);\n        
data_dir_tags.emplace_back(config_data_dir_tags[i]);\n    }\n\n    if (data_dirs.empty()) {\n        dassert_f(false, \"no replica data dir found, maybe not set or excluded by black list\");\n    }\n\n    deny_client_on_start = dsn_config_get_value_bool(\"replication\",\n                                                     \"deny_client_on_start\",\n                                                     deny_client_on_start,\n                                                     \"whether to deny client read \"\n                                                     \"and write requests when \"\n                                                     \"starting the server, default \"\n                                                     \"is false\");\n    verbose_client_log_on_start = dsn_config_get_value_bool(\"replication\",\n                                                            \"verbose_client_log_on_start\",\n                                                            verbose_client_log_on_start,\n                                                            \"whether to print verbose error \"\n                                                            \"log when reply to client read \"\n                                                            \"and write requests when \"\n                                                            \"starting the server, default \"\n                                                            \"is false\");\n    verbose_commit_log_on_start = dsn_config_get_value_bool(\"replication\",\n                                                            \"verbose_commit_log_on_start\",\n                                                            verbose_commit_log_on_start,\n                                                            \"whether to print verbose log \"\n                                                            \"when commit mutation when \"\n                                                            \"starting the 
server, default \"\n                                                            \"is false\");\n    delay_for_fd_timeout_on_start =\n        dsn_config_get_value_bool(\"replication\",\n                                  \"delay_for_fd_timeout_on_start\",\n                                  delay_for_fd_timeout_on_start,\n                                  \"whether to delay for beacon grace period to make failure \"\n                                  \"detector timeout when starting the server, default is false\");\n\n    duplication_enabled = dsn_config_get_value_bool(\n        \"replication\", \"duplication_enabled\", duplication_enabled, \"is duplication enabled\");\n\n    prepare_timeout_ms_for_secondaries = (int)dsn_config_get_value_uint64(\n        \"replication\",\n        \"prepare_timeout_ms_for_secondaries\",\n        prepare_timeout_ms_for_secondaries,\n        \"timeout (ms) for prepare message to secondaries in two phase commit\");\n    prepare_timeout_ms_for_potential_secondaries = (int)dsn_config_get_value_uint64(\n        \"replication\",\n        \"prepare_timeout_ms_for_potential_secondaries\",\n        prepare_timeout_ms_for_potential_secondaries,\n        \"timeout (ms) for prepare message to potential secondaries in two phase commit\");\n    prepare_decree_gap_for_debug_logging = (int)dsn_config_get_value_uint64(\n        \"replication\",\n        \"prepare_decree_gap_for_debug_logging\",\n        prepare_decree_gap_for_debug_logging,\n        \"if greater than 0, then print debug log every decree gap of preparing\");\n\n    batch_write_disabled =\n        dsn_config_get_value_bool(\"replication\",\n                                  \"batch_write_disabled\",\n                                  batch_write_disabled,\n                                  \"whether to disable auto-batch of replicated write requests\");\n    staleness_for_commit =\n        (int)dsn_config_get_value_uint64(\"replication\",\n                                         
\"staleness_for_commit\",\n                                         staleness_for_commit,\n                                         \"how many concurrent two phase commit rounds are allowed\");\n    max_mutation_count_in_prepare_list =\n        (int)dsn_config_get_value_uint64(\"replication\",\n                                         \"max_mutation_count_in_prepare_list\",\n                                         max_mutation_count_in_prepare_list,\n                                         \"maximum number of mutations in prepare list\");\n    mutation_2pc_min_replica_count = (int)dsn_config_get_value_uint64(\n        \"replication\",\n        \"mutation_2pc_min_replica_count\",\n        mutation_2pc_min_replica_count,\n        \"minimum number of alive replicas under which write is allowed\");\n\n    group_check_disabled = dsn_config_get_value_bool(\"replication\",\n                                                     \"group_check_disabled\",\n                                                     group_check_disabled,\n                                                     \"whether group check is disabled\");\n    group_check_interval_ms =\n        (int)dsn_config_get_value_uint64(\"replication\",\n                                         \"group_check_interval_ms\",\n                                         group_check_interval_ms,\n                                         \"every what period (ms) we check the replica healthness\");\n\n    checkpoint_disabled = dsn_config_get_value_bool(\"replication\",\n                                                    \"checkpoint_disabled\",\n                                                    checkpoint_disabled,\n                                                    \"whether checkpoint is disabled\");\n    checkpoint_interval_seconds = (int)dsn_config_get_value_uint64(\n        \"replication\",\n        \"checkpoint_interval_seconds\",\n        checkpoint_interval_seconds,\n        \"every what period (seconds) we do 
checkpoints for replicated apps\");\n    checkpoint_min_decree_gap =\n        (int64_t)dsn_config_get_value_uint64(\"replication\",\n                                             \"checkpoint_min_decree_gap\",\n                                             checkpoint_min_decree_gap,\n                                             \"minimum decree gap that triggers checkpoint\");\n    checkpoint_max_interval_hours = (int)dsn_config_get_value_uint64(\n        \"replication\",\n        \"checkpoint_max_interval_hours\",\n        checkpoint_max_interval_hours,\n        \"maximum time interval (hours) where a new checkpoint must be created\");\n\n    gc_disabled = dsn_config_get_value_bool(\n        \"replication\", \"gc_disabled\", gc_disabled, \"whether to disable garbage collection\");\n    gc_interval_ms = (int)dsn_config_get_value_uint64(\"replication\",\n                                                      \"gc_interval_ms\",\n                                                      gc_interval_ms,\n                                                      \"every what period (ms) we do garbage \"\n                                                      \"collection for dead replicas, on-disk \"\n                                                      \"state, log, etc.\");\n    gc_memory_replica_interval_ms = (int)dsn_config_get_value_uint64(\n        \"replication\",\n        \"gc_memory_replica_interval_ms\",\n        gc_memory_replica_interval_ms,\n        \"after closing a healthy replica (due to LB), the replica will remain in memory for this \"\n        \"long (ms) for quick recover\");\n\n    disk_stat_disabled = dsn_config_get_value_bool(\n        \"replication\", \"disk_stat_disabled\", disk_stat_disabled, \"whether to disable disk stat\");\n    disk_stat_interval_seconds =\n        (int)dsn_config_get_value_uint64(\"replication\",\n                                         \"disk_stat_interval_seconds\",\n                                         
disk_stat_interval_seconds,\n                                         \"every what period (ms) we do disk stat\");\n\n    fd_disabled = dsn_config_get_value_bool(\n        \"replication\", \"fd_disabled\", fd_disabled, \"whether to disable failure detection\");\n    fd_check_interval_seconds = (int)dsn_config_get_value_uint64(\n        \"replication\",\n        \"fd_check_interval_seconds\",\n        fd_check_interval_seconds,\n        \"every this period(seconds) the FD will check healthness of remote peers\");\n    fd_beacon_interval_seconds = (int)dsn_config_get_value_uint64(\n        \"replication\",\n        \"fd_beacon_interval_seconds\",\n        fd_beacon_interval_seconds,\n        \"every this period(seconds) the FD sends beacon message to remote peers\");\n    fd_lease_seconds =\n        (int)dsn_config_get_value_uint64(\"replication\",\n                                         \"fd_lease_seconds\",\n                                         fd_lease_seconds,\n                                         \"lease (seconds) get from remote FD master\");\n    fd_grace_seconds = (int)dsn_config_get_value_uint64(\n        \"replication\",\n        \"fd_grace_seconds\",\n        fd_grace_seconds,\n        \"grace (seconds) assigned to remote FD slaves (grace > lease)\");\n\n    log_private_file_size_mb =\n        (int)dsn_config_get_value_uint64(\"replication\",\n                                         \"log_private_file_size_mb\",\n                                         log_private_file_size_mb,\n                                         \"private log maximum segment file size (MB)\");\n    // ATTENTION: only when log_private_reserve_max_size_mb and log_private_reserve_max_time_seconds\n    // are both satisfied, the useless logs can be reserved.\n    log_private_reserve_max_size_mb =\n        (int)dsn_config_get_value_uint64(\"replication\",\n                                         \"log_private_reserve_max_size_mb\",\n                                         
log_private_reserve_max_size_mb,\n                                         \"max size of useless private log to be reserved\");\n    log_private_reserve_max_time_seconds = (int)dsn_config_get_value_uint64(\n        \"replication\",\n        \"log_private_reserve_max_time_seconds\",\n        log_private_reserve_max_time_seconds,\n        \"max time in seconds of useless private log to be reserved\");\n\n    log_shared_file_size_mb =\n        (int)dsn_config_get_value_uint64(\"replication\",\n                                         \"log_shared_file_size_mb\",\n                                         log_shared_file_size_mb,\n                                         \"shared log maximum segment file size (MB)\");\n    log_shared_file_count_limit = (int)dsn_config_get_value_uint64(\"replication\",\n                                                                   \"log_shared_file_count_limit\",\n                                                                   log_shared_file_count_limit,\n                                                                   \"shared log maximum file count\");\n    log_shared_batch_buffer_kb =\n        (int)dsn_config_get_value_uint64(\"replication\",\n                                         \"log_shared_batch_buffer_kb\",\n                                         log_shared_batch_buffer_kb,\n                                         \"shared log buffer size (KB) for batching incoming logs\");\n    log_shared_force_flush =\n        dsn_config_get_value_bool(\"replication\",\n                                  \"log_shared_force_flush\",\n                                  log_shared_force_flush,\n                                  \"when write shared log, whether to flush file after write done\");\n    log_shared_pending_size_throttling_threshold_kb =\n        (int)dsn_config_get_value_uint64(\"replication\",\n                                         \"log_shared_pending_size_throttling_threshold_kb\",\n                                 
        log_shared_pending_size_throttling_threshold_kb,\n                                         \"log_shared_pending_size_throttling_threshold_kb\");\n    log_shared_pending_size_throttling_delay_ms =\n        (int)dsn_config_get_value_uint64(\"replication\",\n                                         \"log_shared_pending_size_throttling_delay_ms\",\n                                         log_shared_pending_size_throttling_delay_ms,\n                                         \"log_shared_pending_size_throttling_delay_ms\");\n\n    config_sync_disabled = dsn_config_get_value_bool(\n        \"replication\",\n        \"config_sync_disabled\",\n        config_sync_disabled,\n        \"whether to disable replica configuration periodical sync with the meta server\");\n    config_sync_interval_ms = (int)dsn_config_get_value_uint64(\n        \"replication\",\n        \"config_sync_interval_ms\",\n        config_sync_interval_ms,\n        \"every this period(ms) the replica syncs replica configuration with the meta server\");\n\n    mem_release_enabled = dsn_config_get_value_bool(\"replication\",\n                                                    \"mem_release_enabled\",\n                                                    mem_release_enabled,\n                                                    \"whether to enable periodic memory release\");\n\n    mem_release_check_interval_ms = (int)dsn_config_get_value_uint64(\n        \"replication\",\n        \"mem_release_check_interval_ms\",\n        mem_release_check_interval_ms,\n        \"the replica check if should release memory to the system every this period of time(ms)\");\n\n    mem_release_max_reserved_mem_percentage = (int)dsn_config_get_value_uint64(\n        \"replication\",\n        \"mem_release_max_reserved_mem_percentage\",\n        mem_release_max_reserved_mem_percentage,\n        \"if tcmalloc reserved but not-used memory exceed this percentage of application allocated \"\n        \"memory, replica server 
will release the exceeding memory back to operating system\");\n\n    lb_interval_ms = (int)dsn_config_get_value_uint64(\n        \"replication\",\n        \"lb_interval_ms\",\n        lb_interval_ms,\n        \"every this period(ms) the meta server will do load balance\");\n\n    learn_app_max_concurrent_count =\n        (int)dsn_config_get_value_uint64(\"replication\",\n                                         \"learn_app_max_concurrent_count\",\n                                         learn_app_max_concurrent_count,\n                                         \"max count of learning app concurrently\");\n\n    cold_backup_root = dsn_config_get_value_string(\n        \"replication\", \"cold_backup_root\", \"\", \"cold backup remote storage path prefix\");\n\n    cold_backup_checkpoint_reserve_minutes =\n        (int)dsn_config_get_value_uint64(\"replication\",\n                                         \"cold_backup_checkpoint_reserve_minutes\",\n                                         cold_backup_checkpoint_reserve_minutes,\n                                         \"reserve minutes of cold backup checkpoint\");\n\n    max_concurrent_bulk_load_downloading_count = FLAGS_max_concurrent_bulk_load_downloading_count;\n\n    dassert_f(replica_helper::load_meta_servers(meta_servers), \"invalid meta server config\");\n\n    sanity_check();\n}\n\nvoid replication_options::sanity_check()\n{\n    dassert(max_mutation_count_in_prepare_list >= staleness_for_commit,\n            \"%d VS %d\",\n            max_mutation_count_in_prepare_list,\n            staleness_for_commit);\n}\n\n/*static*/ bool replica_helper::remove_node(::dsn::rpc_address node,\n                                            /*inout*/ std::vector<::dsn::rpc_address> &nodeList)\n{\n    auto it = std::find(nodeList.begin(), nodeList.end(), node);\n    if (it != nodeList.end()) {\n        nodeList.erase(it);\n        return true;\n    } else {\n        return false;\n    }\n}\n\n/*static*/ bool 
replica_helper::get_replica_config(const partition_configuration &partition_config,\n                                                   ::dsn::rpc_address node,\n                                                   /*out*/ replica_configuration &replica_config)\n{\n    replica_config.pid = partition_config.pid;\n    replica_config.primary = partition_config.primary;\n    replica_config.ballot = partition_config.ballot;\n    replica_config.learner_signature = invalid_signature;\n\n    if (node == partition_config.primary) {\n        replica_config.status = partition_status::PS_PRIMARY;\n        return true;\n    } else if (std::find(partition_config.secondaries.begin(),\n                         partition_config.secondaries.end(),\n                         node) != partition_config.secondaries.end()) {\n        replica_config.status = partition_status::PS_SECONDARY;\n        return true;\n    } else {\n        replica_config.status = partition_status::PS_INACTIVE;\n        return false;\n    }\n}\n\nbool replica_helper::load_meta_servers(/*out*/ std::vector<dsn::rpc_address> &servers,\n                                       const char *section,\n                                       const char *key)\n{\n    servers.clear();\n    std::string server_list = dsn_config_get_value_string(section, key, \"\", \"\");\n    std::vector<std::string> lv;\n    ::dsn::utils::split_args(server_list.c_str(), lv, ',');\n    for (auto &s : lv) {\n        ::dsn::rpc_address addr;\n        if (!addr.from_string_ipv4(s.c_str())) {\n            derror_f(\"invalid address '{}' specified in config [{}].{}\", s, section, key);\n            return false;\n        }\n        servers.push_back(addr);\n    }\n    if (servers.empty()) {\n        derror_f(\"no meta server specified in config [{}].{}\", section, key);\n        return false;\n    }\n    return true;\n}\n\n/*static*/ bool\nreplication_options::get_data_dir_and_tag(const std::string &config_dirs_str,\n                                   
       const std::string &default_dir,\n                                          const std::string &app_name,\n                                          /*out*/ std::vector<std::string> &data_dirs,\n                                          /*out*/ std::vector<std::string> &data_dir_tags,\n                                          /*out*/ std::string &err_msg)\n{\n    // - if {config_dirs_str} is empty (return true):\n    //   - dir = {default_dir}\n    //   - dir_tag/data_dir_tag = \"default\"\n    //   - data_dir = {default_dir}/\"reps\"\n    // - else if {config_dirs_str} = \"tag1:dir1,tag2:dir2:tag3:dir3\" (return true):\n    //   - dir1 = \"dir1\"/{app_name}\n    //   - dir_tag1/data_dir_tag1 = \"tag1\"\n    //   - data_dir1 = \"dir1\"/{app_name}/\"reps\"\n    // - else (return false):\n    //   - invalid format and set {err_msg}\n    std::vector<std::string> dirs;\n    std::vector<std::string> dir_tags;\n    utils::split_args(config_dirs_str.c_str(), dirs, ',');\n    if (dirs.empty()) {\n        dirs.push_back(default_dir);\n        dir_tags.push_back(\"default\");\n    } else {\n        for (auto &dir : dirs) {\n            std::vector<std::string> tag_and_dir;\n            utils::split_args(dir.c_str(), tag_and_dir, ':');\n            if (tag_and_dir.size() != 2) {\n                err_msg = fmt::format(\"invalid data_dir item({}) in config\", dir);\n                return false;\n            }\n            if (tag_and_dir[0].empty() || tag_and_dir[1].empty()) {\n                err_msg = fmt::format(\"invalid data_dir item({}) in config\", dir);\n                return false;\n            }\n            dir = utils::filesystem::path_combine(tag_and_dir[1], app_name);\n            for (unsigned i = 0; i < dir_tags.size(); ++i) {\n                if (dirs[i] == dir) {\n                    err_msg = fmt::format(\"dir({}) and dir({}) conflict\", dirs[i], dir);\n                    return false;\n                }\n            }\n            for (unsigned i = 
0; i < dir_tags.size(); ++i) {\n                if (dir_tags[i] == tag_and_dir[0]) {\n                    err_msg = fmt::format(\n                        \"dir({}) and dir({}) have same tag({})\", dirs[i], dir, tag_and_dir[0]);\n                    return false;\n                }\n            }\n            dir_tags.push_back(tag_and_dir[0]);\n        }\n    }\n\n    for (unsigned i = 0; i < dirs.size(); ++i) {\n        const std::string &dir = dirs[i];\n        ddebug_f(\"data_dirs[{}] = {}, tag = {}\", i + 1, dir, dir_tags[i]);\n        data_dirs.push_back(utils::filesystem::path_combine(dir, \"reps\"));\n        data_dir_tags.push_back(dir_tags[i]);\n    }\n    return true;\n}\n\n/*static*/ void\nreplication_options::get_data_dirs_in_black_list(const std::string &fname,\n                                                 /*out*/ std::vector<std::string> &dirs)\n{\n    if (fname.empty() || !utils::filesystem::file_exists(fname)) {\n        ddebug_f(\"data_dirs_black_list_file[{}] not found, ignore it\", fname);\n        return;\n    }\n\n    ddebug_f(\"data_dirs_black_list_file[{}] found, apply it\", fname);\n    std::ifstream file(fname);\n    if (!file) {\n        dassert_f(false, \"open data_dirs_black_list_file failed: {}\", fname);\n    }\n\n    std::string str;\n    int count = 0;\n    while (std::getline(file, str)) {\n        std::string str2 = utils::trim_string(const_cast<char *>(str.c_str()));\n        if (str2.empty()) {\n            continue;\n        }\n        if (str2.back() != '/') {\n            str2.append(\"/\");\n        }\n        dirs.push_back(str2);\n        count++;\n        ddebug_f(\"black_list[{}] = [{}]\", count, str2);\n    }\n}\n\n/*static*/ bool\nreplication_options::check_if_in_black_list(const std::vector<std::string> &black_list_dir,\n                                            const std::string &dir)\n{\n    std::string dir_str = dir;\n    if (!black_list_dir.empty()) {\n        if (dir_str.back() != '/') {\n            
dir_str.append(\"/\");\n        }\n        for (const std::string &black : black_list_dir) {\n            if (dir_str.find(black) == 0) {\n                return true;\n            }\n        }\n    }\n    return false;\n}\n\nconst std::string replica_envs::DENY_CLIENT_REQUEST(\"replica.deny_client_request\");\nconst std::string replica_envs::WRITE_QPS_THROTTLING(\"replica.write_throttling\");\nconst std::string replica_envs::WRITE_SIZE_THROTTLING(\"replica.write_throttling_by_size\");\nconst uint64_t replica_envs::MIN_SLOW_QUERY_THRESHOLD_MS = 20;\nconst std::string replica_envs::SLOW_QUERY_THRESHOLD(\"replica.slow_query_threshold\");\nconst std::string replica_envs::ROCKSDB_USAGE_SCENARIO(\"rocksdb.usage_scenario\");\nconst std::string replica_envs::TABLE_LEVEL_DEFAULT_TTL(\"default_ttl\");\nconst std::string MANUAL_COMPACT_PREFIX(\"manual_compact.\");\nconst std::string replica_envs::MANUAL_COMPACT_DISABLED(MANUAL_COMPACT_PREFIX + \"disabled\");\nconst std::string replica_envs::MANUAL_COMPACT_MAX_CONCURRENT_RUNNING_COUNT(\n    MANUAL_COMPACT_PREFIX + \"max_concurrent_running_count\");\nconst std::string MANUAL_COMPACT_ONCE_PREFIX(MANUAL_COMPACT_PREFIX + \"once.\");\nconst std::string replica_envs::MANUAL_COMPACT_ONCE_TRIGGER_TIME(MANUAL_COMPACT_ONCE_PREFIX +\n                                                                 \"trigger_time\");\nconst std::string replica_envs::MANUAL_COMPACT_ONCE_TARGET_LEVEL(MANUAL_COMPACT_ONCE_PREFIX +\n                                                                 \"target_level\");\nconst std::string replica_envs::MANUAL_COMPACT_ONCE_BOTTOMMOST_LEVEL_COMPACTION(\n    MANUAL_COMPACT_ONCE_PREFIX + \"bottommost_level_compaction\");\nconst std::string MANUAL_COMPACT_PERIODIC_PREFIX(MANUAL_COMPACT_PREFIX + \"periodic.\");\nconst std::string replica_envs::MANUAL_COMPACT_PERIODIC_TRIGGER_TIME(\n    MANUAL_COMPACT_PERIODIC_PREFIX + \"trigger_time\");\nconst std::string replica_envs::MANUAL_COMPACT_PERIODIC_TARGET_LEVEL(\n    
MANUAL_COMPACT_PERIODIC_PREFIX + \"target_level\");\nconst std::string replica_envs::MANUAL_COMPACT_PERIODIC_BOTTOMMOST_LEVEL_COMPACTION(\n    MANUAL_COMPACT_PERIODIC_PREFIX + \"bottommost_level_compaction\");\nconst std::string\n    replica_envs::ROCKSDB_CHECKPOINT_RESERVE_MIN_COUNT(\"rocksdb.checkpoint.reserve_min_count\");\nconst std::string replica_envs::ROCKSDB_CHECKPOINT_RESERVE_TIME_SECONDS(\n    \"rocksdb.checkpoint.reserve_time_seconds\");\nconst std::string replica_envs::ROCKSDB_ITERATION_THRESHOLD_TIME_MS(\n    \"replica.rocksdb_iteration_threshold_time_ms\");\nconst std::string replica_envs::ROCKSDB_BLOCK_CACHE_ENABLED(\"replica.rocksdb_block_cache_enabled\");\nconst std::string replica_envs::BUSINESS_INFO(\"business.info\");\nconst std::string replica_envs::REPLICA_ACCESS_CONTROLLER_ALLOWED_USERS(\n    \"replica_access_controller.allowed_users\");\nconst std::string replica_envs::READ_QPS_THROTTLING(\"replica.read_throttling\");\nconst std::string replica_envs::READ_SIZE_THROTTLING(\"replica.read_throttling_by_size\");\nconst std::string\n    replica_envs::SPLIT_VALIDATE_PARTITION_HASH(\"replica.split.validate_partition_hash\");\nconst std::string replica_envs::USER_SPECIFIED_COMPACTION(\"user_specified_compaction\");\nconst std::string replica_envs::BACKUP_REQUEST_QPS_THROTTLING(\"replica.backup_request_throttling\");\nconst std::string replica_envs::ROCKSDB_ALLOW_INGEST_BEHIND(\"rocksdb.allow_ingest_behind\");\nconst std::string replica_envs::UPDATE_MAX_REPLICA_COUNT(\"max_replica_count.update\");\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/common/replication_common.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/dist/replication.h>\n#include <string>\n\nnamespace dsn {\nnamespace replication {\n\ntypedef std::unordered_map<::dsn::rpc_address, partition_status::type> node_statuses;\ntypedef std::unordered_map<::dsn::rpc_address, dsn::task_ptr> node_tasks;\n\ntypedef rpc_holder<configuration_update_app_env_request, configuration_update_app_env_response>\n    update_app_env_rpc;\n\nclass replication_options\n{\npublic:\n    std::vector<::dsn::rpc_address> meta_servers;\n\n    std::string app_name;\n    std::string app_dir;\n    std::string slog_dir;\n    std::vector<std::string> data_dirs;\n    std::vector<std::string> data_dir_tags;\n\n    bool deny_client_on_start;\n    bool verbose_client_log_on_start;\n    bool 
verbose_commit_log_on_start;\n    bool delay_for_fd_timeout_on_start;\n    bool duplication_enabled;\n\n    int32_t prepare_timeout_ms_for_secondaries;\n    int32_t prepare_timeout_ms_for_potential_secondaries;\n    int32_t prepare_decree_gap_for_debug_logging;\n\n    bool batch_write_disabled;\n    int32_t staleness_for_commit;\n    int32_t max_mutation_count_in_prepare_list;\n    int32_t mutation_2pc_min_replica_count;\n\n    bool group_check_disabled;\n    int32_t group_check_interval_ms;\n\n    bool checkpoint_disabled;\n    int32_t checkpoint_interval_seconds;\n    int64_t checkpoint_min_decree_gap;\n    int32_t checkpoint_max_interval_hours;\n\n    bool gc_disabled;\n    int32_t gc_interval_ms;\n    int32_t gc_memory_replica_interval_ms;\n\n    bool disk_stat_disabled;\n    int32_t disk_stat_interval_seconds;\n\n    bool fd_disabled;\n    int32_t fd_check_interval_seconds;\n    int32_t fd_beacon_interval_seconds;\n    int32_t fd_lease_seconds;\n    int32_t fd_grace_seconds;\n\n    int32_t log_private_file_size_mb;\n    int32_t log_private_reserve_max_size_mb;\n    int32_t log_private_reserve_max_time_seconds;\n\n    int32_t log_shared_file_size_mb;\n    int32_t log_shared_file_count_limit;\n    int32_t log_shared_batch_buffer_kb;\n    bool log_shared_force_flush;\n    int32_t log_shared_pending_size_throttling_threshold_kb;\n    int32_t log_shared_pending_size_throttling_delay_ms;\n\n    bool config_sync_disabled;\n    int32_t config_sync_interval_ms;\n\n    bool mem_release_enabled;\n    int32_t mem_release_check_interval_ms;\n    int32_t mem_release_max_reserved_mem_percentage;\n\n    int32_t lb_interval_ms;\n\n    int32_t learn_app_max_concurrent_count;\n\n    std::string cold_backup_root;\n    int32_t cold_backup_checkpoint_reserve_minutes;\n\n    int32_t max_concurrent_bulk_load_downloading_count;\n\npublic:\n    replication_options();\n    ~replication_options();\n\n    void initialize();\n    static bool get_data_dir_and_tag(const std::string 
&config_dirs_str,\n                                     const std::string &default_dir,\n                                     const std::string &app_name,\n                                     /*out*/ std::vector<std::string> &data_dirs,\n                                     /*out*/ std::vector<std::string> &data_dir_tags,\n                                     /*out*/ std::string &err_msg);\n    static void get_data_dirs_in_black_list(const std::string &fname,\n                                            /*out*/ std::vector<std::string> &dirs);\n    static bool check_if_in_black_list(const std::vector<std::string> &black_list_dir,\n                                       const std::string &dir);\n\nprivate:\n    void sanity_check();\n};\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/common/test/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_replication_common_test)\n\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS\n        dsn_replication_common\n        dsn_runtime\n        gtest\n        )\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\nset(MY_BINPLACES\n        config-test.ini\n        run.sh\n        )\n\ndsn_add_test()\n"
  },
  {
    "path": "src/common/test/common_test.cpp",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#include <dsn/dist/common.h>\n#include <gtest/gtest.h>\n\nnamespace dsn {\nTEST(duplication_common, get_current_cluster_name)\n{\n    ASSERT_STREQ(get_current_cluster_name(), \"master-cluster\");\n}\n} // namespace dsn\n"
  },
  {
    "path": "src/common/test/config-test.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n[apps.replica]\ntype = replica\nrun = true\ncount = 1\nports = 54321\npools = THREAD_POOL_DEFAULT\n\n[core]\n;tool = simulator\ntool = nativerun\n\n;toollets = tracer, profiler\n;fault_injector\npause_on_start = false\ncli_local = false\ncli_remote = false\n\nlogging_start_level = LOG_LEVEL_DEBUG\nlogging_factory_name = dsn::tools::simple_logger\n\n\n[tools.simple_logger]\nfast_flush = true\nshort_header = false\nstderr_start_level = LOG_LEVEL_WARNING\n\n[tools.simulator]\nrandom_seed = 
1465902258\n\n[tools.screen_logger]\nshort_header = false\n\n[network]\n; how many network threads for network library (used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 4\n\n[threadpool.THREAD_POOL_DEFAULT]\nname = default\npartitioned = false\nmax_input_queue_length = 1024\nworker_priority = THREAD_xPRIORITY_NORMAL\nworker_count = 2\n\n[threadpool.THREAD_POOL_REPLICATION]\nname = replica\npartitioned = true\nmax_input_queue_length = 2560\nworker_priority = THREAD_xPRIORITY_NORMAL\nworker_count = 3\n\n[threadpool.THREAD_POOL_REPLICATION_LONG]\nname = replica_long\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\n[replication]\ncluster_name = master-cluster\n\n[duplication-group]\nmaster-cluster = 1\nslave-cluster  = 2\n"
  },
  {
    "path": "src/common/test/duplication_common_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/dist/replication/duplication_common.h>\n#include <gtest/gtest.h>\n\nnamespace dsn {\nnamespace replication {\n\nTEST(duplication_common, get_duplication_cluster_id)\n{\n    ASSERT_EQ(get_duplication_cluster_id(\"master-cluster\").get_value(), 1);\n    ASSERT_EQ(get_duplication_cluster_id(\"slave-cluster\").get_value(), 2);\n\n    ASSERT_EQ(get_duplication_cluster_id(\"\").get_error().code(), ERR_INVALID_PARAMETERS);\n    ASSERT_EQ(get_duplication_cluster_id(\"unknown\").get_error().code(), ERR_OBJECT_NOT_FOUND);\n}\n\nTEST(duplication_common, get_distinct_cluster_id_set)\n{\n    ASSERT_EQ(get_distinct_cluster_id_set(), std::set<uint8_t>({1, 2}));\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/common/test/fs_manager_test.cpp",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#include <gtest/gtest.h>\n#include <dsn/utility/fail_point.h>\n\n#include \"common/fs_manager.h\"\n\nnamespace dsn {\nnamespace replication {\n\nTEST(fs_manager, dir_update_disk_status)\n{\n    std::shared_ptr<dir_node> node = std::make_shared<dir_node>(\"tag\", \"path\");\n    struct update_disk_status\n    {\n        bool update_status;\n        bool mock_insufficient;\n        disk_status::type old_disk_status;\n        disk_status::type new_disk_status;\n        bool expected_ret;\n    } tests[] = {\n        {false, false, disk_status::NORMAL, disk_status::NORMAL, false},\n        {false, true, disk_status::NORMAL, disk_status::NORMAL, false},\n        {true, false, disk_status::NORMAL, disk_status::NORMAL, false},\n        {true, false, disk_status::SPACE_INSUFFICIENT, disk_status::NORMAL, true},\n        {true, true, disk_status::NORMAL, disk_status::SPACE_INSUFFICIENT, true},\n        {true, true, disk_status::SPACE_INSUFFICIENT, disk_status::SPACE_INSUFFICIENT, false}};\n    for (const auto &test : tests) {\n        node->status = test.old_disk_status;\n        fail::setup();\n        if (test.mock_insufficient) {\n           
 fail::cfg(\"filesystem_get_disk_space_info\", \"return(insufficient)\");\n        } else {\n            fail::cfg(\"filesystem_get_disk_space_info\", \"return(normal)\");\n        }\n        ASSERT_EQ(test.expected_ret, node->update_disk_stat(test.update_status));\n        ASSERT_EQ(test.new_disk_status, node->status);\n        fail::teardown();\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/common/test/main.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <gtest/gtest.h>\n\n#include <dsn/service_api_cpp.h>\n\nint g_test_count = 0;\nint g_test_ret = 0;\n\nclass gtest_app : public dsn::service_app\n{\npublic:\n    explicit gtest_app(const dsn::service_app_info *info) : ::dsn::service_app(info) {}\n\n    dsn::error_code start(const std::vector<std::string> &args) override\n    {\n        g_test_ret = RUN_ALL_TESTS();\n        g_test_count = 1;\n        return dsn::ERR_OK;\n    }\n\n    dsn::error_code stop(bool) override { return dsn::ERR_OK; }\n};\n\nGTEST_API_ int main(int argc, char **argv)\n{\n    testing::InitGoogleTest(&argc, argv);\n\n    dsn::service_app::register_factory<gtest_app>(\"replica\");\n\n    dsn_run_config(\"config-test.ini\", false);\n    while (g_test_count 
== 0) {\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n    }\n\n    dsn_exit(g_test_ret);\n}\n"
  },
  {
    "path": "src/common/test/replication_common_test.cpp",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#include <fstream>\n#include <gtest/gtest.h>\n\n#include <dsn/utility/filesystem.h>\n\n#include \"common/replication_common.h\"\n\nnamespace dsn {\nnamespace replication {\n\nTEST(replication_common, get_data_dir_test)\n{\n    std::vector<std::string> data_dirs;\n    std::vector<std::string> data_dir_tags;\n    std::string err_msg = \"\";\n\n    // Test cases:\n    // - default dir: \"\"\n    // - invalid dir:\n    //   - \"wrong_dir\"\n    //   - \"tag:dir:wrong\"\n    //   - \"tag:\"\n    //   - \"tag1:disk,tag2,disk\"\n    //   - \"tag:disk1,tag:disk2\"\n    // - valid: \"tag1:disk1,tag2:disk2\"\n    struct get_data_dir_test\n    {\n        std::string data_dir_str;\n        bool expected_val;\n        int32_t expected_length;\n    } tests[] = {{\"\", true, 1},\n                 {\"wrong_dir\", false, 0},\n                 {\"tag:dir:wrong\", false, 0},\n                 {\"tag:\", false, 0},\n                 {\"tag1:disk,tag2,disk\", false, 0},\n                 {\"tag:disk1,tag:disk2\", false, 0},\n                 {\"tag1:disk1\", true, 1},\n                 {\"tag1:disk1, \", true, 1},\n                 
{\"tag1:disk1,tag2:disk2\", true, 2}};\n    for (const auto &test : tests) {\n        data_dirs.clear();\n        data_dir_tags.clear();\n        bool flag = replication_options::get_data_dir_and_tag(\n            test.data_dir_str, \"test_dir\", \"replica\", data_dirs, data_dir_tags, err_msg);\n        ASSERT_EQ(flag, test.expected_val);\n        ASSERT_EQ(data_dirs.size(), data_dir_tags.size());\n        ASSERT_EQ(data_dirs.size(), test.expected_length);\n    }\n}\n\nTEST(replication_common, get_black_list_test)\n{\n    std::string fname = \"black_list_file\";\n    ASSERT_TRUE(utils::filesystem::create_file(fname));\n    std::ofstream test_file;\n    test_file.open(fname);\n    test_file << \"disk1\\ndisk2\\n\";\n    test_file.close();\n\n    std::vector<std::string> black_list;\n    // Test cases:\n    // - file name not set\n    // - file not exist\n    // - file exist\n    struct get_black_list_test\n    {\n        std::string fname;\n        bool has_black_list;\n    } tests[] = {{\"\", false}, {\"file_not_exist\", false}, {\"black_list_file\", true}};\n    for (const auto &test : tests) {\n        black_list.clear();\n        replication_options::get_data_dirs_in_black_list(test.fname, black_list);\n        ASSERT_EQ(!black_list.empty(), test.has_black_list);\n    }\n    utils::filesystem::remove_file_name(fname);\n}\n\nTEST(replication_common, check_in_black_list_test)\n{\n    std::vector<std::string> black_list;\n    black_list.emplace_back(\"dir1/\");\n    black_list.emplace_back(\"dir2/\");\n\n    // Test cases:\n    // - empty black list\n    // - not in list\n    // - in list\n    struct check_in_list_test\n    {\n        bool list_empty;\n        std::string dir_str;\n        bool expected_result;\n    } tests[]{{true, \"dir1\", false}, {false, \"testdir\", false}, {false, \"dir2\", true}};\n    for (const auto &test : tests) {\n        std::vector<std::string> test_list;\n        if (!test.list_empty) {\n            test_list = black_list;\n        
}\n        ASSERT_EQ(replication_options::check_if_in_black_list(test_list, test.dir_str),\n                  test.expected_result);\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/common/test/run.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nexit_if_fail() {\n    if [ $1 != 0 ]; then\n        echo $2\n        exit 1\n    fi\n}\n\n./dsn_replication_common_test\n\nexit_if_fail $? \"run unit test failed\"\n"
  },
  {
    "path": "src/dsn.layer2.thrift",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\ninclude \"dsn.thrift\"\n\nnamespace cpp dsn\n\nstruct partition_configuration\n{\n    1:dsn.gpid               pid;\n    2:i64                    ballot;\n    3:i32                    max_replica_count;\n    4:dsn.rpc_address        primary;\n    5:list<dsn.rpc_address>  secondaries;\n    6:list<dsn.rpc_address>  last_drops;\n    7:i64                    last_committed_decree;\n    8:i32                    partition_flags;\n}\n\n\nstruct configuration_query_by_index_request\n{\n    1:string           app_name;\n    2:list<i32>        partition_indices;\n}\n\n// for server version > 1.11.2, if err == ERR_FORWARD_TO_OTHERS,\n// then the forward address will be put in partitions[0].primary if exist.\nstruct 
configuration_query_by_index_response\n{\n    1:dsn.error_code                err;\n    2:i32                           app_id;\n    3:i32                           partition_count;\n    4:bool                          is_stateful;\n    5:list<partition_configuration> partitions;\n}\n\nenum app_status\n{\n    AS_INVALID,\n    AS_AVAILABLE,\n    AS_CREATING,\n    AS_CREATE_FAILED, // deprecated\n    AS_DROPPING,\n    AS_DROP_FAILED, // deprecated\n    AS_DROPPED,\n    AS_RECALLING\n}\n\nstruct app_info\n{\n    1:app_status    status = app_status.AS_INVALID;\n    2:string        app_type;\n    3:string        app_name;\n    4:i32           app_id;\n    5:i32           partition_count;\n    6:map<string, string> envs;\n    7:bool          is_stateful;\n    8:i32           max_replica_count;\n    9:i64           expire_second;\n\n    // new fields added from v1.11.0\n    10:i64          create_second;\n    11:i64          drop_second;\n\n    // New fields added from v1.12.0\n    // Whether this app is duplicating.\n    // If true it should prevent its unconfirmed WAL from being compacted.\n    12:optional bool duplicating = false;\n\n    // New fields for partition split\n    // If meta server failed during partition split,\n    // child partition does not exist on remote storage, but partition count changed.\n    // We use init_partition_count to handle those child partitions while sync_apps_from_remote_stroage\n    13:i32          init_partition_count = -1;\n\n    // New fields for bulk load\n    // Whether this app is executing bulk load\n    14:optional bool    is_bulk_loading = false;\n}\n"
  },
  {
    "path": "src/dsn.thrift",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\nnamespace cpp dsn\n\n// place holder\nstruct rpc_address\n{\n}\n\n// place holder\nstruct blob\n{\n}\n\n// place holder\nstruct error_code\n{\n}\n\n// place holder\nstruct task_code\n{\n}\n\n// placeholder\nstruct gpid\n{\n}\n"
  },
  {
    "path": "src/failure_detector/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn.failure_detector)\n\nthrift_generate_cpp(\n    FD_THRIFT_SRCS\n    FD_THRIFT_HDRS\n    ${CMAKE_CURRENT_SOURCE_DIR}/fd.thrift\n)\n\nseT(MY_PROJ_SRC ${FD_THRIFT_SRCS})\n\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS\n        PocoFoundation\n        PocoJSON)\n\n# Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_static_library()\n\nadd_subdirectory(test)\n\n"
  },
  {
    "path": "src/failure_detector/failure_detector.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/dist/failure_detector.h>\n#include <dsn/tool-api/command_manager.h>\n#include <chrono>\n#include <ctime>\n\nnamespace dsn {\nnamespace fd {\n\nfailure_detector::failure_detector()\n{\n    dsn::threadpool_code pool = task_spec::get(LPC_BEACON_CHECK.code())->pool_code;\n    task_spec::get(RPC_FD_FAILURE_DETECTOR_PING.code())->pool_code = pool;\n    task_spec::get(RPC_FD_FAILURE_DETECTOR_PING_ACK.code())->pool_code = pool;\n\n    _recent_beacon_fail_count.init_app_counter(\n        \"eon.failure_detector\",\n        \"recent_beacon_fail_count\",\n     
   COUNTER_TYPE_VOLATILE_NUMBER,\n        \"failure detector beacon fail count in the recent period\");\n\n    _is_started = false;\n}\n\nfailure_detector::~failure_detector()\n{\n    stop();\n    unregister_ctrl_commands();\n}\n\nvoid failure_detector::register_ctrl_commands()\n{\n    static std::once_flag flag;\n    std::call_once(flag, [&]() {\n        _get_allow_list = dsn::command_manager::instance().register_command(\n            {\"fd.allow_list\"},\n            \"fd.allow_list\",\n            \"show allow list of failure detector\",\n            [this](const std::vector<std::string> &args) { return get_allow_list(args); });\n    });\n}\n\nvoid failure_detector::unregister_ctrl_commands() { UNREGISTER_VALID_HANDLER(_get_allow_list); }\n\nerror_code failure_detector::start(uint32_t check_interval_seconds,\n                                   uint32_t beacon_interval_seconds,\n                                   uint32_t lease_seconds,\n                                   uint32_t grace_seconds,\n                                   bool use_allow_list)\n{\n    _check_interval_milliseconds = check_interval_seconds * 1000;\n    _beacon_interval_milliseconds = beacon_interval_seconds * 1000;\n    // here we set beacon timeout less than beacon interval in order to switch master\n    // immediately once the last beacon fails, to make failure detection more robust.\n    _beacon_timeout_milliseconds = _beacon_interval_milliseconds * 2 / 3;\n    _lease_milliseconds = lease_seconds * 1000;\n    _grace_milliseconds = grace_seconds * 1000;\n\n    _use_allow_list = use_allow_list;\n\n    open_service();\n\n    // start periodically check job\n    _check_task = tasking::enqueue_timer(LPC_BEACON_CHECK,\n                                         &_tracker,\n                                         [this] { check_all_records(); },\n                                         std::chrono::milliseconds(_check_interval_milliseconds),\n                                         -1,\n       
                                  std::chrono::milliseconds(_check_interval_milliseconds));\n\n    _is_started = true;\n    return ERR_OK;\n}\n\nvoid failure_detector::stop()\n{\n    _tracker.cancel_outstanding_tasks();\n\n    zauto_lock l(_lock);\n    if (!_is_started) {\n        return;\n    }\n    _is_started = false;\n    _masters.clear();\n    _workers.clear();\n}\n\nvoid failure_detector::register_master(::dsn::rpc_address target)\n{\n    bool setup_timer = false;\n\n    zauto_lock l(_lock);\n\n    master_record record(target, dsn_now_ms());\n\n    auto ret = _masters.insert(std::make_pair(target, record));\n    if (ret.second) {\n        dinfo(\"register master[%s] successfully\", target.to_string());\n        setup_timer = true;\n    } else {\n        // active the beacon again in case previously local node is not in target's allow list\n        if (ret.first->second.rejected) {\n            ret.first->second.rejected = false;\n            setup_timer = true;\n        }\n        dinfo(\"master[%s] already registered\", target.to_string());\n    }\n\n    if (setup_timer) {\n        // delay the beacon slightly to make first beacon greater than the\n        // last_beacon_send_time_with_ack\n        ret.first->second.send_beacon_timer =\n            tasking::enqueue_timer(LPC_BEACON_SEND,\n                                   &_tracker,\n                                   [this, target]() { this->send_beacon(target, dsn_now_ms()); },\n                                   std::chrono::milliseconds(_beacon_interval_milliseconds),\n                                   0,\n                                   std::chrono::milliseconds(1));\n    }\n}\n\nbool failure_detector::switch_master(::dsn::rpc_address from,\n                                     ::dsn::rpc_address to,\n                                     uint32_t delay_milliseconds)\n{\n    /* the caller of switch master shoud lock necessarily to protect _masters */\n    auto it = _masters.find(from);\n    auto it2 
= _masters.find(to);\n    if (it != _masters.end()) {\n        if (it2 != _masters.end()) {\n            dwarn(\"switch master failed as both are already registered, from[%s], to[%s]\",\n                  from.to_string(),\n                  to.to_string());\n            return false;\n        }\n\n        it->second.node = to;\n        it->second.rejected = false;\n        it->second.send_beacon_timer->cancel(true);\n        it->second.send_beacon_timer =\n            tasking::enqueue_timer(LPC_BEACON_SEND,\n                                   &_tracker,\n                                   [this, to]() { this->send_beacon(to, dsn_now_ms()); },\n                                   std::chrono::milliseconds(_beacon_interval_milliseconds),\n                                   0,\n                                   std::chrono::milliseconds(delay_milliseconds));\n\n        _masters.insert(std::make_pair(to, it->second));\n        _masters.erase(from);\n\n        ddebug(\"switch master successfully, from[%s], to[%s]\", from.to_string(), to.to_string());\n    } else {\n        dwarn(\"switch master failed as from node is not registered yet, from[%s], to[%s]\",\n              from.to_string(),\n              to.to_string());\n        return false;\n    }\n    return true;\n}\n\nbool failure_detector::is_time_greater_than(uint64_t ts, uint64_t base) { return ts > base; }\n\nvoid failure_detector::report(::dsn::rpc_address node, bool is_master, bool is_connected)\n{\n    ddebug(\"%s %sconnected: %s\",\n           is_master ? \"master\" : \"worker\",\n           is_connected ? 
\"\" : \"dis\",\n           node.to_string());\n}\n\n/*\n                            |--- lease period ----|lease IsExpired, commit suicide\n                 |--- lease period ---|\n    worker: ---------------------------------------------------------------->\n                 \\    /     \\    /      _\\\n             beacon ack  beacon ack       x (beacon deliver failed)\n                  _\\/        _\\/\n    master: ---------------------------------------------------------------->\n                    |---- grace period ----|\n                               |--- grace period ----| grace IsExpired, declare worker dead\n*/\n\nvoid failure_detector::check_all_records()\n{\n    if (!_is_started) {\n        return;\n    }\n\n    std::vector<rpc_address> expire;\n\n    {\n        zauto_lock l(_lock);\n\n        uint64_t now = dsn_now_ms();\n\n        for (auto itr = _masters.begin(); itr != _masters.end(); itr++) {\n            master_record &record = itr->second;\n\n            /*\n             * \"Check interval\" and \"send beacon\" are interleaved, so we must\n             * test if \"record will expire before next time we check all the records\"\n             * in order to guarantee the perfect fd\n             */\n            // we should ensure now is greater than record.last_send_time_for_beacon_with_ack\n            // to aviod integer overflow\n            if (record.is_alive &&\n                is_time_greater_than(now, record.last_send_time_for_beacon_with_ack) &&\n                now + _check_interval_milliseconds - record.last_send_time_for_beacon_with_ack >\n                    _lease_milliseconds) {\n                derror(\"master %s disconnected, now=%\" PRId64 \", last_send_time=%\" PRId64\n                       \", now+check_interval-last_send_time=%\" PRId64,\n                       record.node.to_string(),\n                       now,\n                       record.last_send_time_for_beacon_with_ack,\n                       now + 
_check_interval_milliseconds -\n                           record.last_send_time_for_beacon_with_ack);\n\n                expire.push_back(record.node);\n                record.is_alive = false;\n\n                report(record.node, true, false);\n            }\n        }\n\n        /*\n         * The disconnected event MUST be under the protection of the _lock\n         * we must guarantee that the record.is_alive switch and the connect/disconnect\n         * callbacks are ATOMIC as these callbacks usually have side effects.\n         *\n         * And you must be careful with these 2 virtual functions as it is very likely to have\n         * nested locks\n         */\n        if (expire.size() > 0) {\n            on_master_disconnected(expire);\n        }\n    }\n\n    // process recv record, for server\n    expire.clear();\n\n    {\n        zauto_lock l(_lock);\n\n        uint64_t now = dsn_now_ms();\n\n        for (auto itq = _workers.begin(); itq != _workers.end(); itq++) {\n            worker_record &record = itq->second;\n\n            // we should ensure now is greater than record.last_beacon_recv_time to avoid integer\n            // overflow\n            if (record.is_alive && is_time_greater_than(now, record.last_beacon_recv_time) &&\n                now - record.last_beacon_recv_time > _grace_milliseconds) {\n                derror(\"worker %s disconnected, now=%\" PRId64 \", last_beacon_recv_time=%\" PRId64\n                       \", now-last_recv=%\" PRId64,\n                       record.node.to_string(),\n                       now,\n                       record.last_beacon_recv_time,\n                       now - record.last_beacon_recv_time);\n\n                expire.push_back(record.node);\n                record.is_alive = false;\n\n                report(record.node, false, false);\n            }\n        }\n        /*\n         * The worker disconnected event also needs to be under protection of the _lock\n         */\n        if 
(expire.size() > 0) {\n            on_worker_disconnected(expire);\n        }\n    }\n}\n\nvoid failure_detector::add_allow_list(::dsn::rpc_address node)\n{\n    zauto_lock l(_lock);\n    _allow_list.insert(node);\n}\n\nbool failure_detector::remove_from_allow_list(::dsn::rpc_address node)\n{\n    zauto_lock l(_lock);\n    return _allow_list.erase(node) > 0;\n}\n\nvoid failure_detector::set_allow_list(const std::vector<std::string> &replica_addrs)\n{\n    dassert(!_is_started, \"FD is already started, the allow list should really not be modified\");\n\n    std::vector<rpc_address> nodes;\n    for (auto &addr : replica_addrs) {\n        rpc_address node;\n        if (!node.from_string_ipv4(addr.c_str())) {\n            dwarn(\"replica_white_list has invalid ip %s, the allow list won't be modified\",\n                  addr.c_str());\n            return;\n        }\n        nodes.push_back(node);\n    }\n\n    for (auto &node : nodes)\n        add_allow_list(node);\n}\n\nstd::string failure_detector::get_allow_list(const std::vector<std::string> &args) const\n{\n    if (!_is_started)\n        return \"error: FD is not started\";\n\n    std::stringstream oss;\n    dsn::zauto_lock l(_lock);\n    oss << \"get ok: allow list \" << (_use_allow_list ? \"enabled. 
list: \" : \"disabled.\");\n    for (auto iter = _allow_list.begin(); iter != _allow_list.end(); ++iter) {\n        if (iter != _allow_list.begin())\n            oss << \",\";\n        oss << iter->to_string();\n    }\n    return oss.str();\n}\n\nvoid failure_detector::on_ping_internal(const beacon_msg &beacon, /*out*/ beacon_ack &ack)\n{\n    ack.time = beacon.time;\n    ack.this_node = beacon.to_addr;\n    ack.primary_node = dsn_primary_address();\n    ack.is_master = true;\n    ack.allowed = true;\n\n    zauto_lock l(_lock);\n\n    uint64_t now = dsn_now_ms();\n    auto node = beacon.from_addr;\n\n    worker_map::iterator itr = _workers.find(node);\n    if (itr == _workers.end()) {\n        // if is a new worker, check allow list first if need\n        if (_use_allow_list && _allow_list.find(node) == _allow_list.end()) {\n            dwarn(\"new worker[%s] is rejected\", node.to_string());\n            ack.allowed = false;\n            return;\n        }\n\n        // create new entry for node\n        worker_record record(node, now);\n        record.is_alive = true;\n        _workers.insert(std::make_pair(node, record));\n\n        report(node, false, true);\n        on_worker_connected(node);\n    } else if (is_time_greater_than(now, itr->second.last_beacon_recv_time)) {\n        // update last_beacon_recv_time\n        itr->second.last_beacon_recv_time = now;\n\n        ddebug(\"master %s update last_beacon_recv_time=%\" PRId64,\n               itr->second.node.to_string(),\n               itr->second.last_beacon_recv_time);\n\n        if (itr->second.is_alive == false) {\n            itr->second.is_alive = true;\n\n            report(node, false, true);\n            on_worker_connected(node);\n        }\n    } else {\n        ddebug(\"now[%\" PRId64 \"] <= last_recv_time[%\" PRId64 \"]\",\n               now,\n               itr->second.last_beacon_recv_time);\n    }\n}\n\nvoid failure_detector::on_ping(const beacon_msg &beacon, 
::dsn::rpc_replier<beacon_ack> &reply)\n{\n    beacon_ack ack;\n    on_ping_internal(beacon, ack);\n    reply(ack);\n}\n\nvoid failure_detector::end_ping(::dsn::error_code err, const beacon_ack &ack, void *)\n{\n    end_ping_internal(err, ack);\n}\n\nbool failure_detector::end_ping_internal(::dsn::error_code err, const beacon_ack &ack)\n{\n    /*\n     * the caller of the end_ping_internal should lock necessarily!!!\n     */\n    uint64_t beacon_send_time = ack.time;\n    auto node = ack.this_node;\n\n    if (err != ERR_OK) {\n        dwarn(\"ping master(%s) failed, timeout_ms = %u, err = %s\",\n              node.to_string(),\n              _beacon_timeout_milliseconds,\n              err.to_string());\n        _recent_beacon_fail_count->increment();\n    }\n\n    master_map::iterator itr = _masters.find(node);\n\n    if (itr == _masters.end()) {\n        dwarn(\"received beacon ack without corresponding master, ignore it, \"\n              \"remote_master[%s], local_worker[%s]\",\n              node.to_string(),\n              dsn_primary_address().to_string());\n        return false;\n    }\n\n    master_record &record = itr->second;\n    if (!ack.allowed) {\n        dwarn(\"worker rejected, stop sending beacon message, \"\n              \"remote_master[%s], local_worker[%s]\",\n              node.to_string(),\n              dsn_primary_address().to_string());\n        record.rejected = true;\n        record.send_beacon_timer->cancel(true);\n        return false;\n    }\n\n    if (!is_time_greater_than(beacon_send_time, record.last_send_time_for_beacon_with_ack)) {\n        // out-dated beacon acks, do nothing\n        ddebug(\"ignore out dated beacon acks, send_time(%lld), last_beacon(%lld)\",\n               beacon_send_time,\n               record.last_send_time_for_beacon_with_ack);\n        return false;\n    }\n\n    // now the ack is applicable\n    if (err != ERR_OK) {\n        return true;\n    }\n\n    // if ack is not from master meta, worker should 
not update its last send time\n    if (!ack.is_master) {\n        dwarn(\"node[%s] is not master, ack.primary_node[%s] is real master\",\n              node.to_string(),\n              ack.primary_node.to_string());\n        return true;\n    }\n\n    // update last_send_time_for_beacon_with_ack\n    record.last_send_time_for_beacon_with_ack = beacon_send_time;\n    record.rejected = false;\n\n    ddebug(\"worker %s send beacon succeed, update last_send_time=%\" PRId64,\n           record.node.to_string(),\n           record.last_send_time_for_beacon_with_ack);\n\n    uint64_t now = dsn_now_ms();\n    // we should ensure now is greater than record.last_send_time_for_beacon_with_ack to avoid\n    // integer overflow\n    if (!record.is_alive && is_time_greater_than(now, record.last_send_time_for_beacon_with_ack) &&\n        now - record.last_send_time_for_beacon_with_ack <= _lease_milliseconds) {\n        // report master connected\n        report(node, true, true);\n        itr->second.is_alive = true;\n        on_master_connected(node);\n    }\n\n    return true;\n}\n\nbool failure_detector::unregister_master(::dsn::rpc_address node)\n{\n    zauto_lock l(_lock);\n    auto it = _masters.find(node);\n\n    if (it != _masters.end()) {\n        it->second.send_beacon_timer->cancel(true);\n        _masters.erase(it);\n        ddebug(\"unregister master[%s] successfully\", node.to_string());\n        return true;\n    } else {\n        ddebug(\"unregister master[%s] failed, cannot find it in FD\", node.to_string());\n        return false;\n    }\n}\n\nbool failure_detector::is_master_connected(::dsn::rpc_address node) const\n{\n    zauto_lock l(_lock);\n    auto it = _masters.find(node);\n    if (it != _masters.end())\n        return it->second.is_alive;\n    else\n        return false;\n}\n\nvoid failure_detector::register_worker(::dsn::rpc_address target, bool is_connected)\n{\n    /*\n     * callers should use the fd::_lock necessarily\n     */\n    worker_record record(target, 
dsn_now_ms());\n    record.is_alive = is_connected ? true : false;\n\n    auto ret = _workers.insert(std::make_pair(target, record));\n    if (ret.second) {\n        dinfo(\"register worker[%s] successfully\", target.to_string());\n    } else {\n        dinfo(\"worker[%s] already registered\", target.to_string());\n    }\n}\n\nbool failure_detector::unregister_worker(::dsn::rpc_address node)\n{\n    /*\n     * callers should use the fd::_lock necessarily\n     */\n    bool ret;\n\n    size_t count = _workers.erase(node);\n\n    if (count == 0) {\n        ret = false;\n    } else {\n        ret = true;\n    }\n\n    dinfo(\"unregister worker[%s] successfully, removed entry count is %u\",\n          node.to_string(),\n          (uint32_t)count);\n\n    return ret;\n}\n\nvoid failure_detector::clear_workers()\n{\n    zauto_lock l(_lock);\n    _workers.clear();\n}\n\nbool failure_detector::is_worker_connected(::dsn::rpc_address node) const\n{\n    zauto_lock l(_lock);\n    auto it = _workers.find(node);\n    if (it != _workers.end())\n        return it->second.is_alive;\n    else\n        return false;\n}\n\nvoid failure_detector::send_beacon(::dsn::rpc_address target, uint64_t time)\n{\n    beacon_msg beacon;\n    beacon.time = time;\n    beacon.from_addr = dsn_primary_address();\n    beacon.to_addr = target;\n    beacon.__set_start_time(static_cast<int64_t>(dsn::utils::process_start_millis()));\n\n    ddebug(\"send ping message, from[%s], to[%s], time[%\" PRId64 \"]\",\n           beacon.from_addr.to_string(),\n           beacon.to_addr.to_string(),\n           time);\n\n    ::dsn::rpc::call(target,\n                     RPC_FD_FAILURE_DETECTOR_PING,\n                     beacon,\n                     &_tracker,\n                     [=](error_code err, beacon_ack &&resp) {\n                         if (err != ::dsn::ERR_OK) {\n                             beacon_ack ack;\n                             ack.time = beacon.time;\n                             
ack.this_node = beacon.to_addr;\n                             ack.primary_node.set_invalid();\n                             ack.is_master = false;\n                             ack.allowed = true;\n                             end_ping(err, ack, nullptr);\n                         } else {\n                             end_ping(err, std::move(resp), nullptr);\n                         }\n                     },\n                     std::chrono::milliseconds(_beacon_timeout_milliseconds));\n}\n}\n} // end namespace\n"
  },
  {
    "path": "src/failure_detector/failure_detector_multimaster.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <cinttypes>\n#include <dsn/tool-api/rpc_address.h>\n#include <dsn/tool-api/group_address.h>\n#include <dsn/dist/failure_detector_multimaster.h>\n#include <dsn/utility/rand.h>\n\nnamespace dsn {\nnamespace dist {\n\nslave_failure_detector_with_multimaster::slave_failure_detector_with_multimaster(\n    std::vector<::dsn::rpc_address> &meta_servers,\n    std::function<void()> &&master_disconnected_callback,\n    std::function<void()> &&master_connected_callback)\n{\n    _meta_servers.assign_group(\"meta-servers\");\n    for (auto &s : meta_servers) {\n     
   _meta_servers.group_address()->add(s);\n    }\n\n    _meta_servers.group_address()->set_leader(\n        meta_servers[rand::next_u32(0, (uint32_t)meta_servers.size() - 1)]);\n\n    // ATTENTION: here we disable dsn_group_set_update_leader_automatically to avoid\n    // failure detecting logic is affected by rpc failure or rpc forwarding.\n    _meta_servers.group_address()->set_update_leader_automatically(false);\n\n    _master_disconnected_callback = std::move(master_disconnected_callback);\n    _master_connected_callback = std::move(master_connected_callback);\n}\n\nvoid slave_failure_detector_with_multimaster::set_leader_for_test(rpc_address meta)\n{\n    _meta_servers.group_address()->set_leader(meta);\n}\n\nvoid slave_failure_detector_with_multimaster::end_ping(::dsn::error_code err,\n                                                       const fd::beacon_ack &ack,\n                                                       void *)\n{\n    ddebug(\"end ping result, error[%s], time[%\" PRId64\n           \"], ack.this_node[%s], ack.primary_node[%s], ack.is_master[%s], ack.allowed[%s]\",\n           err.to_string(),\n           ack.time,\n           ack.this_node.to_string(),\n           ack.primary_node.to_string(),\n           ack.is_master ? \"true\" : \"false\",\n           ack.allowed ? 
\"true\" : \"false\");\n\n    zauto_lock l(failure_detector::_lock);\n    if (!failure_detector::end_ping_internal(err, ack))\n        return;\n\n    dassert(ack.this_node == _meta_servers.group_address()->leader(),\n            \"ack.this_node[%s] vs meta_servers.leader[%s]\",\n            ack.this_node.to_string(),\n            _meta_servers.group_address()->leader().to_string());\n\n    if (ERR_OK != err) {\n        rpc_address next = _meta_servers.group_address()->next(ack.this_node);\n        if (next != ack.this_node) {\n            _meta_servers.group_address()->set_leader(next);\n            // do not start next send_beacon() immediately to avoid send rpc too frequently\n            switch_master(ack.this_node, next, 1000);\n        }\n    } else {\n        if (ack.is_master) {\n            // do nothing\n        } else if (ack.primary_node.is_invalid()) {\n            rpc_address next = _meta_servers.group_address()->next(ack.this_node);\n            if (next != ack.this_node) {\n                _meta_servers.group_address()->set_leader(next);\n                // do not start next send_beacon() immediately to avoid send rpc too frequently\n                switch_master(ack.this_node, next, 1000);\n            }\n        } else {\n            _meta_servers.group_address()->set_leader(ack.primary_node);\n            // start next send_beacon() immediately because the leader is possibly right.\n            switch_master(ack.this_node, ack.primary_node, 0);\n        }\n    }\n}\n\n// client side\nvoid slave_failure_detector_with_multimaster::on_master_disconnected(\n    const std::vector<::dsn::rpc_address> &nodes)\n{\n    bool primary_disconnected = false;\n    rpc_address leader = _meta_servers.group_address()->leader();\n    for (auto it = nodes.begin(); it != nodes.end(); ++it) {\n        if (leader == *it)\n            primary_disconnected = true;\n    }\n\n    if (primary_disconnected) {\n        _master_disconnected_callback();\n    }\n}\n\nvoid 
slave_failure_detector_with_multimaster::on_master_connected(::dsn::rpc_address node)\n{\n    /*\n    * this is called in end_ping_internal, which is called by end_ping.\n    * So this function is called in the lock context of fd::_lock\n    */\n    bool is_primary = (_meta_servers.group_address()->leader() == node);\n    if (is_primary) {\n        _master_connected_callback();\n    }\n}\n}\n} // end namespace\n"
  },
  {
    "path": "src/failure_detector/fd.thrift",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\ninclude \"../dsn.thrift\"\n\nnamespace cpp dsn.fd\n\nstruct beacon_msg\n{\n    1: i64 time;\n    2: dsn.rpc_address from_addr;\n    3: dsn.rpc_address to_addr;\n    4: optional i64 start_time;\n}\n\nstruct beacon_ack\n{\n    1: i64 time;\n    2: dsn.rpc_address this_node;\n    3: dsn.rpc_address primary_node;\n    4: bool is_master;\n    5: bool allowed;\n}\n\nstruct config_master_message\n{\n    1: dsn.rpc_address master;\n    2: bool is_register;\n}\n"
  },
  {
    "path": "src/failure_detector/test/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn.failure_detector.tests)\n\n# Source files under CURRENT project directory will be automatically included.\n# You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"\")\n\n#  Search mode for source files under CURRENT project directory?\n#  \"GLOB_RECURSE\" for recursive search\n#  \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS\n    dsn_runtime\n    dsn_meta_server\n    dsn_replica_server\n    dsn_replication_common\n    dsn.failure_detector\n    gtest\n    hashtable\n    )\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\n# Extra files that will be installed\nset(MY_BINPLACES\n    \"${CMAKE_CURRENT_SOURCE_DIR}/run.sh\"\n    \"${CMAKE_CURRENT_SOURCE_DIR}/clear.sh\"\n    \"${CMAKE_CURRENT_SOURCE_DIR}/config-test.ini\"\n    \"${CMAKE_CURRENT_SOURCE_DIR}/config-whitelist-test.ini\"\n    \"${CMAKE_CURRENT_SOURCE_DIR}/config-whitelist-test-failed.ini\"\n    \"${CMAKE_CURRENT_SOURCE_DIR}/gtest.filter\"\n)\n\ndsn_add_test()\n"
  },
  {
    "path": "src/failure_detector/test/clear.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nrm -rf log.* *.log data\n"
  },
  {
    "path": "src/failure_detector/test/config-test.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n[apps.client]\ntype = test\narguments = localhost 20101\nrun = true\nports =\ncount = 1\ndelay_seconds = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_DLOCK, THREAD_POOL_REPLICATION, THREAD_POOL_REPLICATION_LONG, THREAD_POOL_BLOCK_SERVICE,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.server]\ntype = test\narguments =\nports = 20101\nrun = false\ncount = 0\npools = THREAD_POOL_DEFAULT, THREAD_POOL_REPLICATION,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.test_worker]\ntype = worker\narguments =\nports = 
40001\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_FD\n\n[apps.test_master]\ntype = master\narguments =\nports = 30001\nrun = true\ncount = 3\npools = THREAD_POOL_DEFAULT, THREAD_POOL_FD\n\n[core]\n;tool = simulator\ntool = nativerun\n\n;toollets = tracer, profiler\n;fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n\n[tools.simple_logger]\nfast_flush = true\nshort_header = false\nstderr_start_level = LOG_LEVEL_WARNING\n\n[tools.simulator]\nrandom_seed = 0\n\n[tools.screen_logger]\nshort_header = false\n\n[network]\n; how many network threads for network library (used by asio)\nio_service_worker_count = 2\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nis_profile = false\nallow_inline = false\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\nis_profile = false\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 2\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_NORMAL\n\n[threadpool.THREAD_POOL_TEST_SERVER]\npartitioned = false\n\n[threadpool.THREAD_POOL_BLOCK_SERVICE]\nworker_count = 8\n\n[threadpool.THREAD_POOL_DLOCK]\npartitioned = true\n\n[zookeeper]\nhosts_list = localhost:12181\ntimeout_ms = 30000\nlogfile = zoolog.log\n\n[fds_concurrent_test]\ntotal_files = 64\nmin_size = 100\nmax_size = 150\n\n"
  },
  {
    "path": "src/failure_detector/test/config-whitelist-test-failed.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n[apps.client]\ntype = test\narguments = localhost 20101\nrun = true\nports =\ncount = 1\ndelay_seconds = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_DLOCK, THREAD_POOL_REPLICATION, THREAD_POOL_REPLICATION_LONG, THREAD_POOL_BLOCK_SERVICE,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.server]\ntype = test\narguments =\nports = 20101\nrun = false\ncount = 0\npools = THREAD_POOL_DEFAULT, THREAD_POOL_REPLICATION,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.test_worker]\ntype = worker\narguments =\nports = 
40001\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_FD\n\n[apps.test_master]\ntype = master\n#whitelist port,port,...\narguments = whitelist 22222\nports = 30001\nrun = true\ncount = 3\npools = THREAD_POOL_DEFAULT, THREAD_POOL_FD\n\n[core]\n;tool = simulator\ntool = nativerun\n\n;toollets = tracer, profiler\n;fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n\n[tools.simple_logger]\nfast_flush = true\nshort_header = false\nstderr_start_level = LOG_LEVEL_WARNING\n\n[tools.simulator]\nrandom_seed = 0\n\n[tools.screen_logger]\nshort_header = false\n\n[network]\n; how many network threads for network library (used by asio)\nio_service_worker_count = 2\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nis_profile = false\nallow_inline = false\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\nis_profile = false\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 2\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_NORMAL\n\n[threadpool.THREAD_POOL_TEST_SERVER]\npartitioned = false\n\n[threadpool.THREAD_POOL_BLOCK_SERVICE]\nworker_count = 8\n\n[threadpool.THREAD_POOL_DLOCK]\npartitioned = true\n\n[zookeeper]\nhosts_list = localhost:12181\ntimeout_ms = 30000\nlogfile = zoolog.log\n\n[fds_concurrent_test]\ntotal_files = 64\nmin_size = 100\nmax_size = 150\n\n"
  },
  {
    "path": "src/failure_detector/test/config-whitelist-test.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n[apps.client]\ntype = test\narguments = localhost 20101\nrun = true\nports =\ncount = 1\ndelay_seconds = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_DLOCK, THREAD_POOL_REPLICATION, THREAD_POOL_REPLICATION_LONG, THREAD_POOL_BLOCK_SERVICE,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.server]\ntype = test\narguments =\nports = 20101\nrun = false\ncount = 0\npools = THREAD_POOL_DEFAULT, THREAD_POOL_REPLICATION,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.test_worker]\ntype = worker\narguments =\nports = 
40001\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_FD\n\n[apps.test_master]\ntype = master\n#whitelist port,port,...\narguments = whitelist 40001\nports = 30001\nrun = true\ncount = 3\npools = THREAD_POOL_DEFAULT, THREAD_POOL_FD\n\n[core]\n;tool = simulator\ntool = nativerun\n\n;toollets = tracer, profiler\n;fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n\n[tools.simple_logger]\nfast_flush = true\nshort_header = false\nstderr_start_level = LOG_LEVEL_WARNING\n\n[tools.simulator]\nrandom_seed = 0\n\n[tools.screen_logger]\nshort_header = false\n\n[network]\n; how many network threads for network library (used by asio)\nio_service_worker_count = 2\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nis_profile = false\nallow_inline = false\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\nis_profile = false\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 2\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_NORMAL\n\n[threadpool.THREAD_POOL_TEST_SERVER]\npartitioned = false\n\n[threadpool.THREAD_POOL_BLOCK_SERVICE]\nworker_count = 8\n\n[threadpool.THREAD_POOL_DLOCK]\npartitioned = true\n\n[zookeeper]\nhosts_list = localhost:12181\ntimeout_ms = 30000\nlogfile = zoolog.log\n\n[fds_concurrent_test]\ntotal_files = 64\nmin_size = 100\nmax_size = 150\n\n"
  },
  {
    "path": "src/failure_detector/test/failure_detector.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"meta/meta_server_failure_detector.h\"\n#include \"meta/meta_options.h\"\n#include \"replica/replica_stub.h\"\n\n#include <gtest/gtest.h>\n#include <dsn/service_api_cpp.h>\n#include <vector>\n\nusing namespace dsn;\nusing namespace dsn::fd;\n\n#define MPORT_START 30001\n#define WPORT 40001\n#define MCOUNT 3\n\nDEFINE_TASK_CODE_RPC(RPC_MASTER_CONFIG, TASK_PRIORITY_COMMON, THREAD_POOL_FD)\n\nstd::atomic_int started_apps(0);\nclass worker_fd_test : public ::dsn::dist::slave_failure_detector_with_multimaster\n{\nprivate:\n    volatile bool _send_ping_switch;\n    /* this function only triggerd once*/\n    std::function<void(rpc_address addr)> _connected_cb;\n    std::function<void(const std::vector<rpc_address> &)> 
_disconnected_cb;\n\nprotected:\n    virtual void send_beacon(::dsn::rpc_address node, uint64_t time) override\n    {\n        if (_send_ping_switch)\n            failure_detector::send_beacon(node, time);\n        else {\n            dinfo(\"ignore send beacon, to node[%s], time[%\" PRId64 \"]\", node.to_string(), time);\n        }\n    }\n\n    virtual void on_master_disconnected(const std::vector<rpc_address> &nodes) override\n    {\n        if (_disconnected_cb)\n            _disconnected_cb(nodes);\n    }\n\n    virtual void on_master_connected(rpc_address node) override\n    {\n        if (_connected_cb)\n            _connected_cb(node);\n    }\n\npublic:\n    worker_fd_test(replication::replica_stub *stub, std::vector<dsn::rpc_address> &meta_servers)\n        : slave_failure_detector_with_multimaster(meta_servers,\n                                                  [=]() { stub->on_meta_server_disconnected(); },\n                                                  [=]() { stub->on_meta_server_connected(); })\n    {\n        _send_ping_switch = false;\n    }\n    void toggle_send_ping(bool toggle) { _send_ping_switch = toggle; }\n    void when_connected(const std::function<void(rpc_address addr)> &func) { _connected_cb = func; }\n    void when_disconnected(const std::function<void(const std::vector<rpc_address> &nodes)> &func)\n    {\n        _disconnected_cb = func;\n    }\n    void clear()\n    {\n        _connected_cb = {};\n        _disconnected_cb = {};\n    }\n};\n\nclass master_fd_test : public replication::meta_server_failure_detector\n{\nprivate:\n    std::function<void(rpc_address addr)> _connected_cb;\n    std::function<void(const std::vector<rpc_address> &)> _disconnected_cb;\n    volatile bool _response_ping_switch;\n\npublic:\n    virtual void on_ping(const beacon_msg &beacon, ::dsn::rpc_replier<beacon_ack> &reply) override\n    {\n        if (_response_ping_switch)\n            meta_server_failure_detector::on_ping(beacon, reply);\n        else 
{\n            dinfo(\"ignore on ping, beacon msg, time[%\" PRId64 \"], from[%s], to[%s]\",\n                  beacon.time,\n                  beacon.from_addr.to_string(),\n                  beacon.to_addr.to_string());\n        }\n    }\n\n    virtual void on_worker_disconnected(const std::vector<rpc_address> &worker_list) override\n    {\n        if (_disconnected_cb)\n            _disconnected_cb(worker_list);\n    }\n    virtual void on_worker_connected(rpc_address node) override\n    {\n        if (_connected_cb)\n            _connected_cb(node);\n    }\n    master_fd_test() : meta_server_failure_detector(rpc_address(), false)\n    {\n        _response_ping_switch = true;\n    }\n    void toggle_response_ping(bool toggle) { _response_ping_switch = toggle; }\n    void when_connected(const std::function<void(rpc_address addr)> &func) { _connected_cb = func; }\n    void when_disconnected(const std::function<void(const std::vector<rpc_address> &nodes)> &func)\n    {\n        _disconnected_cb = func;\n    }\n    void test_register_worker(rpc_address node)\n    {\n        zauto_lock l(failure_detector::_lock);\n        register_worker(node);\n    }\n    void clear()\n    {\n        _connected_cb = {};\n        _disconnected_cb = {};\n    }\n};\n\nclass test_worker : public service_app, public serverlet<test_worker>\n{\npublic:\n    test_worker(const service_app_info *info) : service_app(info), serverlet(\"test_worker\") {}\n\n    error_code start(const std::vector<std::string> &args) override\n    {\n        std::vector<rpc_address> master_group;\n        for (int i = 0; i < 3; ++i)\n            master_group.push_back(rpc_address(\"localhost\", MPORT_START + i));\n        _worker_fd = new worker_fd_test(nullptr, master_group);\n        _worker_fd->start(1, 1, 9, 10);\n        ++started_apps;\n\n        register_rpc_handler(\n            RPC_MASTER_CONFIG, \"RPC_MASTER_CONFIG\", &test_worker::on_master_config);\n        return ERR_OK;\n    }\n\n    error_code 
stop(bool) override { return ERR_OK; }\n\n    void on_master_config(const config_master_message &request, bool &response)\n    {\n        dinfo(\"master config: request:%s, type:%s\",\n              request.master.to_string(),\n              request.is_register ? \"reg\" : \"unreg\");\n        if (request.is_register)\n            _worker_fd->register_master(request.master);\n        else\n            _worker_fd->unregister_master(request.master);\n        response = true;\n    }\n\n    worker_fd_test *fd() { return _worker_fd; }\nprivate:\n    worker_fd_test *_worker_fd;\n};\n\nclass test_master : public service_app\n{\npublic:\n    test_master(const service_app_info *info) : ::dsn::service_app(info) {}\n\n    error_code start(const std::vector<std::string> &args) override\n    {\n        _opts.stable_rs_min_running_seconds = 10;\n        _opts.max_succssive_unstable_restart = 10;\n\n        _master_fd = new master_fd_test();\n        _master_fd->set_options(&_opts);\n        bool use_allow_list = false;\n        if (args.size() >= 3 && args[1] == \"whitelist\") {\n            std::vector<std::string> ports;\n            utils::split_args(args[2].c_str(), ports, ',');\n            for (auto &port : ports) {\n                rpc_address addr;\n                addr.assign_ipv4(network::get_local_ipv4(), std::stoi(port));\n                _master_fd->add_allow_list(addr);\n            }\n            use_allow_list = true;\n        }\n\n        _master_fd->start(1, 1, 9, 10, use_allow_list);\n        dinfo(\"%s\", _master_fd->get_allow_list(std::vector<std::string>{}).c_str());\n        ++started_apps;\n\n        return ERR_OK;\n    }\n\n    error_code stop(bool) override { return ERR_OK; }\n\n    master_fd_test *fd() { return _master_fd; }\nprivate:\n    master_fd_test *_master_fd;\n    replication::fd_suboptions _opts;\n};\n\nbool spin_wait_condition(const std::function<bool()> &pred, int seconds)\n{\n    for (int i = 0; i != seconds; ++i) {\n        if (pred())\n   
         return true;\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n    }\n    return pred();\n}\n\nvoid fd_test_init()\n{\n    dsn::service_app::register_factory<test_worker>(\"worker\");\n    dsn::service_app::register_factory<test_master>(\"master\");\n    srand(time(0));\n}\n\nbool get_worker_and_master(test_worker *&worker, std::vector<test_master *> &masters)\n{\n    bool ans = spin_wait_condition([]() { return started_apps == MCOUNT + 1; }, 30);\n    if (!ans)\n        return false;\n\n    std::vector<service_app *> apps;\n    service_app::get_all_service_apps(&apps);\n    masters.resize(MCOUNT, nullptr);\n    worker = nullptr;\n\n    for (int i = 0; i != apps.size(); ++i) {\n        if (strcmp(apps[i]->info().type.c_str(), \"worker\") == 0) {\n            if (worker != nullptr)\n                return false;\n            worker = reinterpret_cast<test_worker *>(apps[i]);\n        } else if (strcmp(apps[i]->info().type.c_str(), \"master\") == 0) {\n            int index = apps[i]->info().index - 1;\n            if (index >= masters.size() || masters[index] != nullptr)\n                return false;\n            masters[index] = reinterpret_cast<test_master *>(apps[i]);\n        }\n    }\n\n    for (test_master *m : masters)\n        if (m == nullptr)\n            return false;\n    return true;\n}\n\nvoid master_group_set_leader(std::vector<test_master *> &master_group, int leader_index)\n{\n    rpc_address leader_addr(\"localhost\", MPORT_START + leader_index);\n    int i = 0;\n    for (test_master *&master : master_group) {\n        master->fd()->set_leader_for_test(leader_addr, leader_index == i);\n        i++;\n    }\n}\n\nvoid worker_set_leader(test_worker *worker, int leader_contact)\n{\n    worker->fd()->set_leader_for_test(rpc_address(\"localhost\", MPORT_START + leader_contact));\n\n    config_master_message msg;\n    msg.master = rpc_address(\"localhost\", MPORT_START + leader_contact);\n    msg.is_register = true;\n    error_code 
err;\n    bool response;\n    std::tie(err, response) = rpc::call_wait<bool>(\n        rpc_address(\"localhost\", WPORT), dsn::task_code(RPC_MASTER_CONFIG), msg);\n    ASSERT_EQ(err, ERR_OK);\n}\n\nvoid clear(test_worker *worker, std::vector<test_master *> masters)\n{\n    rpc_address leader = worker->fd()->get_servers().group_address()->leader();\n\n    config_master_message msg;\n    msg.master = leader;\n    msg.is_register = false;\n    error_code err;\n    bool response;\n    std::tie(err, response) = rpc::call_wait<bool>(\n        rpc_address(\"localhost\", WPORT), dsn::task_code(RPC_MASTER_CONFIG), msg);\n    ASSERT_EQ(err, ERR_OK);\n\n    worker->fd()->toggle_send_ping(false);\n\n    std::for_each(masters.begin(), masters.end(), [](test_master *mst) {\n        mst->fd()->clear_workers();\n        mst->fd()->toggle_response_ping(true);\n    });\n}\n\nvoid finish(test_worker *worker, test_master *master, int master_index)\n{\n    dwarn(\"start to finish\");\n    std::atomic_int wait_count;\n    wait_count.store(2);\n    worker->fd()->when_disconnected(\n        [&wait_count, master_index](const std::vector<rpc_address> &addr_list) mutable {\n            ASSERT_EQ(addr_list.size(), 1);\n            ASSERT_EQ(addr_list[0].port(), MPORT_START + master_index);\n            --wait_count;\n        });\n\n    master->fd()->when_disconnected(\n        [&wait_count](const std::vector<rpc_address> &addr_list) mutable {\n            ASSERT_EQ(addr_list.size(), 1);\n            ASSERT_EQ(addr_list[0].port(), WPORT);\n            --wait_count;\n        });\n\n    // we don't send any ping message now\n    worker->fd()->toggle_send_ping(false);\n    ASSERT_TRUE(spin_wait_condition([&wait_count] { return wait_count == 0; }, 20));\n    worker->fd()->clear();\n    master->fd()->clear();\n}\n\nTEST(fd, dummy_connect_disconnect)\n{\n    test_worker *worker;\n    std::vector<test_master *> masters;\n    ASSERT_TRUE(get_worker_and_master(worker, masters));\n\n    clear(worker, 
masters);\n    // set master with smallest index as the leader\n    master_group_set_leader(masters, 0);\n    // set the worker contact leader\n    worker_set_leader(worker, 0);\n\n    test_master *leader = masters[0];\n    // simply wait for two connected\n    std::atomic_int wait_count;\n    wait_count.store(2);\n    worker->fd()->when_connected([&wait_count](rpc_address leader) mutable {\n        ASSERT_EQ(leader.port(), MPORT_START);\n        --wait_count;\n    });\n    leader->fd()->when_connected([&wait_count](rpc_address worker_addr) mutable {\n        ASSERT_EQ(worker_addr.port(), WPORT);\n        --wait_count;\n    });\n\n    worker->fd()->toggle_send_ping(true);\n    ASSERT_TRUE(spin_wait_condition([&wait_count] { return wait_count == 0; }, 20));\n\n    finish(worker, leader, 0);\n}\n\nTEST(fd, master_redirect)\n{\n    test_worker *worker;\n    std::vector<test_master *> masters;\n    ASSERT_TRUE(get_worker_and_master(worker, masters));\n\n    int index = masters.size() - 1;\n\n    clear(worker, masters);\n    /* leader is the last master*/\n    master_group_set_leader(masters, index);\n    // we contact to 0\n    worker_set_leader(worker, 0);\n\n    test_master *leader = masters[index];\n    std::atomic_int wait_count;\n    wait_count.store(2);\n    /* although we contact to the first master, but in the end we must connect to the right leader\n     */\n    worker->fd()->when_connected([&wait_count](rpc_address leader) mutable { --wait_count; });\n    leader->fd()->when_connected([&wait_count](rpc_address worker_addr) mutable {\n        ASSERT_EQ(worker_addr.port(), WPORT);\n        --wait_count;\n    });\n\n    worker->fd()->toggle_send_ping(true);\n    ASSERT_TRUE(spin_wait_condition([&wait_count] { return wait_count == 0; }, 20));\n    // in the end, the worker will connect to the right master\n    ASSERT_TRUE(spin_wait_condition(\n        [worker, index] {\n            return worker->fd()->current_server_contact().port() == MPORT_START + index;\n      
  },\n        20));\n\n    finish(worker, leader, index);\n}\n\nTEST(fd, switch_new_master_suddenly)\n{\n    test_worker *worker;\n    std::vector<test_master *> masters;\n    ASSERT_TRUE(get_worker_and_master(worker, masters));\n\n    clear(worker, masters);\n\n    test_master *tst_master;\n    int index = 0;\n\n    master_group_set_leader(masters, index);\n    // and now we contact to 1\n    worker_set_leader(worker, 1);\n\n    tst_master = masters[index];\n    std::atomic_int wait_count;\n    wait_count.store(2);\n\n    auto cb = [&wait_count](rpc_address) mutable { --wait_count; };\n    worker->fd()->when_connected(cb);\n    tst_master->fd()->when_connected(cb);\n\n    worker->fd()->toggle_send_ping(true);\n    ASSERT_TRUE(spin_wait_condition([&wait_count]() { return wait_count == 0; }, 20));\n    ASSERT_EQ(worker->fd()->current_server_contact().port(), MPORT_START + index);\n\n    worker->fd()->when_connected(nullptr);\n    /* we select a new leader */\n    index = masters.size() - 1;\n    tst_master = masters[index];\n    /*\n     * for perfect FD, the new master should assume the worker connected.\n     * But first we test if the worker can connect to the new master.\n     * So clear all the workers\n     */\n    tst_master->fd()->clear_workers();\n    wait_count.store(1);\n    tst_master->fd()->when_connected([&wait_count](rpc_address addr) mutable {\n        ASSERT_EQ(addr.port(), WPORT);\n        --wait_count;\n    });\n    master_group_set_leader(masters, index);\n\n    /* now we can wait for the worker to connect to the new master */\n    ASSERT_TRUE(spin_wait_condition([&wait_count]() { return wait_count == 0; }, 20));\n    /* it may take time for worker to switch to new master, but 20 seconds\n     * is enough as in our setting, lease_period is 9 seconds. 
*/\n    ASSERT_TRUE(spin_wait_condition(\n        [worker, index]() {\n            return worker->fd()->current_server_contact().port() == MPORT_START + index;\n        },\n        20));\n\n    finish(worker, tst_master, index);\n}\n\nTEST(fd, old_master_died)\n{\n    test_worker *worker;\n    std::vector<test_master *> masters;\n    ASSERT_TRUE(get_worker_and_master(worker, masters));\n    clear(worker, masters);\n\n    test_master *tst_master;\n    int index = 0;\n    master_group_set_leader(masters, index);\n    // and now we contact to 0\n    worker_set_leader(worker, 0);\n\n    tst_master = masters[index];\n    std::atomic_int wait_count;\n    wait_count.store(2);\n\n    auto cb = [&wait_count](rpc_address) mutable { --wait_count; };\n    worker->fd()->when_connected(cb);\n    tst_master->fd()->when_connected(cb);\n\n    worker->fd()->toggle_send_ping(true);\n    ASSERT_TRUE(spin_wait_condition([&wait_count]() -> bool { return wait_count == 0; }, 20));\n    ASSERT_EQ(worker->fd()->current_server_contact().port(), MPORT_START + index);\n\n    worker->fd()->when_connected(nullptr);\n    tst_master->fd()->when_connected(nullptr);\n\n    worker->fd()->when_disconnected([](const std::vector<rpc_address> &masters_list) {\n        ASSERT_EQ(masters_list.size(), 1);\n        dinfo(\"disconnect from master: %s\", masters_list[0].to_string());\n    });\n\n    /*first let's stop the old master*/\n    tst_master->fd()->toggle_response_ping(false);\n    /* then select a new one */\n    index = masters.size() - 1;\n    tst_master = masters[index];\n\n    /* only for test */\n    tst_master->fd()->clear_workers();\n    wait_count.store(1);\n\n    tst_master->fd()->when_connected([&wait_count](rpc_address addr) mutable {\n        EXPECT_EQ(addr.port(), WPORT);\n        --wait_count;\n    });\n    master_group_set_leader(masters, index);\n\n    /* now we can wait the worker to connect to the new master */\n    ASSERT_TRUE(spin_wait_condition([&wait_count]() { return wait_count 
== 0; }, 20));\n    /* it may take time for worker to switch to new master, but 20 seconds\n     * is enough as in our setting, lease_period is 9 seconds. */\n    ASSERT_TRUE(spin_wait_condition(\n        [worker, index]() {\n            return worker->fd()->current_server_contact().port() == MPORT_START + index;\n        },\n        20));\n\n    finish(worker, tst_master, index);\n}\n\nTEST(fd, worker_died_when_switch_master)\n{\n    test_worker *worker;\n    std::vector<test_master *> masters;\n    ASSERT_TRUE(get_worker_and_master(worker, masters));\n    clear(worker, masters);\n\n    test_master *tst_master;\n    int index = 0;\n    master_group_set_leader(masters, index);\n    // and now we contact to 0\n    worker_set_leader(worker, 0);\n\n    tst_master = masters[index];\n    std::atomic_int wait_count;\n    wait_count.store(2);\n\n    auto cb = [&wait_count](rpc_address) mutable { --wait_count; };\n    worker->fd()->when_connected(cb);\n    tst_master->fd()->when_connected(cb);\n\n    worker->fd()->toggle_send_ping(true);\n    ASSERT_TRUE(spin_wait_condition([&wait_count]() { return wait_count == 0; }, 20));\n    ASSERT_EQ(worker->fd()->current_server_contact().port(), MPORT_START + index);\n\n    worker->fd()->when_connected(nullptr);\n    tst_master->fd()->when_connected(nullptr);\n\n    /*first stop the old leader*/\n    tst_master->fd()->toggle_response_ping(false);\n\n    /*then select another leader*/\n    index = masters.size() - 1;\n    tst_master = masters[index];\n\n    wait_count.store(2);\n    tst_master->fd()->when_disconnected(\n        [&wait_count](const std::vector<rpc_address> &worker_list) mutable {\n            ASSERT_EQ(worker_list.size(), 1);\n            ASSERT_EQ(worker_list[0].port(), WPORT);\n            wait_count--;\n        });\n    worker->fd()->when_disconnected(\n        [&wait_count](const std::vector<rpc_address> &master_list) mutable {\n            ASSERT_EQ(master_list.size(), 1);\n            wait_count--;\n        
});\n\n    /* we assume the worker is alive */\n    tst_master->fd()->test_register_worker(rpc_address(\"localhost\", WPORT));\n    master_group_set_leader(masters, index);\n\n    /* then stop the worker*/\n    worker->fd()->toggle_send_ping(false);\n    ASSERT_TRUE(spin_wait_condition([&wait_count] { return wait_count == 0; }, 20));\n}\n\ndsn::message_ex *create_fake_rpc_response()\n{\n    dsn::message_ex *req =\n        dsn::message_ex::create_received_request(RPC_MASTER_CONFIG, DSF_THRIFT_BINARY, nullptr, 0);\n    dsn::message_ex *response = req->create_response();\n    req->add_ref();\n    req->release_ref();\n    return response;\n}\n\nTEST(fd, update_stability)\n{\n    test_worker *worker;\n    std::vector<test_master *> masters;\n    ASSERT_TRUE(get_worker_and_master(worker, masters));\n    clear(worker, masters);\n\n    master_group_set_leader(masters, 0);\n    master_fd_test *fd = masters[0]->fd();\n    fd->toggle_response_ping(true);\n\n    replication::fd_suboptions opts;\n    opts.stable_rs_min_running_seconds = 5;\n    opts.max_succssive_unstable_restart = 2;\n    fd->set_options(&opts);\n\n    replication::meta_server_failure_detector::stability_map *smap =\n        fd->get_stability_map_for_test();\n    smap->clear();\n\n    dsn::rpc_replier<beacon_ack> r(create_fake_rpc_response());\n    beacon_msg msg;\n    msg.from_addr = rpc_address(\"localhost\", 123);\n    msg.to_addr = rpc_address(\"localhost\", MPORT_START);\n    msg.time = dsn_now_ms();\n    msg.__isset.start_time = true;\n    msg.start_time = 1000;\n\n    // first on ping\n    fd->on_ping(msg, r);\n    ASSERT_EQ(1, smap->size());\n    ASSERT_NE(smap->end(), smap->find(msg.from_addr));\n\n    replication::meta_server_failure_detector::worker_stability &ws =\n        smap->find(msg.from_addr)->second;\n    ASSERT_EQ(0, ws.unstable_restart_count);\n    ASSERT_EQ(msg.start_time, ws.last_start_time_ms);\n    ASSERT_TRUE(r.is_empty());\n    r = 
dsn::rpc_replier<beacon_ack>(create_fake_rpc_response());\n\n    // unstably restart and resend ping\n    msg.start_time += 4000;\n    fd->on_ping(msg, r);\n    ASSERT_EQ(1, ws.unstable_restart_count);\n    ASSERT_EQ(msg.start_time, ws.last_start_time_ms);\n    ASSERT_TRUE(r.is_empty());\n    r = dsn::rpc_replier<beacon_ack>(create_fake_rpc_response());\n\n    // unstably restart and resend ping again, the node's ping will be ignored\n    msg.start_time += 4000;\n    fd->on_ping(msg, r);\n    ASSERT_EQ(msg.start_time, ws.last_start_time_ms);\n    ASSERT_EQ(2, ws.unstable_restart_count);\n    ASSERT_FALSE(r.is_empty());\n\n    // stably restart, the meta stability & unstable_count will be reset\n    msg.start_time += 10000;\n    fd->on_ping(msg, r);\n    ASSERT_EQ(msg.start_time, ws.last_start_time_ms);\n    ASSERT_EQ(0, ws.unstable_restart_count);\n    ASSERT_TRUE(r.is_empty());\n    r = dsn::rpc_replier<beacon_ack>(create_fake_rpc_response());\n\n    // unstably restart, unstable-count++\n    msg.start_time += 4000;\n    fd->on_ping(msg, r);\n    ASSERT_EQ(msg.start_time, ws.last_start_time_ms);\n    ASSERT_EQ(1, ws.unstable_restart_count);\n    ASSERT_TRUE(r.is_empty());\n    r = dsn::rpc_replier<beacon_ack>(create_fake_rpc_response());\n\n    // not restart, unstable-count will be reset\n    fd->on_ping(msg, r);\n    ASSERT_EQ(msg.start_time, ws.last_start_time_ms);\n    ASSERT_EQ(0, ws.unstable_restart_count);\n    ASSERT_TRUE(r.is_empty());\n    r = dsn::rpc_replier<beacon_ack>(create_fake_rpc_response());\n\n    // old message, will be ignored\n    msg.start_time -= 4000;\n    fd->on_ping(msg, r);\n    ASSERT_EQ(msg.start_time + 4000, ws.last_start_time_ms);\n    ASSERT_EQ(0, ws.unstable_restart_count);\n    ASSERT_TRUE(r.is_empty());\n    r = dsn::rpc_replier<beacon_ack>(create_fake_rpc_response());\n\n    // unstable restart, unstable-count++\n    msg.start_time += 8000;\n    fd->on_ping(msg, r);\n    ASSERT_EQ(msg.start_time, ws.last_start_time_ms);\n    
ASSERT_EQ(1, ws.unstable_restart_count);\n    ASSERT_TRUE(r.is_empty());\n    r = dsn::rpc_replier<beacon_ack>(create_fake_rpc_response());\n\n    // unstable restart, unstable-count++, node's ping will be ignored\n    msg.start_time += 4000;\n    fd->on_ping(msg, r);\n    ASSERT_EQ(msg.start_time, ws.last_start_time_ms);\n    ASSERT_EQ(2, ws.unstable_restart_count);\n    ASSERT_FALSE(r.is_empty());\n\n    // reset stat\n    fd->reset_stability_stat(msg.from_addr);\n    ASSERT_EQ(msg.start_time, ws.last_start_time_ms);\n    ASSERT_EQ(0, ws.unstable_restart_count);\n}\n\nTEST(fd, not_in_whitelist)\n{\n    test_worker *worker;\n    std::vector<test_master *> masters;\n    ASSERT_TRUE(get_worker_and_master(worker, masters));\n\n    clear(worker, masters);\n    // set master with smallest index as the leader\n    master_group_set_leader(masters, 0);\n    // set the worker contact leader\n    worker_set_leader(worker, 0);\n\n    std::atomic_int wait_count;\n    wait_count.store(1);\n    auto cb = [&wait_count](rpc_address) mutable { --wait_count; };\n    worker->fd()->when_connected(cb);\n    worker->fd()->toggle_send_ping(true);\n\n    ASSERT_TRUE(spin_wait_condition([&wait_count] { return wait_count == 1; }, 20));\n}\n"
  },
  {
    "path": "src/failure_detector/test/gtest.filter",
    "content": "config-test.ini -fd.not_in_whitelist\nconfig-whitelist-test.ini -fd.not_in_whitelist\nconfig-whitelist-test-failed.ini fd.not_in_whitelist\n"
  },
  {
    "path": "src/failure_detector/test/main.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <iostream>\n\n#ifndef _WIN32\n#include <sys/types.h>\n#include <signal.h>\n#endif\n\n#include <gtest/gtest.h>\n#include <dsn/service_api_cpp.h>\n\nint g_test_count = 0;\nint g_test_ret = 0;\n\nextern void fd_test_init();\n\nclass test_client : public ::dsn::service_app\n{\npublic:\n    test_client(const dsn::service_app_info *info) : ::dsn::service_app(info) {}\n\n    ::dsn::error_code start(const std::vector<std::string> &args)\n    {\n        int argc = args.size();\n        char *argv[20];\n        for (int i = 0; i < argc; ++i) {\n            argv[i] = (char *)(args[i].c_str());\n        }\n        testing::InitGoogleTest(&argc, argv);\n        g_test_ret = RUN_ALL_TESTS();\n        g_test_count = 1;\n        return 
::dsn::ERR_OK;\n    }\n\n    ::dsn::error_code stop(bool cleanup = false) { return ::dsn::ERR_OK; }\n};\n\nGTEST_API_ int main(int argc, char **argv)\n{\n    testing::InitGoogleTest(&argc, argv);\n\n    // register all possible services\n    dsn::service_app::register_factory<test_client>(\"test\");\n    fd_test_init();\n\n    // specify what services and tools will run in config file, then run\n    if (argc < 2)\n        dsn_run_config(\"config-test.ini\", false);\n    else\n        dsn_run_config(argv[1], false);\n\n    while (g_test_count == 0) {\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n    }\n\n#ifndef ENABLE_GCOV\n    dsn_exit(g_test_ret);\n#endif\n    return g_test_ret;\n}\n"
  },
  {
    "path": "src/failure_detector/test/run.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nif [ -z \"${REPORT_DIR}\" ]; then\n    REPORT_DIR=\".\"\nfi\n\nwhile read -r -a line; do\n    test_case=${line[0]}\n    gtest_filter=${line[1]}\n    output_xml=\"${REPORT_DIR}/dsn.failure_detector.tests_${test_case/.ini/.xml}\"\n    echo \"============ run dsn.failure_detector.tests ${test_case} with gtest_filter ${gtest_filter} ============\"\n    ./clear.sh\n    GTEST_OUTPUT=\"xml:${output_xml}\" GTEST_FILTER=${gtest_filter} ./dsn.failure_detector.tests ${test_case}\n\n    if [ $? -ne 0 ]; then\n        echo \"run dsn.failure_detector.tests $test_case failed\"\n        echo \"---- ls ----\"\n        ls -l\n        if find . -name log.1.txt; then\n            echo \"---- tail -n 100 log.1.txt ----\"\n            tail -n 100 `find . 
-name log.1.txt`\n        fi\n        if [ -f core ]; then\n            echo \"---- gdb ./dsn.failure_detector.tests core ----\"\n            gdb ./dsn.failure_detector.tests core -ex \"thread apply all bt\" -ex \"set pagination 0\" -batch\n        fi\n        exit 1\n    fi\n    echo \"============ done dsn.failure_detector.tests ${test_case} with gtest_filter ${gtest_filter} ============\"\ndone <gtest.filter\n"
  },
  {
    "path": "src/http/CMakeLists.txt",
    "content": "##############################################################################\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n##############################################################################\n\nset(MY_PROJ_NAME dsn_http)\n\nset(MY_PROJ_SRC ${PROJECT_SOURCE_DIR}/thirdparty/build/Source/http-parser/http_parser.c)\n\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS \"\")\n\ndsn_add_static_library()\n\nadd_subdirectory(test)\n"
  },
  {
    "path": "src/http/builtin_http_calls.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/output_utils.h>\n#include <dsn/utils/time_utils.h>\n\n#include \"builtin_http_calls.h\"\n#include \"http_call_registry.h\"\n#include \"pprof_http_service.h\"\n#include \"service_version.h\"\n\nnamespace dsn {\n\n/*extern*/ void get_help_handler(const http_request &req, http_response &resp)\n{\n    utils::table_printer tp;\n    std::ostringstream oss;\n    auto calls = http_call_registry::instance().list_all_calls();\n    for (const auto &call : calls) {\n        tp.add_row_name_and_data(std::string(\"/\") + call->path, call->help);\n    }\n    tp.output(oss, utils::table_printer::output_format::kJsonCompact);\n    resp.body = oss.str();\n    resp.status_code = http_status_code::ok;\n}\n\n/*extern*/ void get_version_handler(const http_request &req, http_response &resp)\n{\n    std::ostringstream out;\n    dsn::utils::table_printer tp;\n\n    tp.add_row_name_and_data(\"Version\", app_version.version);\n    tp.add_row_name_and_data(\"GitCommit\", app_version.git_commit);\n    tp.output(out, dsn::utils::table_printer::output_format::kJsonCompact);\n\n    resp.body = out.str();\n    resp.status_code = 
http_status_code::ok;\n}\n\n/*extern*/ void get_recent_start_time_handler(const http_request &req, http_response &resp)\n{\n    char start_time[100];\n    dsn::utils::time_ms_to_date_time(dsn::utils::process_start_millis(), start_time, 100);\n    std::ostringstream out;\n    dsn::utils::table_printer tp;\n    tp.add_row_name_and_data(\"RecentStartTime\", start_time);\n    tp.output(out, dsn::utils::table_printer::output_format::kJsonCompact);\n\n    resp.body = out.str();\n    resp.status_code = http_status_code::ok;\n}\n\n/*extern*/ void register_builtin_http_calls()\n{\n#ifdef DSN_ENABLE_GPERF\n    static pprof_http_service pprof_svc;\n#endif\n\n    register_http_call(\"\")\n        .with_callback(\n            [](const http_request &req, http_response &resp) { get_help_handler(req, resp); })\n        .with_help(\"Lists all supported calls\");\n\n    register_http_call(\"version\")\n        .with_callback(\n            [](const http_request &req, http_response &resp) { get_version_handler(req, resp); })\n        .with_help(\"Gets the server version.\");\n\n    register_http_call(\"recentStartTime\")\n        .with_callback([](const http_request &req, http_response &resp) {\n            get_recent_start_time_handler(req, resp);\n        })\n        .with_help(\"Gets the server start time.\");\n\n    register_http_call(\"perfCounter\")\n        .with_callback([](const http_request &req, http_response &resp) {\n            get_perf_counter_handler(req, resp);\n        })\n        .with_help(\"Gets the value of a perf counter\");\n\n    register_http_call(\"updateConfig\")\n        .with_callback(\n            [](const http_request &req, http_response &resp) { update_config(req, resp); })\n        .with_help(\"Updates the value of a config\");\n\n    register_http_call(\"config\")\n        .with_callback([](const http_request &req, http_response &resp) { get_config(req, resp); })\n        .with_help(\"get the details of a specified config\");\n\n    
register_http_call(\"configs\")\n        .with_callback(\n            [](const http_request &req, http_response &resp) { list_all_configs(req, resp); })\n        .with_help(\"list all configs\");\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/http/builtin_http_calls.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/cpp/serverlet.h>\n#include <dsn/http/http_server.h>\n#include <dsn/utility/errors.h>\n\nnamespace dsn {\n\n// Register basic services for the HTTP server.\nextern void register_builtin_http_calls();\n\nextern void get_perf_counter_handler(const http_request &req, http_response &resp);\n\nextern void get_help_handler(const http_request &req, http_response &resp);\n\n// Get <meta_server_ipport>/version\n// Response body:\n// {\n//    Version: \"2.1.SNAPSHOT\",\n//    GitCommit: \"88783e1ec28c326974f808d91c1531391d38acb5\"\n// }\nextern void get_version_handler(const http_request &req, http_response &resp);\n\nextern void get_recent_start_time_handler(const http_request &req, http_response &resp);\n\nextern void update_config(const http_request &req, http_response &resp);\n\nextern void list_all_configs(const http_request &req, http_response &resp);\n\nextern void get_config(const http_request &req, http_response &resp);\n} // namespace dsn\n"
  },
  {
    "path": "src/http/config_http_service.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/http/http_server.h>\n#include <dsn/utility/flags.h>\n#include <dsn/utility/output_utils.h>\n\nnamespace dsn {\nvoid update_config(const http_request &req, http_response &resp)\n{\n    if (req.query_args.size() != 1) {\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n\n    auto iter = req.query_args.begin();\n    auto res = update_flag(iter->first, iter->second);\n\n    utils::table_printer tp;\n    tp.add_row_name_and_data(\"update_status\", res.description());\n    std::ostringstream out;\n    tp.output(out, dsn::utils::table_printer::output_format::kJsonCompact);\n    resp.body = out.str();\n    resp.status_code = http_status_code::ok;\n}\n\nvoid list_all_configs(const http_request &req, http_response &resp)\n{\n    if (!req.query_args.empty()) {\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n\n    resp.body = list_all_flags();\n    resp.status_code = http_status_code::ok;\n}\n\nvoid get_config(const http_request &req, http_response &resp)\n{\n    std::string config_name;\n    for (const auto &p : req.query_args) {\n        if (\"name\" == p.first) {\n     
       config_name = p.second;\n        } else {\n            resp.status_code = http_status_code::bad_request;\n            return;\n        }\n    }\n\n    auto res = get_flag_str(config_name);\n    if (res.is_ok()) {\n        resp.body = res.get_value();\n    } else {\n        resp.body = res.get_error().description();\n    }\n    resp.status_code = http_status_code::ok;\n}\n} // namespace dsn\n"
  },
  {
    "path": "src/http/http_call_registry.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/http/http_server.h>\n#include <dsn/utility/errors.h>\n\nnamespace dsn {\n\n// A singleton registry for all the HTTP calls\nclass http_call_registry : public utils::singleton<http_call_registry>\n{\npublic:\n    std::shared_ptr<http_call> find(const std::string &path) const\n    {\n        std::lock_guard<std::mutex> guard(_mu);\n        auto it = _call_map.find(path);\n        if (it == _call_map.end()) {\n            return nullptr;\n        }\n        return it->second;\n    }\n\n    void remove(const std::string &path)\n    {\n        std::lock_guard<std::mutex> guard(_mu);\n        _call_map.erase(path);\n    }\n\n    void add(std::unique_ptr<http_call> call_uptr)\n    {\n        auto call = std::shared_ptr<http_call>(call_uptr.release());\n        std::lock_guard<std::mutex> guard(_mu);\n        dassert(_call_map.find(call->path) == _call_map.end(),\n                \"repeatedly register http call \\\"%s\\\"\",\n                call->path.c_str());\n        _call_map[call->path] = call;\n    }\n\n    std::vector<std::shared_ptr<http_call>> list_all_calls() const\n    {\n        std::lock_guard<std::mutex> 
guard(_mu);\n\n        std::vector<std::shared_ptr<http_call>> ret;\n        for (const auto &kv : _call_map) {\n            ret.push_back(kv.second);\n        }\n        return ret;\n    }\n\nprivate:\n    friend class utils::singleton<http_call_registry>;\n    http_call_registry() = default;\n    ~http_call_registry() = default;\n\nprivate:\n    mutable std::mutex _mu;\n    std::map<std::string, std::shared_ptr<http_call>> _call_map;\n};\n\n} // namespace dsn\n"
  },
  {
    "path": "src/http/http_message_parser.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"http_message_parser.h\"\n\n#include <dsn/utility/ports.h>\n#include <dsn/utility/crc.h>\n#include <dsn/tool-api/rpc_message.h>\n#include <dsn/cpp/serialization.h>\n#include <dsn/c/api_layer1.h>\n#include <dsn/http/http_server.h>\n#include <iomanip>\n\nnamespace dsn {\n\nstruct parser_context\n{\n    http_message_parser *parser;\n    message_reader *reader;\n};\n\n/*extern*/ const char *http_parser_stage_to_string(http_parser_stage s)\n{\n    switch (s) {\n    case HTTP_ON_MESSAGE_BEGIN:\n        return \"HTTP_ON_MESSAGE_BEGIN\";\n    case HTTP_ON_URL:\n        return \"HTTP_ON_URL\";\n    case HTTP_ON_STATUS:\n        return \"HTTP_ON_STATUS\";\n    case HTTP_ON_HEADER_FIELD:\n        return \"HTTP_ON_HEADER_FIELD\";\n    
case HTTP_ON_HEADER_VALUE:\n        return \"HTTP_ON_HEADER_VALUE\";\n    case HTTP_ON_HEADERS_COMPLETE:\n        return \"HTTP_ON_HEADERS_COMPLETE\";\n    case HTTP_ON_BODY:\n        return \"HTTP_ON_BODY\";\n    case HTTP_ON_MESSAGE_COMPLETE:\n        return \"HTTP_ON_MESSAGE_COMPLETE\";\n    default:\n        return \"invalid\";\n    }\n}\n\nhttp_message_parser::http_message_parser()\n{\n    memset(&_parser_setting, 0, sizeof(_parser_setting));\n\n    _parser_setting.on_message_begin = [](http_parser *parser) -> int {\n        auto &msg = static_cast<parser_context *>(parser->data)->parser->_current_message;\n\n        // initialize http message\n        // msg->buffers[0] = header\n        // msg->buffers[1] = body (blob())\n        msg.reset(message_ex::create_receive_message_with_standalone_header(blob()));\n        msg->buffers.resize(HTTP_MSG_BUFFERS_NUM);\n\n        message_header *header = msg->header;\n        header->hdr_length = sizeof(message_header);\n        header->hdr_crc32 = header->body_crc32 = CRC_INVALID;\n        strcpy(header->rpc_name, \"RPC_HTTP_SERVICE\");\n        return 0;\n    };\n\n    _parser_setting.on_url = [](http_parser *parser, const char *at, size_t length) -> int {\n        http_message_parser *msg_parser = static_cast<parser_context *>(parser->data)->parser;\n        msg_parser->_stage = HTTP_ON_URL;\n        msg_parser->_url.append(at, length);\n        return 0;\n    };\n\n    _parser_setting.on_header_field =\n        [](http_parser *parser, const char *at, size_t length) -> int {\n        http_message_parser *msg_parser = static_cast<parser_context *>(parser->data)->parser;\n        msg_parser->_stage = HTTP_ON_HEADER_FIELD;\n        if (strncmp(at, \"Content-Type\", length) == 0) {\n            msg_parser->_is_field_content_type = true;\n        }\n        return 0;\n    };\n\n    _parser_setting.on_header_value =\n        [](http_parser *parser, const char *at, size_t length) -> int {\n        http_message_parser 
*msg_parser = static_cast<parser_context *>(parser->data)->parser;\n        msg_parser->_stage = HTTP_ON_HEADER_VALUE;\n        if (msg_parser->_is_field_content_type) {\n            auto &msg = msg_parser->_current_message;\n            // msg->buffers[3] = content-type\n            msg->buffers[3] = blob::create_from_bytes(at, length);\n            msg_parser->_is_field_content_type = false;\n        }\n        return 0;\n    };\n\n    _parser_setting.on_headers_complete = [](http_parser *parser) -> int {\n        http_message_parser *msg_parser = static_cast<parser_context *>(parser->data)->parser;\n        msg_parser->_stage = HTTP_ON_HEADERS_COMPLETE;\n\n        auto &msg = msg_parser->_current_message;\n\n        // msg->buffers[2] = url\n        msg->buffers[2] = blob::create_from_bytes(std::move(msg_parser->_url));\n\n        message_header *header = msg->header;\n        if (parser->type == HTTP_REQUEST && parser->method == HTTP_GET) {\n            header->hdr_type = http_method::HTTP_METHOD_GET;\n            header->context.u.is_request = 1;\n        } else if (parser->type == HTTP_REQUEST && parser->method == HTTP_POST) {\n            header->hdr_type = http_method::HTTP_METHOD_POST;\n            header->context.u.is_request = 1;\n        } else {\n            derror(\"invalid http type %d and method %d\", parser->type, parser->method);\n            return 1;\n        }\n        return 0;\n    };\n\n    _parser_setting.on_message_complete = [](http_parser *parser) -> int {\n        auto message_parser = static_cast<parser_context *>(parser->data)->parser;\n        message_parser->_received_messages.emplace(std::move(message_parser->_current_message));\n        message_parser->_stage = HTTP_ON_MESSAGE_COMPLETE;\n        return 0;\n    };\n\n    // rDSN application can only serve as http server, support for http client is not in our plan.\n    http_parser_init(&_parser, HTTP_REQUEST);\n}\n\nmessage_ex 
*http_message_parser::get_message_on_receive(message_reader *reader,\n                                                        /*out*/ int &read_next)\n{\n    read_next = 4096;\n\n    if (reader->_buffer_occupied > 0) {\n        parser_context ctx{this, reader};\n        _parser.data = &ctx;\n\n        _parser_setting.on_body = [](http_parser *parser, const char *at, size_t length) -> int {\n            auto data = static_cast<parser_context *>(parser->data);\n            auto &msg = data->parser->_current_message;\n            blob read_buf = data->reader->_buffer;\n\n            // set http body\n            msg->buffers[1].assign(read_buf.buffer(), at - read_buf.buffer_ptr(), length);\n            msg->header->body_length = length;\n            return 0;\n        };\n\n        auto nparsed = http_parser_execute(\n            &_parser, &_parser_setting, reader->_buffer.data(), reader->_buffer_occupied);\n\n        // error handling\n        if (_parser.http_errno != HPE_OK) {\n            auto err = HTTP_PARSER_ERRNO(&_parser);\n            derror(\"failed on stage %s [%s]\",\n                   http_parser_stage_to_string(_stage),\n                   http_errno_description(err));\n\n            read_next = -1;\n            return nullptr;\n        }\n\n        _parsed_length += nparsed;\n        if (is_complete()) {\n            // parsing complete\n            reader->_buffer = reader->_buffer.range(_parsed_length);\n            reader->_buffer_occupied -= _parsed_length;\n            reset();\n        }\n    }\n\n    if (!_received_messages.empty()) {\n        std::unique_ptr<message_ex> msg = std::move(_received_messages.front());\n        _received_messages.pop();\n        msg->hdr_format = NET_HDR_HTTP;\n        return msg.release();\n    } else {\n        return nullptr;\n    }\n}\n\nvoid http_message_parser::prepare_on_send(message_ex *msg)\n{\n    const message_header *header = msg->header;\n    std::vector<blob> &buffers = msg->buffers;\n\n    
dassert(!header->context.u.is_request, \"send response only\");\n\n    unsigned int dsn_size = sizeof(message_header) + header->body_length;\n    int dsn_buf_count = 0;\n    while (dsn_size > 0 && dsn_buf_count < buffers.size()) {\n        blob &buf = buffers[dsn_buf_count];\n        dassert(dsn_size >= buf.length(), \"%u VS %u\", dsn_size, buf.length());\n        dsn_size -= buf.length();\n        ++dsn_buf_count;\n    }\n    dassert(dsn_size == 0, \"dsn_size = %u\", dsn_size);\n\n    buffers.resize(dsn_buf_count);\n}\n\nint http_message_parser::get_buffers_on_send(message_ex *msg, send_buf *buffers)\n{\n    // we must skip the message header\n    unsigned int offset = sizeof(message_header);\n    int i = 0;\n    for (blob &buf : msg->buffers) {\n        if (offset >= buf.length()) {\n            offset -= buf.length();\n            continue;\n        }\n        buffers[i].buf = (void *)(buf.data() + offset);\n        buffers[i].sz = buf.length() - offset;\n        offset = 0;\n        ++i;\n    }\n    return i;\n}\n\nvoid http_message_parser::reset()\n{\n    _current_message.reset();\n    _url.clear();\n    _stage = HTTP_INVALID;\n    _parsed_length = 0;\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/http/http_message_parser.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/ports.h>\n#include <dsn/tool-api/rpc_message.h>\n#include <dsn/tool-api/message_parser.h>\n#include <vector>\n#include <queue>\n\n#include <nodejs/http_parser.h>\n\nnamespace dsn {\n\nDEFINE_CUSTOMIZED_ID(network_header_format, NET_HDR_HTTP)\n\n// Number of blobs that a message_ex contains.\n#define HTTP_MSG_BUFFERS_NUM 4\n\n// Incoming HTTP requests will be parsed into:\n//\n//    msg->header->rpc_name = \"RPC_HTTP_SERVICE\"\n//    msg->header->body_length = http body length\n//    msg->header->hdr_type = \"POST\" / \"GET \";\n//    msg->hdr_format = NET_HDR_HTTP\n//    msg->buffers[0] = header\n//    msg->buffers[1] = body\n//    msg->buffers[2] = url\n//    msg->buffers[3] = 
content-type\n//\n\nenum http_parser_stage\n{\n    HTTP_INVALID,\n    HTTP_ON_MESSAGE_BEGIN,\n    HTTP_ON_URL,\n    HTTP_ON_STATUS,\n    HTTP_ON_HEADER_FIELD,\n    HTTP_ON_HEADER_VALUE,\n    HTTP_ON_HEADERS_COMPLETE,\n    HTTP_ON_BODY,\n    HTTP_ON_MESSAGE_COMPLETE\n};\n\nextern const char *http_parser_stage_to_string(http_parser_stage s);\n\nclass http_message_parser : public message_parser\n{\npublic:\n    http_message_parser();\n\n    ~http_message_parser() override = default;\n\n    message_ex *get_message_on_receive(message_reader *reader,\n                                       /*out*/ int &read_next) override;\n\n    void prepare_on_send(message_ex *msg) override;\n\n    int get_buffers_on_send(message_ex *msg, /*out*/ send_buf *buffers) override;\n\nprivate:\n    friend class http_message_parser_test;\n\n    bool is_complete() const { return _stage == HTTP_ON_MESSAGE_COMPLETE; }\n\n    void reset() override;\n\nprivate:\n    // see https://github.com/joyent/http-parser\n    http_parser_settings _parser_setting;\n    http_parser _parser;\n\n    bool _is_field_content_type{false};\n    std::unique_ptr<message_ex> _current_message;\n    http_parser_stage _stage{HTTP_INVALID};\n    std::string _url;\n    size_t _parsed_length{0};\n    std::queue<std::unique_ptr<message_ex>> _received_messages;\n};\n\n} // namespace dsn\n"
  },
  {
    "path": "src/http/http_server.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/http/http_server.h>\n#include <dsn/tool_api.h>\n#include <dsn/utils/time_utils.h>\n#include <boost/algorithm/string.hpp>\n#include <fmt/ostream.h>\n\n#include \"http_message_parser.h\"\n#include \"pprof_http_service.h\"\n#include \"builtin_http_calls.h\"\n#include \"uri_decoder.h\"\n#include \"http_call_registry.h\"\n#include \"http_server_impl.h\"\n\nnamespace dsn {\n\nDSN_DEFINE_bool(\"http\", enable_http_server, true, \"whether to enable the embedded HTTP server\");\n\n/*extern*/ std::string http_status_code_to_string(http_status_code code)\n{\n    switch (code) {\n    case http_status_code::ok:\n        return \"200 OK\";\n    case http_status_code::temporary_redirect:\n        return \"307 Temporary Redirect\";\n    case http_status_code::bad_request:\n        return \"400 Bad Request\";\n    case http_status_code::not_found:\n        return \"404 Not Found\";\n    case http_status_code::internal_server_error:\n        return \"500 Internal Server Error\";\n    default:\n        dfatal(\"invalid code: %d\", code);\n        __builtin_unreachable();\n    }\n}\n\n/*extern*/ http_call &register_http_call(std::string 
full_path)\n{\n    auto call_ptr = dsn::make_unique<http_call>();\n    call_ptr->path = std::move(full_path);\n    http_call &call = *call_ptr;\n    http_call_registry::instance().add(std::move(call_ptr));\n    return call;\n}\n\n/*extern*/ void deregister_http_call(const std::string &full_path)\n{\n    http_call_registry::instance().remove(full_path);\n}\n\nvoid http_service::register_handler(std::string path, http_callback cb, std::string help)\n{\n    if (!FLAGS_enable_http_server) {\n        return;\n    }\n    auto call = make_unique<http_call>();\n    call->path = this->path();\n    if (!path.empty()) {\n        call->path += \"/\" + std::move(path);\n    }\n    call->callback = std::move(cb);\n    call->help = std::move(help);\n    http_call_registry::instance().add(std::move(call));\n}\n\nhttp_server::http_server() : serverlet<http_server>(\"http_server\")\n{\n    if (!FLAGS_enable_http_server) {\n        return;\n    }\n\n    register_rpc_handler(RPC_HTTP_SERVICE, \"http_service\", &http_server::serve);\n\n    tools::register_message_header_parser<http_message_parser>(NET_HDR_HTTP, {\"GET \", \"POST\"});\n\n    // add builtin services\n    register_builtin_http_calls();\n}\n\nvoid http_server::serve(message_ex *msg)\n{\n    error_with<http_request> res = http_request::parse(msg);\n    http_response resp;\n    if (!res.is_ok()) {\n        resp.status_code = http_status_code::bad_request;\n        resp.body = fmt::format(\"failed to parse request: {}\", res.get_error());\n    } else {\n        const http_request &req = res.get_value();\n        std::shared_ptr<http_call> call = http_call_registry::instance().find(req.path);\n        if (call != nullptr) {\n            call->callback(req, resp);\n        } else {\n            resp.status_code = http_status_code::not_found;\n            resp.body = fmt::format(\"service not found for \\\"{}\\\"\", req.path);\n        }\n    }\n\n    http_response_reply(resp, msg);\n}\n\n/*static*/ error_with<http_request> 
http_request::parse(message_ex *m)\n{\n    if (m->buffers.size() != HTTP_MSG_BUFFERS_NUM) {\n        return error_s::make(ERR_INVALID_DATA,\n                             std::string(\"buffer size is: \") + std::to_string(m->buffers.size()));\n    }\n\n    http_request ret;\n    ret.body = m->buffers[1];\n    ret.full_url = m->buffers[2];\n    ret.method = static_cast<http_method>(m->header->hdr_type);\n\n    http_parser_url u{0};\n    http_parser_parse_url(ret.full_url.data(), ret.full_url.length(), false, &u);\n\n    std::string unresolved_path;\n    if (u.field_set & (1u << UF_PATH)) {\n        uint16_t data_length = u.field_data[UF_PATH].len;\n        unresolved_path.resize(data_length + 1);\n        strncpy(&unresolved_path[0], ret.full_url.data() + u.field_data[UF_PATH].off, data_length);\n        unresolved_path[data_length] = '\\0';\n\n        // decode resolved path\n        auto decoded_unresolved_path = uri::decode(unresolved_path);\n        if (!decoded_unresolved_path.is_ok()) {\n            return decoded_unresolved_path.get_error();\n        }\n        unresolved_path = decoded_unresolved_path.get_value();\n    }\n\n    std::string unresolved_query;\n    if (u.field_set & (1u << UF_QUERY)) {\n        uint16_t data_length = u.field_data[UF_QUERY].len;\n        unresolved_query.resize(data_length);\n        strncpy(\n            &unresolved_query[0], ret.full_url.data() + u.field_data[UF_QUERY].off, data_length);\n\n        // decode resolved query\n        auto decoded_unresolved_query = uri::decode(unresolved_query);\n        if (!decoded_unresolved_query.is_ok()) {\n            return decoded_unresolved_query.get_error();\n        }\n        unresolved_query = decoded_unresolved_query.get_value();\n    }\n\n    // remove tailing '\\0'\n    if (!unresolved_path.empty() && *unresolved_path.crbegin() == '\\0') {\n        unresolved_path.pop_back();\n    }\n\n    // parse path\n    std::vector<std::string> args;\n    boost::split(args, unresolved_path, 
boost::is_any_of(\"/\"));\n    std::vector<std::string> real_args;\n    for (std::string &arg : args) {\n        if (!arg.empty()) {\n            real_args.emplace_back(std::move(arg));\n        }\n    }\n    if (real_args.size() == 0) {\n        ret.path = \"\";\n    } else {\n        std::string path = real_args[0];\n        for (int i = 1; i < real_args.size(); i++) {\n            path += '/';\n            path += real_args[i];\n        }\n        ret.path = std::move(path);\n    }\n\n    // find if there are method args (<ip>:<port>/<service>/<method>?<arg>=<val>&<arg>=<val>)\n    if (!unresolved_query.empty()) {\n        std::vector<std::string> method_arg_val;\n        boost::split(method_arg_val, unresolved_query, boost::is_any_of(\"&\"));\n        for (const std::string &arg_val : method_arg_val) {\n            size_t sep = arg_val.find_first_of('=');\n            if (sep == std::string::npos) {\n                // assume this as a bool flag\n                ret.query_args.emplace(arg_val, \"\");\n                continue;\n            }\n            std::string name = arg_val.substr(0, sep);\n            std::string value;\n            if (sep + 1 < arg_val.size()) {\n                value = arg_val.substr(sep + 1, arg_val.size() - sep);\n            }\n            auto iter = ret.query_args.find(name);\n            if (iter != ret.query_args.end()) {\n                return FMT_ERR(ERR_INVALID_PARAMETERS, \"duplicate parameter: {}\", name);\n            }\n            ret.query_args.emplace(std::move(name), std::move(value));\n        }\n    }\n\n    return ret;\n}\n\n/*extern*/ void http_response_reply(const http_response &resp, message_ex *req)\n{\n    message_ptr resp_msg = req->create_response();\n\n    std::ostringstream os;\n    os << \"HTTP/1.1 \" << http_status_code_to_string(resp.status_code) << \"\\r\\n\";\n    os << \"Content-Type: \" << resp.content_type << \"\\r\\n\";\n    os << \"Content-Length: \" << resp.body.length() << \"\\r\\n\";\n    
if (!resp.location.empty()) {\n        os << \"Location: \" << resp.location << \"\\r\\n\";\n    }\n    os << \"\\r\\n\";\n    os << resp.body;\n\n    rpc_write_stream writer(resp_msg.get());\n    writer.write(os.str().data(), os.str().length());\n    writer.flush();\n\n    dsn_rpc_reply(resp_msg.get());\n}\n\n/*extern*/ void start_http_server()\n{\n    // starts http server as a singleton\n    static http_server server;\n}\n\n/*extern*/ void register_http_service(http_service *svc)\n{\n    // simply hosting the memory of these http services.\n    static std::vector<std::unique_ptr<http_service>> services_holder;\n    static std::mutex mu;\n\n    std::lock_guard<std::mutex> guard(mu);\n    services_holder.push_back(std::unique_ptr<http_service>(svc));\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/http/http_server_impl.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/tool-api/rpc_message.h>\n#include <dsn/cpp/serverlet.h>\n#include <dsn/http/http_server.h>\n\nnamespace dsn {\n\nclass http_server : public serverlet<http_server>\n{\npublic:\n    http_server();\n\n    ~http_server() override = default;\n\n    void serve(message_ex *msg);\n};\n\nextern void http_response_reply(const http_response &resp, message_ex *req);\n\n} // namespace dsn\n"
  },
  {
    "path": "src/http/perf_counter_http_service.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/output_utils.h>\n#include \"builtin_http_calls.h\"\n\nnamespace dsn {\n\nvoid get_perf_counter_handler(const http_request &req, http_response &resp)\n{\n    std::string perf_counter_name;\n    for (const auto &p : req.query_args) {\n        if (\"name\" == p.first) {\n            perf_counter_name = p.second;\n        } else {\n            resp.status_code = http_status_code::bad_request;\n            return;\n        }\n    }\n\n    // get perf counter by perf counter name\n    perf_counter_ptr perf_counter = perf_counters::instance().get_counter(perf_counter_name);\n\n    // insert perf counter info into table printer\n    dsn::utils::table_printer tp;\n    if (perf_counter) {\n        tp.add_row_name_and_data(\"name\", perf_counter_name);\n        if (COUNTER_TYPE_NUMBER_PERCENTILES == perf_counter->type()) {\n            tp.add_row_name_and_data(\"p99\", perf_counter->get_percentile(COUNTER_PERCENTILE_99));\n            tp.add_row_name_and_data(\"p999\", perf_counter->get_percentile(COUNTER_PERCENTILE_999));\n        } else {\n            tp.add_row_name_and_data(\"value\", perf_counter->get_value());\n        }\n    
    tp.add_row_name_and_data(\"type\", dsn_counter_type_to_string(perf_counter->type()));\n        tp.add_row_name_and_data(\"description\", perf_counter->dsptr());\n    }\n\n    std::ostringstream out;\n    tp.output(out, dsn::utils::table_printer::output_format::kJsonCompact);\n    resp.body = out.str();\n    resp.status_code = http_status_code::ok;\n}\n} // namespace dsn\n"
  },
  {
    "path": "src/http/pprof_http_service.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#ifdef DSN_ENABLE_GPERF\n\n#include <fcntl.h>\n\n#include <cstdlib>\n#include <chrono>\n#include <fstream>\n#include <sstream>\n\n#include \"pprof_http_service.h\"\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/c/api_layer1.h>\n#include <dsn/utility/process_utils.h>\n#include <dsn/utility/string_conv.h>\n#include <dsn/utility/defer.h>\n#include <dsn/utility/timer.h>\n#include <dsn/utility/string_splitter.h>\n#include <gperftools/heap-profiler.h>\n#include <gperftools/malloc_extension.h>\n#include <gperftools/profiler.h>\n\nnamespace dsn {\n\n//                            //\n// == ip:port/pprof/symbol == //\n//                            //\n\ntypedef std::map<uintptr_t, std::string> symbol_map_t;\nstatic symbol_map_t symbol_map;\nstatic pthread_once_t s_load_symbolmap_once = PTHREAD_ONCE_INIT;\n\nstruct lib_info\n{\n    uintptr_t start_addr;\n    uintptr_t end_addr;\n    size_t offset;\n    std::string path;\n};\n\nstatic bool has_ext(const std::string &name, const std::string &ext)\n{\n    size_t index = name.find(ext);\n    if (index == std::string::npos) {\n        return false;\n    }\n    return (index + ext.size() == name.size() 
|| name[index + ext.size()] == '.');\n}\n\nstatic int extract_symbols_from_binary(std::map<uintptr_t, std::string> &addr_map,\n                                       const lib_info &lib_info)\n{\n    timer tm;\n    tm.start();\n    std::string cmd = \"nm -C -p \";\n    cmd.append(lib_info.path);\n    std::stringstream ss;\n    ddebug(\"executing `%s`\", cmd.c_str());\n    const int rc = utils::pipe_execute(cmd.c_str(), ss);\n    if (rc < 0) {\n        derror(\"fail to popen `%s`\", cmd.c_str());\n        return -1;\n    }\n    std::string line;\n    while (std::getline(ss, line)) {\n        string_splitter sp(line.c_str(), ' ');\n        if (sp == NULL) {\n            continue;\n        }\n        char *endptr = NULL;\n        uintptr_t addr = strtoull(sp.field(), &endptr, 16);\n        if (*endptr != ' ') {\n            continue;\n        }\n        if (addr < lib_info.start_addr) {\n            addr = addr + lib_info.start_addr - lib_info.offset;\n        }\n        if (addr >= lib_info.end_addr) {\n            continue;\n        }\n        ++sp;\n        if (sp == NULL) {\n            continue;\n        }\n        if (sp.length() != 1UL) {\n            continue;\n        }\n        // const char c = *sp.field();\n\n        ++sp;\n        if (sp == NULL) {\n            continue;\n        }\n        const char *name_begin = sp.field();\n        if (strncmp(name_begin, \"typeinfo \", 9) == 0 || strncmp(name_begin, \"VTT \", 4) == 0 ||\n            strncmp(name_begin, \"vtable \", 7) == 0 || strncmp(name_begin, \"global \", 7) == 0 ||\n            strncmp(name_begin, \"guard \", 6) == 0) {\n            addr_map[addr] = std::string();\n            continue;\n        }\n\n        const char *name_end = sp.field();\n        bool stop = false;\n        char last_char = '\\0';\n        while (1) {\n            switch (*name_end) {\n            case 0:\n            case '\\r':\n            case '\\n':\n                stop = true;\n                break;\n            case 
'(':\n            case '<':\n                // \\(.*\\w\\)[(<]...    -> \\1\n                // foo(..)            -> foo\n                // foo<...>(...)      -> foo\n                // a::b::foo(...)     -> a::b::foo\n                // a::(b)::foo(...)   -> a::(b)::foo\n                if (isalpha(last_char) || isdigit(last_char) || last_char == '_') {\n                    stop = true;\n                }\n            default:\n                break;\n            }\n            if (stop) {\n                break;\n            }\n            last_char = *name_end++;\n        }\n        // If address conflicts, choose a shorter name (not necessarily to be\n        // T type in nm). This works fine because aliases often have more\n        // prefixes.\n        const size_t name_len = name_end - name_begin;\n        auto it = addr_map.find(addr);\n        if (it != addr_map.end()) {\n            if (name_len < it->second.size()) {\n                it->second.assign(name_begin, name_len);\n            }\n        } else {\n            addr_map[addr] = std::string(name_begin, name_len);\n        }\n    }\n    if (addr_map.find(lib_info.end_addr) == addr_map.end()) {\n        addr_map[lib_info.end_addr] = std::string();\n    }\n    tm.stop();\n    ddebug(\"Loaded %s in %zdms\", lib_info.path.c_str(), tm.m_elapsed());\n    return 0;\n}\n\nstatic void load_symbols()\n{\n    timer tm;\n    tm.start();\n    auto fp = fopen(\"/proc/self/maps\", \"r\");\n    if (fp == nullptr) {\n        return;\n    }\n    auto cleanup = defer([fp]() { fclose(fp); });\n\n    char *line = nullptr;\n    size_t line_len = 0;\n    ssize_t nr = 0;\n    while ((nr = getline(&line, &line_len, fp)) != -1) {\n        string_splitter sp(line, line + nr, ' ');\n        if (sp == NULL) {\n            continue;\n        }\n        char *endptr;\n        uintptr_t start_addr = strtoull(sp.field(), &endptr, 16);\n        if (*endptr != '-') {\n            continue;\n        }\n        ++endptr;\n        
uintptr_t end_addr = strtoull(endptr, &endptr, 16);\n        if (*endptr != ' ') {\n            continue;\n        }\n        ++sp;\n        // ..x. must be executable\n        if (sp == NULL || sp.length() != 4 || sp.field()[2] != 'x') {\n            continue;\n        }\n        ++sp;\n        if (sp == NULL) {\n            continue;\n        }\n        size_t offset = strtoull(sp.field(), &endptr, 16);\n        if (*endptr != ' ') {\n            continue;\n        }\n        // skip $4~$5\n        for (int i = 0; i < 3; ++i) {\n            ++sp;\n        }\n        if (sp == NULL) {\n            continue;\n        }\n        size_t n = sp.length();\n        if (sp.field()[n - 1] == '\\n') {\n            --n;\n        }\n        std::string path(sp.field(), n);\n\n        if (!has_ext(path, \".so\") && !has_ext(path, \".dll\") && !has_ext(path, \".dylib\") &&\n            !has_ext(path, \".bundle\")) {\n            continue;\n        }\n        lib_info info;\n        info.start_addr = start_addr;\n        info.end_addr = end_addr;\n        info.offset = offset;\n        info.path = path;\n        extract_symbols_from_binary(symbol_map, info);\n    }\n    free(line);\n\n    lib_info info;\n    info.start_addr = 0;\n    info.end_addr = std::numeric_limits<uintptr_t>::max();\n    info.offset = 0;\n    info.path = program_invocation_name;\n    extract_symbols_from_binary(symbol_map, info);\n\n    timer tm2;\n    tm2.start();\n    size_t num_removed = 0;\n    bool last_is_empty = false;\n    for (auto it = symbol_map.begin(); it != symbol_map.end();) {\n        if (it->second.empty()) {\n            if (last_is_empty) {\n                symbol_map.erase(it++);\n                ++num_removed;\n            } else {\n                ++it;\n            }\n            last_is_empty = true;\n        } else {\n            ++it;\n        }\n    }\n    tm2.stop();\n    if (num_removed) {\n        ddebug(\"Removed %zd entries in %zdms\", num_removed, tm2.m_elapsed());\n    
}\n\n    tm.stop();\n    ddebug(\"Loaded all symbols in %zdms\", tm.m_elapsed());\n}\n\nstatic void find_symbols(std::string *out, std::vector<uintptr_t> &addr_list)\n{\n    char buf[32];\n    for (size_t i = 0; i < addr_list.size(); ++i) {\n        int len = snprintf(buf, sizeof(buf), \"0x%08lx\\t\", addr_list[i]);\n        out->append(buf, static_cast<size_t>(len));\n        symbol_map_t::const_iterator it = symbol_map.lower_bound(addr_list[i]);\n        if (it == symbol_map.end() || it->first != addr_list[i]) {\n            if (it != symbol_map.begin()) {\n                --it;\n            } else {\n                len = snprintf(buf, sizeof(buf), \"0x%08lx\\n\", addr_list[i]);\n                out->append(buf, static_cast<size_t>(len));\n                continue;\n            }\n        }\n        if (it->second.empty()) {\n            len = snprintf(buf, sizeof(buf), \"0x%08lx\\n\", addr_list[i]);\n            out->append(buf, static_cast<size_t>(len));\n        } else {\n            out->append(it->second);\n            out->push_back('\\n');\n        }\n    }\n}\n\nvoid pprof_http_service::symbol_handler(const http_request &req, http_response &resp)\n{\n    // Load /proc/self/maps\n    pthread_once(&s_load_symbolmap_once, load_symbols);\n\n    if (req.method != http_method::HTTP_METHOD_POST) {\n        char buf[64];\n        snprintf(buf, sizeof(buf), \"num_symbols: %lu\\n\", symbol_map.size());\n        resp.body = buf;\n        return;\n    }\n\n    // addr_str is addresses separated by +\n    std::string addr_str = req.body.to_string();\n    // May be quoted\n    const char *addr_cstr = addr_str.data();\n    if (*addr_cstr == '\\'' || *addr_cstr == '\"') {\n        ++addr_cstr;\n    }\n    std::vector<uintptr_t> addr_list;\n    addr_list.reserve(32);\n    string_splitter sp(addr_cstr, '+');\n    for (; sp != NULL; ++sp) {\n        char *endptr;\n        uintptr_t addr = strtoull(sp.field(), &endptr, 16);\n        addr_list.push_back(addr);\n    }\n    
find_symbols(&resp.body, addr_list);\n}\n\n//                          //\n// == ip:port/pprof/heap == //\n//                          //\nvoid pprof_http_service::heap_handler(const http_request &req, http_response &resp)\n{\n    bool in_pprof = false;\n    if (!_in_pprof_action.compare_exchange_strong(in_pprof, true)) {\n        dwarn_f(\"node is already exectuting pprof action, please wait and retry\");\n        resp.status_code = http_status_code::internal_server_error;\n        return;\n    }\n\n    const std::string SECOND = \"seconds\";\n    const uint32_t kDefaultSecond = 10;\n\n    // get seconds from query params, default value is `kDefaultSecond`\n    uint32_t seconds = kDefaultSecond;\n    const auto iter = req.query_args.find(SECOND);\n    if (iter != req.query_args.end()) {\n        const auto seconds_str = iter->second;\n        dsn::internal::buf2unsigned(seconds_str, seconds);\n    }\n\n    std::stringstream profile_name_prefix;\n    profile_name_prefix << \"heap_profile.\" << getpid() << \".\" << dsn_now_ns();\n\n    HeapProfilerStart(profile_name_prefix.str().c_str());\n    sleep(seconds);\n    const char *profile = GetHeapProfile();\n    HeapProfilerStop();\n\n    resp.status_code = http_status_code::ok;\n    resp.body = profile;\n    delete profile;\n\n    _in_pprof_action.store(false);\n}\n\n//                             //\n// == ip:port/pprof/cmdline == //\n//                             //\n\n// Read command line of this program. 
If `with_args' is true, args are\n// included and separated with spaces.\n// Returns length of the command line on success, -1 otherwise.\n// NOTE: `buf' does not end with zero.\nssize_t read_command_line(char *buf, size_t len, bool with_args)\n{\n    auto fd = open(\"/proc/self/cmdline\", O_RDONLY);\n    if (fd < 0) {\n        derror(\"Fail to open /proc/self/cmdline\");\n        return -1;\n    }\n    auto cleanup = defer([fd]() { close(fd); });\n    ssize_t nr = read(fd, buf, len);\n    if (nr <= 0) {\n        derror(\"Fail to read /proc/self/cmdline\");\n        return -1;\n    }\n\n    if (with_args) {\n        if ((size_t)nr == len) {\n            derror(\"buf is not big enough\");\n            return -1;\n        }\n        for (ssize_t i = 0; i < nr; ++i) {\n            if (buf[i] == '\\0') {\n                buf[i] = '\\n';\n            }\n        }\n        return nr;\n    } else {\n        for (ssize_t i = 0; i < nr; ++i) {\n            // The command in macos is separated with space and ended with '\\n'\n            if (buf[i] == '\\0' || buf[i] == '\\n' || buf[i] == ' ') {\n                return i;\n            }\n        }\n        if ((size_t)nr == len) {\n            ddebug(\"buf is not big enough\");\n            return -1;\n        }\n        return nr;\n    }\n}\n\nvoid pprof_http_service::cmdline_handler(const http_request &req, http_response &resp)\n{\n    char buf[1024]; // should be enough?\n    const ssize_t nr = read_command_line(buf, sizeof(buf), true);\n    if (nr < 0) {\n        return;\n    }\n    resp.body = buf;\n}\n\n//                             //\n// == ip:port/pprof/growth == //\n//                             //\n\nvoid pprof_http_service::growth_handler(const http_request &req, http_response &resp)\n{\n    bool in_pprof = false;\n    if (!_in_pprof_action.compare_exchange_strong(in_pprof, true)) {\n        dwarn_f(\"node is already exectuting pprof action, please wait and retry\");\n        resp.status_code = 
http_status_code::internal_server_error;\n        return;\n    }\n\n    MallocExtension *malloc_ext = MallocExtension::instance();\n    ddebug(\"received requests for growth profile\");\n    malloc_ext->GetHeapGrowthStacks(&resp.body);\n\n    _in_pprof_action.store(false);\n}\n\n//                             //\n// == ip:port/pprof/profile == //\n//                             //\nstatic bool get_cpu_profile(std::string &result, useconds_t seconds)\n{\n    const char *file_name = \"cpu.prof\";\n\n    ProfilerStart(file_name);\n    usleep(seconds);\n    ProfilerStop();\n\n    std::ifstream in(file_name);\n    if (!in.is_open()) {\n        result = \"No profile file\";\n        return false;\n    }\n    std::ostringstream content;\n    content << in.rdbuf();\n    result = content.str();\n    in.close();\n    if (remove(file_name) != 0) {\n        result = \"Failed to remove temporary profile file\";\n        return false;\n    }\n    return true;\n}\n\nvoid pprof_http_service::profile_handler(const http_request &req, http_response &resp)\n{\n    bool in_pprof = false;\n    if (!_in_pprof_action.compare_exchange_strong(in_pprof, true)) {\n        dwarn_f(\"node is already exectuting pprof action, please wait and retry\");\n        resp.status_code = http_status_code::internal_server_error;\n        return;\n    }\n\n    useconds_t seconds = 60000000;\n\n    std::string req_url = req.full_url.to_string();\n    size_t len = req.full_url.length();\n    string_splitter url_sp(req_url.data(), req_url.data() + len, '?');\n    if (url_sp != NULL && ++url_sp != NULL) {\n        string_splitter param_sp(url_sp.field(), url_sp.field() + url_sp.length(), '&');\n        while (param_sp != NULL) {\n            string_splitter kv_sp(param_sp.field(), param_sp.field() + param_sp.length(), '=');\n            std::string key(kv_sp.field(), kv_sp.length());\n            if (kv_sp != NULL && key == \"seconds\" && ++kv_sp != NULL) {\n                char *end_ptr;\n                
seconds = strtoul(kv_sp.field(), &end_ptr, 10) * 1000000;\n                break;\n            }\n            param_sp++;\n        }\n    }\n\n    resp.status_code = http_status_code::ok;\n\n    get_cpu_profile(resp.body, seconds);\n\n    _in_pprof_action.store(false);\n}\n\n} // namespace dsn\n\n#endif // DSN_ENABLE_GPERF\n"
  },
  {
    "path": "src/http/pprof_http_service.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#ifdef DSN_ENABLE_GPERF\n\n#include <dsn/http/http_server.h>\n\nnamespace dsn {\n\nclass pprof_http_service : public http_service\n{\npublic:\n    pprof_http_service()\n    {\n        register_handler(\"heap\",\n                         std::bind(&pprof_http_service::heap_handler,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/pprof/heap\");\n        register_handler(\"symbol\",\n                         std::bind(&pprof_http_service::symbol_handler,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/pprof/symbol\");\n        register_handler(\"cmdline\",\n                         std::bind(&pprof_http_service::cmdline_handler,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         
\"ip:port/pprof/cmdline\");\n        register_handler(\"growth\",\n                         std::bind(&pprof_http_service::growth_handler,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/pprof/growth\");\n        register_handler(\"profile\",\n                         std::bind(&pprof_http_service::profile_handler,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/pprof/profile\");\n    }\n\n    std::string path() const override { return \"pprof\"; }\n\n    void heap_handler(const http_request &req, http_response &resp);\n\n    void symbol_handler(const http_request &req, http_response &resp);\n\n    void cmdline_handler(const http_request &req, http_response &resp);\n\n    void growth_handler(const http_request &req, http_response &resp);\n\n    void profile_handler(const http_request &req, http_response &resp);\n\nprivate:\n    std::atomic_bool _in_pprof_action{false};\n};\n\n} // namespace dsn\n\n#endif // DSN_ENABLE_GPERF\n"
  },
  {
    "path": "src/http/service_version.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"service_version.h\"\n\nnamespace dsn {\n\nservice_version app_version;\n\n} // namespace dsn\n"
  },
  {
    "path": "src/http/service_version.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <string>\n\nnamespace dsn {\n\nstruct service_version\n{\n    std::string version;\n    std::string git_commit;\n};\n\nextern service_version app_version;\n\n} // namespace dsn\n"
  },
  {
    "path": "src/http/test/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_http_test)\n\nset(MY_PROJ_SRC \"\")\n\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS\n    dsn_http\n    dsn_runtime\n    gtest\n    gtest_main\n    )\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\nset(MY_BINPLACES\n    \"${CMAKE_CURRENT_SOURCE_DIR}/run.sh\"\n)\n\ndsn_add_test()\n"
  },
  {
    "path": "src/http/test/http_server_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/http/http_server.h>\n#include <gtest/gtest.h>\n\n#include \"http/http_message_parser.h\"\n#include \"http/builtin_http_calls.h\"\n#include \"http/http_call_registry.h\"\n\nnamespace dsn {\n\nTEST(http_server, parse_url)\n{\n    struct test_case\n    {\n        std::string url;\n\n        error_code err;\n        std::string path;\n    } tests[] = {\n        {\"http://127.0.0.1:34601\", ERR_OK, \"\"},\n        {\"http://127.0.0.1:34601/\", ERR_OK, \"\"},\n        {\"http://127.0.0.1:34601///\", ERR_OK, \"\"},\n        {\"http://127.0.0.1:34601/threads\", ERR_OK, \"threads\"},\n        {\"http://127.0.0.1:34601/threads/?detail\", ERR_OK, \"threads\"},\n        {\"http://127.0.0.1:34601//pprof/heap/\", ERR_OK, \"pprof/heap\"},\n        {\"http://127.0.0.1:34601//pprof///heap?detailed=true\", ERR_OK, \"pprof/heap\"},\n        {\"http://127.0.0.1:34601/pprof/heap/arg/\", ERR_OK, \"pprof/heap/arg\"},\n        {\"http://127.0.0.1:34601/pprof///heap///arg/\", ERR_OK, \"pprof/heap/arg\"},\n    };\n\n    for (auto tt : tests) {\n        ref_ptr<message_ex> m = message_ex::create_receive_message_with_standalone_header(\n            
blob::create_from_bytes(std::string(\"POST\")));\n        m->buffers.emplace_back(blob::create_from_bytes(std::string(tt.url)));\n        m->buffers.resize(HTTP_MSG_BUFFERS_NUM);\n\n        auto res = http_request::parse(m.get());\n        if (res.is_ok()) {\n            ASSERT_EQ(res.get_value().path, tt.path) << tt.url;\n        } else {\n            ASSERT_EQ(res.get_error().code(), tt.err);\n        }\n    }\n}\n\nTEST(bultin_http_calls_test, meta_query)\n{\n    http_request req;\n    http_response resp;\n    get_recent_start_time_handler(req, resp);\n    ASSERT_EQ(resp.status_code, http_status_code::ok);\n\n    get_version_handler(req, resp);\n    ASSERT_EQ(resp.status_code, http_status_code::ok);\n}\n\nTEST(bultin_http_calls_test, get_help)\n{\n    for (const auto &call : http_call_registry::instance().list_all_calls()) {\n        http_call_registry::instance().remove(call->path);\n    }\n\n    register_http_call(\"\")\n        .with_callback(\n            [](const http_request &req, http_response &resp) { get_help_handler(req, resp); })\n        .with_help(\"ip:port/\");\n\n    http_request req;\n    http_response resp;\n    get_help_handler(req, resp);\n    ASSERT_EQ(resp.status_code, http_status_code::ok);\n    ASSERT_EQ(resp.body, \"{\\\"/\\\":\\\"ip:port/\\\"}\\n\");\n\n    register_http_call(\"recentStartTime\")\n        .with_callback([](const http_request &req, http_response &resp) {\n            get_recent_start_time_handler(req, resp);\n        })\n        .with_help(\"ip:port/recentStartTime\");\n\n    get_help_handler(req, resp);\n    ASSERT_EQ(resp.body, \"{\\\"/\\\":\\\"ip:port/\\\",\\\"/recentStartTime\\\":\\\"ip:port/recentStartTime\\\"}\\n\");\n\n    for (const auto &call : http_call_registry::instance().list_all_calls()) {\n        http_call_registry::instance().remove(call->path);\n    }\n}\n\nclass http_message_parser_test : public testing::Test\n{\npublic:\n    void parse_bad_request()\n    {\n        // not complete in normal way\n       
 std::string http_request = \"GET \"\n                                   \"/ HTTP/1.1\\r\\n\";\n\n        message_reader reader(64);\n        char *buf = reader.read_buffer_ptr(http_request.size());\n        memcpy(buf, http_request.data(), http_request.size());\n        reader.mark_read(http_request.size());\n\n        http_message_parser parser;\n        int read_next = 0;\n        message_ex *msg = parser.get_message_on_receive(&reader, read_next);\n        ASSERT_EQ(msg, nullptr);\n        ASSERT_EQ(parser._stage, HTTP_ON_URL); // url parsed\n        ASSERT_EQ(parser._parsed_length, http_request.size());\n        ASSERT_EQ(parser._url, \"/\");\n        ASSERT_NE(parser._current_message, nullptr);\n        ASSERT_NE(read_next, -1);\n\n        // normal request\n        const char http_request2[] = \"GET / HTTP/1.1\\r\\n\"\n                                     \"Host: baidu.com\\r\\n\"\n                                     \"Accept: */*\\r\\n\"\n                                     \"\\r\\n\";\n        buf = reader.read_buffer_ptr(sizeof(http_request2));\n        memcpy(buf, http_request2, sizeof(http_request2));\n        reader.mark_read(sizeof(http_request2));\n\n        msg = parser.get_message_on_receive(&reader, read_next);\n        ASSERT_EQ(msg, nullptr);\n        ASSERT_EQ(read_next, -1);\n    }\n\n    void parse_multiple_requests()\n    {\n        std::string http_request = std::string(\"GET /\") + \" HTTP/1.1\\r\\n\\r\\n\";\n        std::string requests;\n        for (int i = 0; i < 100; i++) {\n            requests += http_request;\n        }\n\n        message_reader reader(64);\n        char *buf = reader.read_buffer_ptr(requests.size());\n        memcpy(buf, requests.data(), requests.size());\n        reader.mark_read(requests.size());\n\n        http_message_parser parser;\n        int read_next = 0;\n\n        for (int i = 0; i < 100; i++) {\n            message_ptr msg = parser.get_message_on_receive(&reader, read_next);\n            
ASSERT_NE(msg, nullptr);\n            ASSERT_EQ(msg->hdr_format, NET_HDR_HTTP);\n            ASSERT_EQ(msg->header->hdr_type, http_method::HTTP_METHOD_GET);\n            ASSERT_EQ(msg->header->context.u.is_request, 1);\n            ASSERT_EQ(msg->buffers.size(), HTTP_MSG_BUFFERS_NUM);\n            ASSERT_EQ(msg->buffers[2].size(), 1); // url\n\n            // ensure states are reset\n            ASSERT_EQ(parser._current_message, nullptr);\n            ASSERT_EQ(parser._stage, HTTP_INVALID);\n            ASSERT_EQ(parser._parsed_length, 0);\n            ASSERT_EQ(parser._received_messages.size(), 100 - i - 1);\n        }\n\n        ASSERT_EQ(parser._received_messages.size(), 0);\n    }\n};\n\nTEST_F(http_message_parser_test, parse_request)\n{\n    std::string http_request = \"POST /path/file.html?sdfsdf=sdfs&sldf1=sdf HTTP/1.1\\r\\n\"\n                               \"User-Agent: HTTPTool/1.0  \\r\\n\" // intended ending spaces\n                               \"Content-Type: json\\r\\n\"\n                               \"Content-Length: 19\\r\\n\"\n                               \"Log-ID: 456\\r\\n\"\n                               \"Host: myhost\\r\\n\"\n                               \"Correlation-ID: 123\\r\\n\"\n                               \"Authorization: test\\r\\n\"\n                               \"Accept: */*\\r\\n\"\n                               \"\\r\\n\"\n                               \"Message Body sdfsdf\\r\\n\";\n\n    message_reader reader(64);\n    char *buf = reader.read_buffer_ptr(http_request.size());\n    memcpy(buf, http_request.data(), http_request.size());\n    reader.mark_read(http_request.size());\n\n    http_message_parser parser;\n    int read_next = 0;\n    message_ptr msg = parser.get_message_on_receive(&reader, read_next);\n    ASSERT_NE(msg, nullptr);\n\n    ASSERT_EQ(msg->hdr_format, NET_HDR_HTTP);\n    ASSERT_EQ(msg->header->hdr_type, http_method::HTTP_METHOD_POST);\n    ASSERT_EQ(msg->header->context.u.is_request, 1);\n    
ASSERT_EQ(msg->buffers.size(), HTTP_MSG_BUFFERS_NUM);\n    ASSERT_EQ(msg->buffers[1].to_string(), \"Message Body sdfsdf\"); // body\n    ASSERT_EQ(                                                     // url\n        msg->buffers[2].to_string(),\n        std::string(\"/path/file.html?sdfsdf=sdfs&sldf1=sdf\"));\n    ASSERT_EQ(msg->buffers[3].to_string(), std::string(\"json\"));\n}\n\nTEST_F(http_message_parser_test, eof)\n{\n    std::string http_request =\n        \"GET \"\n        \"/CloudApiControl/HttpServer/telematics/v3/\"\n        \"weather?location=%E6%B5%B7%E5%8D%97%E7%9C%81%E7%9B%B4%E8%BE%96%E5%8E%BF%E7%BA%A7%E8%A1%8C%\"\n        \"E6%94%BF%E5%8D%95%E4%BD%8D&output=json&ak=0l3FSP6qA0WbOzGRaafbmczS HTTP/1.1\\r\\n\"\n        \"X-Host: api.map.baidu.com\\r\\n\"\n        \"X-Forwarded-Proto: http\\r\\n\"\n        \"Host: api.map.baidu.com\\r\\n\"\n        \"User-Agent: IME/Android/4.4.2/N80.QHD.LT.X10.V3/N80.QHD.LT.X10.V3.20150812.031915\\r\\n\"\n        \"Accept: application/json\\r\\n\"\n        \"Accept-Charset: UTF-8,*;q=0.5\\r\\n\"\n        \"Accept-Encoding: deflate,sdch\\r\\n\"\n        \"Accept-Language: zh-CN,en-US;q=0.8,zh;q=0.6\\r\\n\"\n        \"Bfe-Atk: NORMAL_BROWSER\\r\\n\"\n        \"Bfe_logid: 8767802212038413243\\r\\n\"\n        \"Bfeip: 10.26.124.40\\r\\n\"\n        \"CLIENTIP: 119.29.102.26\\r\\n\"\n        \"CLIENTPORT: 59863\\r\\n\"\n        \"Cache-Control: max-age=0\\r\\n\"\n        \"Content-Type: application/json;charset=utf8\\r\\n\"\n        \"X-Forwarded-For: 119.29.102.26\\r\\n\"\n        \"X-Forwarded-Port: 59863\\r\\n\"\n        \"X-Ime-Imei: 35629601890905\\r\\n\"\n        \"X_BD_LOGID: 3959476981\\r\\n\"\n        \"X_BD_LOGID64: 16815814797661447369\\r\\n\"\n        \"X_BD_PRODUCT: map\\r\\n\"\n        \"X_BD_SUBSYS: apimap\\r\\n\\r\\n\";\n\n    message_reader reader(64);\n    char *buf = reader.read_buffer_ptr(http_request.size());\n    memcpy(buf, http_request.data(), http_request.size());\n    
reader.mark_read(http_request.size());\n\n    http_message_parser parser;\n    int read_next = 0;\n    message_ptr msg = parser.get_message_on_receive(&reader, read_next);\n    ASSERT_NE(msg, nullptr);\n\n    ASSERT_EQ(msg->hdr_format, NET_HDR_HTTP);\n    ASSERT_EQ(msg->header->hdr_type, http_method::HTTP_METHOD_GET);\n    ASSERT_EQ(msg->header->context.u.is_request, 1);\n    ASSERT_EQ(msg->buffers.size(), HTTP_MSG_BUFFERS_NUM);\n    ASSERT_EQ(msg->buffers[1].to_string(), \"\"); // body\n    ASSERT_EQ(                                  // url\n        msg->buffers[2].to_string(),\n        std::string(\"/CloudApiControl/HttpServer/telematics/v3/\"\n                    \"weather?location=%E6%B5%B7%E5%8D%97%E7%9C%81%E7%9B%B4%E8%BE%96%E5%8E%BF%E7%BA%\"\n                    \"A7%E8%A1%8C%\"\n                    \"E6%94%BF%E5%8D%95%E4%BD%8D&output=json&ak=0l3FSP6qA0WbOzGRaafbmczS\"));\n    ASSERT_EQ(msg->buffers[3].to_string(), std::string(\"application/json;charset=utf8\"));\n}\n\nTEST_F(http_message_parser_test, parse_bad_request) { parse_bad_request(); }\n\nTEST_F(http_message_parser_test, parse_multiple_requests) { parse_multiple_requests(); }\n\nTEST_F(http_message_parser_test, parse_long_url)\n{\n    std::string http_request = \"GET /\" + std::string(4096, 'a') + \" HTTP/1.1\\r\\n\\r\\n\";\n\n    message_reader reader(64);\n    char *buf = reader.read_buffer_ptr(http_request.size());\n    memcpy(buf, http_request.data(), http_request.size());\n    reader.mark_read(http_request.size());\n\n    http_message_parser parser;\n    int read_next = 0;\n    message_ptr msg = parser.get_message_on_receive(&reader, read_next);\n    ASSERT_NE(msg, nullptr);\n    ASSERT_EQ(msg->hdr_format, NET_HDR_HTTP);\n    ASSERT_EQ(msg->header->hdr_type, http_method::HTTP_METHOD_GET);\n    ASSERT_EQ(msg->header->context.u.is_request, 1);\n    ASSERT_EQ(msg->buffers.size(), HTTP_MSG_BUFFERS_NUM);\n    ASSERT_EQ(msg->buffers[2].size(), 4097); // url\n}\n\nTEST_F(http_message_parser_test, 
parse_query_params)\n{\n    struct test_case\n    {\n        std::string url;\n\n        error_code err;\n        std::unordered_map<std::string, std::string> result;\n    } tests[] = {\n        {\"http://127.0.0.1:34601?query1\", ERR_OK, {{\"query1\", \"\"}}},\n        {\"http://127.0.0.1:34601?query1=\", ERR_OK, {{\"query1\", \"\"}}},\n        {\"http://127.0.0.1:34601?query1=value1\", ERR_OK, {{\"query1\", \"value1\"}}},\n        {\"http://127.0.0.1:34601?=\", ERR_OK, {{\"\", \"\"}}},\n        {\"http://127.0.0.1:34601?\", ERR_OK, {}},\n\n        {\"http://127.0.0.1:34601?query1=value1&query2=value2\",\n         ERR_OK,\n         {{\"query1\", \"value1\"}, {\"query2\", \"value2\"}}},\n\n        {\"http://127.0.0.1:34601?query1=value1&query2\",\n         ERR_OK,\n         {{\"query1\", \"value1\"}, {\"query2\", \"\"}}},\n    };\n\n    for (auto tt : tests) {\n        ref_ptr<message_ex> m = message_ex::create_receive_message_with_standalone_header(\n            blob::create_from_bytes(std::string(\"POST\")));\n        m->buffers.emplace_back(blob::create_from_bytes(std::string(tt.url)));\n        m->buffers.resize(HTTP_MSG_BUFFERS_NUM);\n\n        auto res = http_request::parse(m.get());\n        if (res.is_ok()) {\n            ASSERT_EQ(res.get_value().query_args, tt.result) << tt.url;\n        } else {\n            ASSERT_EQ(res.get_error().code(), tt.err);\n        }\n    }\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/http/test/perf_counter_http_service_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include <dsn/perf_counter/perf_counters.h>\n#include <dsn/http/http_server.h>\n\n#include \"http/builtin_http_calls.h\"\n\nnamespace dsn {\n\nTEST(perf_counter_http_service_test, get_perf_counter)\n{\n    struct test_case\n    {\n        const char *app;\n        const char *section;\n        const char *name;\n        dsn_perf_counter_type_t type;\n        const char *description;\n    } tests[] = {\n        {\"replica\", \"http\", \"number\", COUNTER_TYPE_NUMBER, \"number type\"},\n        {\"replica\", \"http\", \"volatile\", COUNTER_TYPE_VOLATILE_NUMBER, \"volatile type\"},\n        {\"replica\", \"http\", \"rate\", COUNTER_TYPE_RATE, \"rate type\"},\n        {\"replica\", \"http\", \"percentline\", COUNTER_TYPE_NUMBER_PERCENTILES, \"percentline type\"}};\n\n    for (auto test : tests) {\n        // create perf counter\n        perf_counter_wrapper counter;\n        counter.init_global_counter(test.app, test.section, test.name, test.type, test.description);\n\n        std::string perf_counter_name;\n        perf_counter::build_full_name(test.app, test.section, test.name, perf_counter_name);\n\n        // get 
perf counter info through the http interface\n        http_request fake_req;\n        http_response fake_resp;\n        fake_req.query_args.emplace(\"name\", perf_counter_name);\n        get_perf_counter_handler(fake_req, fake_resp);\n\n        // build the expected json from the perf counter info obtained above\n        std::string fake_json;\n        if (COUNTER_TYPE_NUMBER_PERCENTILES == test.type) {\n            fake_json = R\"({\"name\":\")\" + perf_counter_name + R\"(\",)\" +\n                        R\"(\"p99\":\"0.00\",\"p999\":\"0.00\",)\" +\n                        R\"(\"type\":\")\" + dsn_counter_type_to_string(test.type) + R\"(\",)\" +\n                        R\"(\"description\":\")\" + test.description + R\"(\"})\" + \"\\n\";\n        } else {\n            fake_json = R\"({\"name\":\")\" + perf_counter_name + R\"(\",)\" +\n                        R\"(\"value\":\"0.00\",)\" +\n                        R\"(\"type\":\")\" + dsn_counter_type_to_string(test.type) + R\"(\",)\" +\n                        R\"(\"description\":\")\" + test.description + R\"(\"})\" + \"\\n\";\n        }\n\n        ASSERT_EQ(fake_resp.status_code, http_status_code::ok);\n        ASSERT_EQ(fake_resp.body, fake_json);\n    }\n}\n} // namespace dsn\n"
  },
  {
    "path": "src/http/test/run.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nif [ -z \"${REPORT_DIR}\" ]; then\n    REPORT_DIR=\".\"\nfi\n\noutput_xml=\"${REPORT_DIR}/dsn_http_test.xml\"\nGTEST_OUTPUT=\"xml:${output_xml}\" ./dsn_http_test\n"
  },
  {
    "path": "src/http/test/uri_decoder_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include <dsn/utility/error_code.h>\n#include <http/uri_decoder.h>\n\nnamespace dsn {\nnamespace uri {\n\nclass uri_decoder_test : public testing::Test\n{\n};\n\nTEST_F(uri_decoder_test, decode)\n{\n    /// Extract from https://github.com/cpp-netlib/uri/blob/master/test/uri_encoding_test.cpp\n    struct test_case\n    {\n        std::string to_decode_uri;\n        error_code err;\n        std::string decoded_uri;\n        std::string description;\n    } tests[] = {\n        {\"http%3A%2F%2F127.0.0.1%3A34101%2FperfCounter%3Fname%3Dcollector*app%23_all_\",\n         ERR_OK,\n         \"http://127.0.0.1:34101/perfCounter?name=collector*app#_all_\",\n         \"ERR_OK\"},\n        {\"%EB%B2%95%EC%A0%95%EB%8F%99\", ERR_OK, \"\\xEB\\xB2\\x95\\xEC\\xA0\\x95\\xEB\\x8F\\x99\", \"ERR_OK\"},\n        {\"%21%23%24%26%27%28%29%2A%2B%2C%2F%3A%3B%3D%3F%40%5B%5D\",\n         ERR_OK,\n         \"!#$&'()*+,/:;=?@[]\",\n         \"ERR_OK\"},\n        {\"%\",\n         ERR_INVALID_PARAMETERS,\n         \"\",\n         \"ERR_INVALID_PARAMETERS: Encountered partial escape sequence at end of string\"},\n        {\"%2\",\n         
ERR_INVALID_PARAMETERS,\n         \"\",\n         \"ERR_INVALID_PARAMETERS: Encountered partial escape sequence at end of string\"},\n        {\"%%%\",\n         ERR_INVALID_PARAMETERS,\n         \"\",\n         \"ERR_INVALID_PARAMETERS: The characters %% do not form a hex value. \"\n         \"Please escape it or pass a valid hex value\"},\n        {\"%2%\",\n         ERR_INVALID_PARAMETERS,\n         \"\",\n         \"ERR_INVALID_PARAMETERS: The characters 2% do not form a hex value. \"\n         \"Please escape it or pass a valid hex value\"},\n        {\"%G0\",\n         ERR_INVALID_PARAMETERS,\n         \"\",\n         \"ERR_INVALID_PARAMETERS: The characters G0 do not form a hex value. \"\n         \"Please escape it or pass a valid hex value\"},\n        {\"%0G\",\n         ERR_INVALID_PARAMETERS,\n         \"\",\n         \"ERR_INVALID_PARAMETERS: The characters 0G do not form a hex value. \"\n         \"Please escape it or pass a valid hex value\"},\n        {\"%20\", ERR_OK, \"\\x20\", \"ERR_OK\"},\n        {\"%80\", ERR_OK, \"\\x80\", \"ERR_OK\"}};\n\n    for (auto test : tests) {\n        auto decode_res = decode(test.to_decode_uri);\n\n        ASSERT_EQ(decode_res.get_error().code(), test.err);\n        if (ERR_OK == test.err) {\n            ASSERT_EQ(decode_res.get_value(), test.decoded_uri);\n        }\n        ASSERT_EQ(decode_res.get_error().description(), test.description);\n    }\n}\n\n} // namespace uri\n} // namespace dsn\n"
  },
  {
    "path": "src/http/uri_decoder.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <fmt/format.h>\n#include \"uri_decoder.h\"\n\nnamespace dsn {\nnamespace uri {\n\nerror_with<char> from_hex(const char c)\n{\n    switch (c) {\n    case '0' ... '9':\n        return c - '0';\n    case 'a' ... 'f':\n        return c - 'a' + 10;\n    case 'A' ... 
'F':\n        return c - 'A' + 10;\n    default:\n        return error_s::make(ERR_INVALID_PARAMETERS);\n    }\n}\n\nerror_with<char> decode_char(const string_view &hex)\n{\n    assert(2 == hex.size());\n\n    auto high = from_hex(hex[0]);\n    auto low = from_hex(hex[1]);\n    if (high.is_ok() && low.is_ok()) {\n        return (high.get_value() << 4) | low.get_value();\n    }\n\n    return error_s::make(ERR_INVALID_PARAMETERS);\n}\n\nerror_with<std::string> decode(const string_view &encoded_uri)\n{\n    std::string decoded_uri;\n    for (size_t i = 0; i < encoded_uri.size(); ++i) {\n        // '%' is followed by 2 hex chars\n        if ('%' == encoded_uri[i]) {\n            if (i + 2 >= encoded_uri.size()) {\n                return error_s::make(ERR_INVALID_PARAMETERS,\n                                     \"Encountered partial escape sequence at end of string\");\n            }\n\n            const string_view encoded_char(encoded_uri.data() + i + 1, 2);\n            auto decoded_char = decode_char(encoded_char);\n            if (!decoded_char.is_ok()) {\n                return error_s::make(\n                    ERR_INVALID_PARAMETERS,\n                    fmt::format(\"The characters {} do not \"\n                                \"form a hex value. Please escape it or pass a valid hex value\",\n                                encoded_char.data()));\n            }\n            decoded_uri += decoded_char.get_value();\n            i += 2;\n        } else {\n            decoded_uri += encoded_uri[i];\n        }\n    }\n\n    return decoded_uri;\n}\n\n} // namespace uri\n} // namespace dsn\n"
  },
  {
    "path": "src/http/uri_decoder.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <string>\n#include <dsn/utility/errors.h>\n\nnamespace dsn {\nnamespace uri {\n\n/// \\brief Decodes a sequence according to the percent decoding rules.\n/// \\returns the decoded uri path\nerror_with<std::string> decode(const string_view &encoded_uri);\n\n} // namespace uri\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_meta_server)\n\nset(DUPLICATION_SRC\n        duplication/duplication_info.cpp\n        duplication/meta_duplication_service.cpp\n        )\n\n# Source files under CURRENT project directory will be automatically included.\n# You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"${DUPLICATION_SRC}\")\n\nset(MY_PROJ_LIBS\n    dsn_replication_common\n    dsn.block_service\n    dsn.block_service.local\n    dsn.block_service.fds\n    dsn.block_service.hdfs\n    dsn.failure_detector\n    dsn.replication.zookeeper_provider\n    dsn_dist_cmd\n    dsn_http\n    dsn_runtime\n    dsn_aio\n    zookeeper\n    galaxy-fds-sdk-cpp\n    PocoNet\n    PocoFoundation\n    PocoNetSSL\n    PocoJSON\n    crypto\n    hdfs\n    )\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\n# Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_shared_library()\n\nadd_subdirectory(test)\n"
  },
  {
    "path": "src/meta/app_balance_policy.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/dist/fmt_logging.h>\n#include \"app_balance_policy.h\"\n#include \"meta_service.h\"\n\nnamespace dsn {\nnamespace replication {\n\napp_balance_policy::app_balance_policy(meta_service *svc)\n    : load_balance_policy(svc),\n      _ctrl_balancer_in_turn(nullptr),\n      _ctrl_only_primary_balancer(nullptr),\n      _ctrl_only_move_primary(nullptr)\n{\n    if (_svc != nullptr) {\n        _balancer_in_turn = _svc->get_meta_options()._lb_opts.balancer_in_turn;\n        _only_primary_balancer = _svc->get_meta_options()._lb_opts.only_primary_balancer;\n        _only_move_primary = _svc->get_meta_options()._lb_opts.only_move_primary;\n    } else {\n        _balancer_in_turn = false;\n        _only_primary_balancer = false;\n        _only_move_primary = false;\n    }\n    register_ctrl_commands();\n}\n\napp_balance_policy::~app_balance_policy() { unregister_ctrl_commands(); }\n\nvoid app_balance_policy::balance(bool checker, const meta_view *global_view, migration_list *list)\n{\n    init(global_view, list);\n    const app_mapper &apps = *_global_view->apps;\n    if (!execute_balance(apps,\n      
                   checker,\n                         _balancer_in_turn,\n                         _only_move_primary,\n                         std::bind(&app_balance_policy::primary_balance,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2))) {\n        return;\n    }\n\n    if (!need_balance_secondaries(checker)) {\n        return;\n    }\n\n    // we seperate the primary/secondary balancer for 2 reasons:\n    // 1. globally primary balancer may make secondary unbalanced\n    // 2. in one-by-one mode, a secondary balance decision for an app may be prior than\n    // another app's primary balancer if not seperated.\n    execute_balance(apps,\n                    checker,\n                    _balancer_in_turn,\n                    _only_move_primary,\n                    std::bind(&app_balance_policy::copy_secondary,\n                              this,\n                              std::placeholders::_1,\n                              std::placeholders::_2));\n}\n\nvoid app_balance_policy::register_ctrl_commands()\n{\n    static std::once_flag flag;\n    std::call_once(flag, [&]() {\n        _ctrl_balancer_in_turn = dsn::command_manager::instance().register_command(\n            {\"meta.lb.balancer_in_turn\"},\n            \"meta.lb.balancer_in_turn <true|false>\",\n            \"control whether do app balancer in turn\",\n            [this](const std::vector<std::string> &args) {\n                return remote_command_set_bool_flag(_balancer_in_turn, \"lb.balancer_in_turn\", args);\n            });\n\n        _ctrl_only_primary_balancer = dsn::command_manager::instance().register_command(\n            {\"meta.lb.only_primary_balancer\"},\n            \"meta.lb.only_primary_balancer <true|false>\",\n            \"control whether do only primary balancer\",\n            [this](const std::vector<std::string> &args) {\n                return 
remote_command_set_bool_flag(\n                    _only_primary_balancer, \"lb.only_primary_balancer\", args);\n            });\n\n        _ctrl_only_move_primary = dsn::command_manager::instance().register_command(\n            {\"meta.lb.only_move_primary\"},\n            \"meta.lb.only_move_primary <true|false>\",\n            \"control whether only move primary in balancer\",\n            [this](const std::vector<std::string> &args) {\n                return remote_command_set_bool_flag(\n                    _only_move_primary, \"lb.only_move_primary\", args);\n            });\n    });\n}\n\nvoid app_balance_policy::unregister_ctrl_commands()\n{\n    UNREGISTER_VALID_HANDLER(_ctrl_balancer_in_turn);\n    UNREGISTER_VALID_HANDLER(_ctrl_only_primary_balancer);\n    UNREGISTER_VALID_HANDLER(_ctrl_only_move_primary);\n}\n\nbool app_balance_policy::need_balance_secondaries(bool balance_checker)\n{\n    if (!balance_checker && !_migration_result->empty()) {\n        ddebug(\"stop to do secondary balance coz we already have actions to do\");\n        return false;\n    }\n    if (_only_primary_balancer) {\n        ddebug(\"stop to do secondary balancer coz it is not allowed\");\n        return false;\n    }\n    return true;\n}\n\nbool app_balance_policy::copy_secondary(const std::shared_ptr<app_state> &app, bool place_holder)\n{\n    node_mapper &nodes = *(_global_view->nodes);\n    const app_mapper &apps = *_global_view->apps;\n    int replicas_low = app->partition_count / _alive_nodes;\n\n    std::unique_ptr<copy_replica_operation> operation = dsn::make_unique<copy_secondary_operation>(\n        app, apps, nodes, address_vec, address_id, replicas_low);\n    return operation->start(_migration_result);\n}\n\ncopy_secondary_operation::copy_secondary_operation(\n    const std::shared_ptr<app_state> app,\n    const app_mapper &apps,\n    node_mapper &nodes,\n    const std::vector<dsn::rpc_address> &address_vec,\n    const std::unordered_map<dsn::rpc_address, int> 
&address_id,\n    int replicas_low)\n    : copy_replica_operation(app, apps, nodes, address_vec, address_id), _replicas_low(replicas_low)\n{\n}\n\nbool copy_secondary_operation::can_continue()\n{\n    int id_min = *_ordered_address_ids.begin();\n    int id_max = *_ordered_address_ids.rbegin();\n    if (_partition_counts[id_max] <= _replicas_low ||\n        _partition_counts[id_max] - _partition_counts[id_min] <= 1) {\n        ddebug_f(\"{}: stop copy secondary coz it will be balanced later\", _app->get_logname());\n        return false;\n    }\n    return true;\n}\n\nint copy_secondary_operation::get_partition_count(const node_state &ns) const\n{\n    return ns.partition_count(_app->app_id);\n}\n\nbool copy_secondary_operation::can_select(gpid pid, migration_list *result)\n{\n    int id_max = *_ordered_address_ids.rbegin();\n    const node_state &max_ns = _nodes.at(_address_vec[id_max]);\n    if (max_ns.served_as(pid) == partition_status::PS_PRIMARY) {\n        dinfo_f(\"{}: skip gpid({}.{}) coz it is primary\",\n                _app->get_logname(),\n                pid.get_app_id(),\n                pid.get_partition_index());\n        return false;\n    }\n\n    // if the pid have been used\n    if (result->find(pid) != result->end()) {\n        dinfo_f(\"{}: skip gpid({}.{}) coz it is already copyed\",\n                _app->get_logname(),\n                pid.get_app_id(),\n                pid.get_partition_index());\n        return false;\n    }\n\n    int id_min = *_ordered_address_ids.begin();\n    const node_state &min_ns = _nodes.at(_address_vec[id_min]);\n    if (min_ns.served_as(pid) != partition_status::PS_INACTIVE) {\n        dinfo_f(\"{}: skip gpid({}.{}) coz it is already a member on the target node\",\n                _app->get_logname(),\n                pid.get_app_id(),\n                pid.get_partition_index());\n        return false;\n    }\n    return true;\n}\n\nbalance_type copy_secondary_operation::get_balance_type() { return 
balance_type::COPY_SECONDARY; }\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/app_balance_policy.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"load_balance_policy.h\"\n\nnamespace dsn {\nnamespace replication {\nclass app_balance_policy : public load_balance_policy\n{\npublic:\n    app_balance_policy(meta_service *svc);\n    ~app_balance_policy();\n\n    void balance(bool checker, const meta_view *global_view, migration_list *list);\n\nprivate:\n    bool need_balance_secondaries(bool balance_checker);\n    bool copy_secondary(const std::shared_ptr<app_state> &app, bool place_holder);\n\n    void register_ctrl_commands();\n    void unregister_ctrl_commands();\n\n    dsn_handle_t _ctrl_balancer_in_turn;\n    dsn_handle_t _ctrl_only_primary_balancer;\n    dsn_handle_t _ctrl_only_move_primary;\n\n    // options\n    bool _balancer_in_turn;\n    bool _only_primary_balancer;\n    bool _only_move_primary;\n};\n\nclass copy_secondary_operation : public copy_replica_operation\n{\npublic:\n    copy_secondary_operation(const std::shared_ptr<app_state> app,\n                             const app_mapper &apps,\n                             node_mapper &nodes,\n                             const std::vector<dsn::rpc_address> &address_vec,\n                             
const std::unordered_map<dsn::rpc_address, int> &address_id,\n                             int replicas_low);\n    ~copy_secondary_operation() = default;\n\nprivate:\n    bool can_continue();\n    int get_partition_count(const node_state &ns) const;\n    bool can_select(gpid pid, migration_list *result);\n    balance_type get_balance_type();\n    bool only_copy_primary() { return false; }\n\n    int _replicas_low;\n\n    FRIEND_TEST(copy_secondary_operation, misc);\n};\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/app_env_validator.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"app_env_validator.h\"\n\n#include \"common/replication_common.h\"\n#include <fmt/format.h>\n#include <dsn/utility/string_conv.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication/replica_envs.h>\n#include <dsn/utils/token_bucket_throttling_controller.h>\n\nnamespace dsn {\nnamespace replication {\n\nbool validate_app_env(const std::string &env_name,\n                      const std::string &env_value,\n                      std::string &hint_message)\n{\n    return app_env_validator::instance().validate_app_env(env_name, env_value, hint_message);\n}\n\nbool check_slow_query(const std::string &env_value, std::string &hint_message)\n{\n    uint64_t threshold = 0;\n    if (!dsn::buf2uint64(env_value, threshold) ||\n        threshold < replica_envs::MIN_SLOW_QUERY_THRESHOLD_MS) {\n        hint_message = fmt::format(\"Slow query threshold must be >= {}ms\",\n                                   replica_envs::MIN_SLOW_QUERY_THRESHOLD_MS);\n        return false;\n    }\n    return true;\n}\n\nbool check_deny_client(const std::string &env_value, std::string &hint_message)\n{\n    std::vector<std::string> sub_sargs;\n    
utils::split_args(env_value.c_str(), sub_sargs, '*', true);\n\n    std::string invalid_hint_message = \"Invalid deny client args, valid include: timeout*all, \"\n                                       \"timeout*write, timeout*read; reconfig*all, reconfig*write, \"\n                                       \"reconfig*read\";\n    if (sub_sargs.size() != 2) {\n        hint_message = invalid_hint_message;\n        return false;\n    }\n    if ((sub_sargs[0] != \"timeout\" && sub_sargs[0] != \"reconfig\") ||\n        (sub_sargs[1] != \"all\" && sub_sargs[1] != \"write\" && sub_sargs[1] != \"read\")) {\n        hint_message = invalid_hint_message;\n        return false;\n    }\n    return true;\n}\n\nbool check_rocksdb_iteration(const std::string &env_value, std::string &hint_message)\n{\n    uint64_t threshold = 0;\n    if (!dsn::buf2uint64(env_value, threshold) || threshold < 0) {\n        hint_message = \"Rocksdb iteration threshold must be greater than zero\";\n        return false;\n    }\n    return true;\n}\n\nbool check_throttling(const std::string &env_value, std::string &hint_message)\n{\n    std::vector<std::string> sargs;\n    utils::split_args(env_value.c_str(), sargs, ',');\n    if (sargs.empty()) {\n        hint_message = \"The value shouldn't be empty\";\n        return false;\n    }\n\n    // example for sarg: 100K*delay*100 / 100M*reject*100\n    bool reject_parsed = false;\n    bool delay_parsed = false;\n    for (std::string &sarg : sargs) {\n        std::vector<std::string> sub_sargs;\n        utils::split_args(sarg.c_str(), sub_sargs, '*', true);\n        if (sub_sargs.size() != 3) {\n            hint_message = fmt::format(\"The field count of {} should be 3\", sarg);\n            return false;\n        }\n\n        // check the first part, which is must be a positive number followed with 'K' or 'M'\n        int64_t units = 0;\n        if (!sub_sargs[0].empty() &&\n            ('M' == *sub_sargs[0].rbegin() || 'K' == *sub_sargs[0].rbegin())) {\n      
      sub_sargs[0].pop_back();\n        }\n        if (!buf2int64(sub_sargs[0], units) || units < 0) {\n            hint_message = fmt::format(\"{} should be non-negative int\", sub_sargs[0]);\n            return false;\n        }\n\n        // check the second part, which is must be \"delay\" or \"reject\"\n        if (sub_sargs[1] == \"delay\") {\n            if (delay_parsed) {\n                hint_message = \"duplicate delay config\";\n                return false;\n            }\n            delay_parsed = true;\n        } else if (sub_sargs[1] == \"reject\") {\n            if (reject_parsed) {\n                hint_message = \"duplicate reject config\";\n                return false;\n            }\n            reject_parsed = true;\n        } else {\n            hint_message = fmt::format(\"{} should be \\\"delay\\\" or \\\"reject\\\"\", sub_sargs[1]);\n            return false;\n        }\n\n        // check the third part, which is must be a positive number or 0\n        int64_t delay_ms = 0;\n        if (!buf2int64(sub_sargs[2], delay_ms) || delay_ms < 0) {\n            hint_message = fmt::format(\"{} should be non-negative int\", sub_sargs[2]);\n            return false;\n        }\n    }\n\n    return true;\n}\n\nbool check_bool_value(const std::string &env_value, std::string &hint_message)\n{\n    bool result = false;\n    if (!dsn::buf2bool(env_value, result)) {\n        hint_message = fmt::format(\"invalid string {}, should be \\\"true\\\" or \\\"false\\\"\", env_value);\n        return false;\n    }\n    return true;\n}\n\nbool app_env_validator::validate_app_env(const std::string &env_name,\n                                         const std::string &env_value,\n                                         std::string &hint_message)\n{\n    auto func_iter = _validator_funcs.find(env_name);\n    if (func_iter != _validator_funcs.end()) {\n        // check function == nullptr means no check\n        if (nullptr != func_iter->second && 
!func_iter->second(env_value, hint_message)) {\n            dwarn_f(\"{}={} is invalid.\", env_name, env_value);\n            return false;\n        }\n\n        return true;\n    }\n\n    hint_message = fmt::format(\"app_env \\\"{}\\\" is not supported\", env_name);\n    return false;\n}\n\nvoid app_env_validator::register_all_validators()\n{\n    _validator_funcs = {\n        {replica_envs::SLOW_QUERY_THRESHOLD,\n         std::bind(&check_slow_query, std::placeholders::_1, std::placeholders::_2)},\n        {replica_envs::WRITE_QPS_THROTTLING,\n         std::bind(&check_throttling, std::placeholders::_1, std::placeholders::_2)},\n        {replica_envs::WRITE_SIZE_THROTTLING,\n         std::bind(&check_throttling, std::placeholders::_1, std::placeholders::_2)},\n        {replica_envs::ROCKSDB_ITERATION_THRESHOLD_TIME_MS,\n         std::bind(&check_rocksdb_iteration, std::placeholders::_1, std::placeholders::_2)},\n        {replica_envs::ROCKSDB_BLOCK_CACHE_ENABLED,\n         std::bind(&check_bool_value, std::placeholders::_1, std::placeholders::_2)},\n        {replica_envs::READ_QPS_THROTTLING,\n         std::bind(&check_throttling, std::placeholders::_1, std::placeholders::_2)},\n        {replica_envs::READ_SIZE_THROTTLING,\n         std::bind(&utils::token_bucket_throttling_controller::validate,\n                   std::placeholders::_1,\n                   std::placeholders::_2)},\n        {replica_envs::SPLIT_VALIDATE_PARTITION_HASH,\n         std::bind(&check_bool_value, std::placeholders::_1, std::placeholders::_2)},\n        {replica_envs::USER_SPECIFIED_COMPACTION, nullptr},\n        {replica_envs::BACKUP_REQUEST_QPS_THROTTLING,\n         std::bind(&check_throttling, std::placeholders::_1, std::placeholders::_2)},\n        {replica_envs::ROCKSDB_ALLOW_INGEST_BEHIND,\n         std::bind(&check_bool_value, std::placeholders::_1, std::placeholders::_2)},\n        {replica_envs::DENY_CLIENT_REQUEST,\n         std::bind(&check_deny_client, std::placeholders::_1, 
std::placeholders::_2)},\n        // TODO(zhaoliwei): not implemented\n        {replica_envs::BUSINESS_INFO, nullptr},\n        {replica_envs::TABLE_LEVEL_DEFAULT_TTL, nullptr},\n        {replica_envs::ROCKSDB_USAGE_SCENARIO, nullptr},\n        {replica_envs::ROCKSDB_CHECKPOINT_RESERVE_MIN_COUNT, nullptr},\n        {replica_envs::ROCKSDB_CHECKPOINT_RESERVE_TIME_SECONDS, nullptr},\n        {replica_envs::MANUAL_COMPACT_DISABLED, nullptr},\n        {replica_envs::MANUAL_COMPACT_MAX_CONCURRENT_RUNNING_COUNT, nullptr},\n        {replica_envs::MANUAL_COMPACT_ONCE_TRIGGER_TIME, nullptr},\n        {replica_envs::MANUAL_COMPACT_ONCE_TARGET_LEVEL, nullptr},\n        {replica_envs::MANUAL_COMPACT_ONCE_BOTTOMMOST_LEVEL_COMPACTION, nullptr},\n        {replica_envs::MANUAL_COMPACT_PERIODIC_TRIGGER_TIME, nullptr},\n        {replica_envs::MANUAL_COMPACT_PERIODIC_TARGET_LEVEL, nullptr},\n        {replica_envs::MANUAL_COMPACT_PERIODIC_BOTTOMMOST_LEVEL_COMPACTION, nullptr},\n        {replica_envs::REPLICA_ACCESS_CONTROLLER_ALLOWED_USERS, nullptr}};\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/app_env_validator.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <functional>\n#include <map>\n#include <string>\n#include <dsn/utility/singleton.h>\n\nnamespace dsn {\nnamespace replication {\n\nbool validate_app_env(const std::string &env_name,\n                      const std::string &env_value,\n                      std::string &hint_message);\n\nclass app_env_validator : public utils::singleton<app_env_validator>\n{\npublic:\n    bool validate_app_env(const std::string &env_name,\n                          const std::string &env_value,\n                          std::string &hint_message);\n\nprivate:\n    app_env_validator() { register_all_validators(); }\n    ~app_env_validator() = default;\n\n    void register_all_validators();\n\n    using validator_func = std::function<bool(const std::string &, std::string &)>;\n    std::map<std::string, validator_func> _validator_funcs;\n\n    friend class utils::singleton<app_env_validator>;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/backup_engine.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/filesystem.h>\n\n#include \"common/backup_common.h\"\n#include \"common/replication_common.h\"\n#include \"server_state.h\"\n\nnamespace dsn {\nnamespace replication {\n\nbackup_engine::backup_engine(backup_service *service)\n    : _backup_service(service), _block_service(nullptr), _backup_path(\"\"), _is_backup_failed(false)\n{\n}\n\nbackup_engine::~backup_engine() { _tracker.cancel_outstanding_tasks(); }\n\nerror_code backup_engine::init_backup(int32_t app_id)\n{\n    std::string app_name;\n    int partition_count;\n    {\n        zauto_read_lock l;\n        _backup_service->get_state()->lock_read(l);\n        std::shared_ptr<app_state> app = _backup_service->get_state()->get_app(app_id);\n        if (app == nullptr || app->status != app_status::AS_AVAILABLE) {\n            derror_f(\"app {} is not available, couldn't do backup now.\", app_id);\n            return ERR_INVALID_STATE;\n        }\n        app_name = app->app_name;\n        partition_count = app->partition_count;\n    }\n\n    zauto_lock lock(_lock);\n    _backup_status.clear();\n    for (int i = 0; i < partition_count; 
++i) {\n        _backup_status.emplace(i, backup_status::UNALIVE);\n    }\n    _cur_backup.app_id = app_id;\n    _cur_backup.app_name = app_name;\n    _cur_backup.backup_id = static_cast<int64_t>(dsn_now_ms());\n    _cur_backup.start_time_ms = _cur_backup.backup_id;\n    return ERR_OK;\n}\n\nerror_code backup_engine::set_block_service(const std::string &provider)\n{\n    _provider_type = provider;\n    _block_service = _backup_service->get_meta_service()\n                         ->get_block_service_manager()\n                         .get_or_create_block_filesystem(provider);\n    if (_block_service == nullptr) {\n        return ERR_INVALID_PARAMETERS;\n    }\n    return ERR_OK;\n}\n\nerror_code backup_engine::set_backup_path(const std::string &path)\n{\n    if (_block_service && _block_service->is_root_path_set()) {\n        return ERR_INVALID_PARAMETERS;\n    }\n    ddebug_f(\"backup path is set to {}.\", path);\n    _backup_path = path;\n    return ERR_OK;\n}\n\nerror_code backup_engine::write_backup_file(const std::string &file_name,\n                                            const dsn::blob &write_buffer)\n{\n    dist::block_service::create_file_request create_file_req;\n    create_file_req.ignore_metadata = true;\n    create_file_req.file_name = file_name;\n\n    dsn::error_code err;\n    dist::block_service::block_file_ptr remote_file;\n    _block_service\n        ->create_file(create_file_req,\n                      TASK_CODE_EXEC_INLINED,\n                      [&err, &remote_file](const dist::block_service::create_file_response &resp) {\n                          err = resp.err;\n                          remote_file = resp.file_handle;\n                      })\n        ->wait();\n    if (err != dsn::ERR_OK) {\n        ddebug_f(\"create file {} failed\", file_name);\n        return err;\n    }\n    dassert_f(remote_file != nullptr,\n              \"create file {} succeed, but can't get handle\",\n              create_file_req.file_name);\n    
remote_file\n        ->write(dist::block_service::write_request{write_buffer},\n                TASK_CODE_EXEC_INLINED,\n                [&err](const dist::block_service::write_response &resp) { err = resp.err; })\n        ->wait();\n    return err;\n}\n\nerror_code backup_engine::backup_app_meta()\n{\n    dsn::blob app_info_buffer;\n    {\n        zauto_read_lock l;\n        _backup_service->get_state()->lock_read(l);\n        std::shared_ptr<app_state> app = _backup_service->get_state()->get_app(_cur_backup.app_id);\n        if (app == nullptr || app->status != app_status::AS_AVAILABLE) {\n            derror_f(\"app {} is not available, couldn't do backup now.\", _cur_backup.app_id);\n            return ERR_INVALID_STATE;\n        }\n        app_state tmp = *app;\n        // Because we don't restore app envs, so no need to write app envs to backup file.\n        // TODO(zhangyifan): backup and restore app envs when needed.\n        tmp.envs.clear();\n        app_info_buffer = dsn::json::json_forwarder<app_info>::encode(tmp);\n    }\n\n    std::string backup_root =\n        dsn::utils::filesystem::path_combine(_backup_path, _backup_service->backup_root());\n    std::string file_name = cold_backup::get_app_metadata_file(\n        backup_root, _cur_backup.app_name, _cur_backup.app_id, _cur_backup.backup_id);\n    return write_backup_file(file_name, app_info_buffer);\n}\n\nvoid backup_engine::backup_app_partition(const gpid &pid)\n{\n    dsn::rpc_address partition_primary;\n    {\n        zauto_read_lock l;\n        _backup_service->get_state()->lock_read(l);\n        std::shared_ptr<app_state> app = _backup_service->get_state()->get_app(pid.get_app_id());\n        if (app == nullptr || app->status != app_status::AS_AVAILABLE) {\n            derror_f(\"app {} is not available, couldn't do backup now.\", pid.get_app_id());\n\n            zauto_lock lock(_lock);\n            _is_backup_failed = true;\n            return;\n        }\n        partition_primary = 
app->partitions[pid.get_partition_index()].primary;\n    }\n\n    if (partition_primary.is_invalid()) {\n        dwarn_f(\"backup_id({}): partition {} doesn't have a primary now, retry to backup it later.\",\n                _cur_backup.backup_id,\n                pid.to_string());\n        tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                         &_tracker,\n                         [this, pid]() { backup_app_partition(pid); },\n                         0,\n                         std::chrono::seconds(10));\n        return;\n    }\n\n    auto req = std::make_unique<backup_request>();\n    req->pid = pid;\n    policy_info backup_policy_info;\n    backup_policy_info.__set_backup_provider_type(_provider_type);\n    backup_policy_info.__set_policy_name(get_policy_name());\n    req->policy = backup_policy_info;\n    req->backup_id = _cur_backup.backup_id;\n    req->app_name = _cur_backup.app_name;\n    if (!_backup_path.empty()) {\n        req->__set_backup_path(_backup_path);\n    }\n\n    ddebug_f(\"backup_id({}): send backup request to partition {}, target_addr = {}\",\n             _cur_backup.backup_id,\n             pid.to_string(),\n             partition_primary.to_string());\n    backup_rpc rpc(std::move(req), RPC_COLD_BACKUP, 10000_ms, 0, pid.thread_hash());\n    rpc.call(\n        partition_primary, &_tracker, [this, rpc, pid, partition_primary](error_code err) mutable {\n            on_backup_reply(err, rpc.response(), pid, partition_primary);\n        });\n\n    zauto_lock l(_lock);\n    _backup_status[pid.get_partition_index()] = backup_status::ALIVE;\n}\n\ninline void backup_engine::handle_replica_backup_failed(const backup_response &response,\n                                                        const gpid pid)\n{\n    dcheck_eq(response.pid, pid);\n    dcheck_eq(response.backup_id, _cur_backup.backup_id);\n\n    derror_f(\"backup_id({}): backup for partition {} failed, response.err: {}\",\n             _cur_backup.backup_id,\n             
pid.to_string(),\n             response.err.to_string());\n    zauto_lock l(_lock);\n    // if one partition fail, the whole backup plan fail.\n    _is_backup_failed = true;\n    _backup_status[pid.get_partition_index()] = backup_status::FAILED;\n}\n\ninline void backup_engine::retry_backup(const dsn::gpid pid)\n{\n    tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                     &_tracker,\n                     [this, pid]() { backup_app_partition(pid); },\n                     0,\n                     std::chrono::seconds(1));\n}\n\nvoid backup_engine::on_backup_reply(const error_code err,\n                                    const backup_response &response,\n                                    const gpid pid,\n                                    const rpc_address &primary)\n{\n    {\n        zauto_lock l(_lock);\n        // if backup of some partition failed, we would not handle response from other partitions.\n        if (_is_backup_failed) {\n            return;\n        }\n    }\n\n    // if backup completed, receive ERR_OK and\n    // resp.progress=cold_backup_constant::PROGRESS_FINISHED;\n    // if backup failed, receive ERR_LOCAL_APP_FAILURE;\n    // backup not completed in other cases.\n    // see replica::on_cold_backup() for details.\n\n    auto rep_error = err == ERR_OK ? 
response.err : err;\n\n    if (rep_error == ERR_LOCAL_APP_FAILURE) {\n        handle_replica_backup_failed(response, pid);\n        return;\n    }\n\n    if (rep_error != ERR_OK) {\n        derror_f(\"backup_id({}): backup request to server {} failed, error: {}, retry to \"\n                 \"send backup request.\",\n                 _cur_backup.backup_id,\n                 primary.to_string(),\n                 rep_error.to_string());\n        retry_backup(pid);\n        return;\n    };\n\n    if (response.progress == cold_backup_constant::PROGRESS_FINISHED) {\n        dcheck_eq(response.pid, pid);\n        dcheck_eq(response.backup_id, _cur_backup.backup_id);\n        ddebug_f(\"backup_id({}): backup for partition {} completed.\",\n                 _cur_backup.backup_id,\n                 pid.to_string());\n        {\n            zauto_lock l(_lock);\n            _backup_status[pid.get_partition_index()] = backup_status::COMPLETED;\n        }\n        complete_current_backup();\n        return;\n    }\n\n    // backup is not finished, meta polling to send request\n    ddebug_f(\"backup_id({}): receive backup response for partition {} from server {}, now \"\n             \"progress {}, retry to send backup request.\",\n             _cur_backup.backup_id,\n             pid.to_string(),\n             primary.to_string(),\n             response.progress);\n\n    retry_backup(pid);\n}\n\nvoid backup_engine::write_backup_info()\n{\n    std::string backup_root =\n        dsn::utils::filesystem::path_combine(_backup_path, _backup_service->backup_root());\n    std::string file_name = cold_backup::get_backup_info_file(backup_root, _cur_backup.backup_id);\n    blob buf = dsn::json::json_forwarder<app_backup_info>::encode(_cur_backup);\n    error_code err = write_backup_file(file_name, buf);\n    if (err == ERR_FS_INTERNAL) {\n        derror_f(\n            \"backup_id({}): write backup info failed, error {}, do not try again for this error.\",\n            
_cur_backup.backup_id,\n            err.to_string());\n        zauto_lock l(_lock);\n        _is_backup_failed = true;\n        return;\n    }\n    if (err != ERR_OK) {\n        dwarn_f(\"backup_id({}): write backup info failed, retry it later.\", _cur_backup.backup_id);\n        tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                         &_tracker,\n                         [this]() { write_backup_info(); },\n                         0,\n                         std::chrono::seconds(1));\n        return;\n    }\n    ddebug_f(\"backup_id({}): successfully wrote backup info, backup for app {} completed.\",\n             _cur_backup.backup_id,\n             _cur_backup.app_id);\n    zauto_lock l(_lock);\n    _cur_backup.end_time_ms = dsn_now_ms();\n}\n\nvoid backup_engine::complete_current_backup()\n{\n    {\n        zauto_lock l(_lock);\n        for (const auto &status : _backup_status) {\n            if (status.second != backup_status::COMPLETED) {\n                // backup for some partition was not finished.\n                return;\n            }\n        }\n    }\n    // complete backup for all partitions.\n    write_backup_info();\n}\n\nerror_code backup_engine::start()\n{\n    error_code err = backup_app_meta();\n    if (err != ERR_OK) {\n        derror_f(\"backup_id({}): backup meta data for app {} failed, error {}\",\n                 _cur_backup.backup_id,\n                 _cur_backup.app_id,\n                 err.to_string());\n        return err;\n    }\n    for (int i = 0; i < _backup_status.size(); ++i) {\n        tasking::enqueue(LPC_DEFAULT_CALLBACK, &_tracker, [this, i]() {\n            backup_app_partition(gpid(_cur_backup.app_id, i));\n        });\n    }\n    return ERR_OK;\n}\n\nbool backup_engine::is_in_progress() const\n{\n    zauto_lock l(_lock);\n    return _cur_backup.end_time_ms == 0 && !_is_backup_failed;\n}\n\nbackup_item backup_engine::get_backup_item() const\n{\n    zauto_lock l(_lock);\n    backup_item item;\n    item.backup_id 
= _cur_backup.backup_id;\n    item.app_name = _cur_backup.app_name;\n    item.backup_path = _backup_path;\n    item.backup_provider_type = _provider_type;\n    item.start_time_ms = _cur_backup.start_time_ms;\n    item.end_time_ms = _cur_backup.end_time_ms;\n    item.is_backup_failed = _is_backup_failed;\n    return item;\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/backup_engine.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/cpp/json_helper.h>\n#include <dsn/dist/block_service.h>\n#include <dsn/tool-api/zlocks.h>\n\nnamespace dsn {\nnamespace replication {\n\nenum backup_status\n{\n    UNALIVE = 1,\n    ALIVE = 2,\n    COMPLETED = 3,\n    FAILED = 4\n};\n\nstruct app_backup_info\n{\n    int64_t backup_id;\n    int64_t start_time_ms;\n    int64_t end_time_ms;\n\n    int32_t app_id;\n    std::string app_name;\n\n    app_backup_info() : backup_id(0), start_time_ms(0), end_time_ms(0) {}\n\n    DEFINE_JSON_SERIALIZATION(backup_id, start_time_ms, end_time_ms, app_id, app_name)\n};\n\nclass app_state;\nclass backup_service;\n\nclass backup_engine\n{\npublic:\n    backup_engine(backup_service *service);\n    ~backup_engine();\n\n    error_code init_backup(int32_t app_id);\n    error_code set_block_service(const std::string &provider);\n    error_code set_backup_path(const std::string &path);\n\n    error_code start();\n\n    int64_t get_current_backup_id() const { return _cur_backup.backup_id; }\n    int32_t get_backup_app_id() const { return _cur_backup.app_id; }\n    bool is_in_progress() const;\n\n    backup_item get_backup_item() 
const;\n\nprivate:\n    friend class backup_engine_test;\n    friend class backup_service_test;\n\n    FRIEND_TEST(backup_engine_test, test_on_backup_reply);\n    FRIEND_TEST(backup_engine_test, test_backup_completed);\n    FRIEND_TEST(backup_engine_test, test_write_backup_info_failed);\n\n    error_code write_backup_file(const std::string &file_name, const dsn::blob &write_buffer);\n    error_code backup_app_meta();\n    void backup_app_partition(const gpid &pid);\n    void on_backup_reply(error_code err,\n                         const backup_response &response,\n                         gpid pid,\n                         const rpc_address &primary);\n    void write_backup_info();\n    void complete_current_backup();\n    void handle_replica_backup_failed(const backup_response &response, const gpid pid);\n    void retry_backup(const dsn::gpid pid);\n\n    const std::string get_policy_name() const\n    {\n        return \"fake_policy_\" + std::to_string(_cur_backup.backup_id);\n    }\n\n    backup_service *_backup_service;\n    dist::block_service::block_filesystem *_block_service;\n    std::string _backup_path;\n    std::string _provider_type;\n    dsn::task_tracker _tracker;\n\n    // lock the following variables.\n    mutable dsn::zlock _lock;\n    bool _is_backup_failed;\n    app_backup_info _cur_backup;\n    // partition_id -> backup_status\n    std::map<int32_t, backup_status> _backup_status;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/cluster_balance_policy.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"cluster_balance_policy.h\"\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/flags.h>\n\nnamespace dsn {\nnamespace replication {\nDSN_DEFINE_uint32(\"meta_server\",\n                  balance_op_count_per_round,\n                  10,\n                  \"balance operation count per round for cluster balancer\");\nDSN_TAG_VARIABLE(balance_op_count_per_round, FT_MUTABLE);\n\nuint32_t get_partition_count(const node_state &ns, balance_type type, int32_t app_id)\n{\n    unsigned count = 0;\n    switch (type) {\n    case balance_type::COPY_SECONDARY:\n        if (app_id > 0) {\n            count = ns.partition_count(app_id) - ns.primary_count(app_id);\n        } else {\n            count = ns.partition_count() - ns.primary_count();\n        }\n        break;\n    case balance_type::COPY_PRIMARY:\n        if (app_id > 0) {\n            count = ns.primary_count(app_id);\n        } else {\n            count = ns.primary_count();\n        }\n        break;\n    default:\n        break;\n    }\n    return (uint32_t)count;\n}\n\nuint32_t get_skew(const std::map<rpc_address, uint32_t> &count_map)\n{\n    uint32_t min = UINT_MAX, max 
= 0;\n    for (const auto &kv : count_map) {\n        if (kv.second < min) {\n            min = kv.second;\n        }\n        if (kv.second > max) {\n            max = kv.second;\n        }\n    }\n    return max - min;\n}\n\nvoid get_min_max_set(const std::map<rpc_address, uint32_t> &node_count_map,\n                     /*out*/ std::set<rpc_address> &min_set,\n                     /*out*/ std::set<rpc_address> &max_set)\n{\n    std::multimap<uint32_t, rpc_address> count_multimap = utils::flip_map(node_count_map);\n\n    auto range = count_multimap.equal_range(count_multimap.begin()->first);\n    for (auto iter = range.first; iter != range.second; ++iter) {\n        min_set.insert(iter->second);\n    }\n\n    range = count_multimap.equal_range(count_multimap.rbegin()->first);\n    for (auto iter = range.first; iter != range.second; ++iter) {\n        max_set.insert(iter->second);\n    }\n}\n\ncluster_balance_policy::cluster_balance_policy(meta_service *svc) : load_balance_policy(svc) {}\n\nvoid cluster_balance_policy::balance(bool checker,\n                                     const meta_view *global_view,\n                                     migration_list *list)\n{\n    init(global_view, list);\n\n    if (!execute_balance(*_global_view->apps,\n                         false, /* balance_checker */\n                         true,  /* balance_in_turn */\n                         true,  /* only_move_primary */\n                         std::bind(&cluster_balance_policy::primary_balance,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2))) {\n        return;\n    }\n\n    bool need_continue =\n        cluster_replica_balance(_global_view, balance_type::COPY_SECONDARY, *_migration_result);\n    if (!need_continue) {\n        return;\n    }\n\n    cluster_replica_balance(_global_view, balance_type::COPY_PRIMARY, *_migration_result);\n}\n\nbool 
cluster_balance_policy::cluster_replica_balance(const meta_view *global_view,\n                                                     const balance_type type,\n                                                     /*out*/ migration_list &list)\n{\n    bool enough_information = do_cluster_replica_balance(global_view, type, list);\n    if (!enough_information) {\n        return false;\n    }\n    if (!list.empty()) {\n        ddebug_f(\"migration count of {} = {}\", enum_to_string(type), list.size());\n        return false;\n    }\n    return true;\n}\n\nbool cluster_balance_policy::do_cluster_replica_balance(const meta_view *global_view,\n                                                        const balance_type type,\n                                                        /*out*/ migration_list &list)\n{\n    cluster_migration_info cluster_info;\n    if (!get_cluster_migration_info(global_view, type, cluster_info)) {\n        return false;\n    }\n\n    partition_set selected_pid;\n    move_info next_move;\n    while (get_next_move(cluster_info, selected_pid, next_move)) {\n        if (!apply_move(next_move, selected_pid, list, cluster_info)) {\n            break;\n        }\n        if (list.size() >= FLAGS_balance_op_count_per_round) {\n            break;\n        }\n    }\n\n    return true;\n}\n\nbool cluster_balance_policy::get_cluster_migration_info(\n    const meta_view *global_view,\n    const balance_type type,\n    /*out*/ cluster_migration_info &cluster_info)\n{\n    const node_mapper &nodes = *global_view->nodes;\n    if (nodes.size() < 3) {\n        return false;\n    }\n\n    const app_mapper &all_apps = *global_view->apps;\n    app_mapper apps;\n    for (const auto &kv : all_apps) {\n        const std::shared_ptr<app_state> &app = kv.second;\n        auto ignored = is_ignored_app(app->app_id);\n        if (ignored || app->is_bulk_loading || app->splitting()) {\n            ddebug_f(\"skip to balance app({}), ignored={}, bulk loading={}, 
splitting={}\",\n                     app->app_name,\n                     ignored,\n                     app->is_bulk_loading,\n                     app->splitting());\n            continue;\n        }\n        if (app->status == app_status::AS_AVAILABLE) {\n            apps[app->app_id] = app;\n        }\n    }\n\n    for (const auto &kv : apps) {\n        std::shared_ptr<app_state> app = kv.second;\n        app_migration_info info;\n        if (!get_app_migration_info(app, nodes, type, info)) {\n            return false;\n        }\n        cluster_info.apps_info.emplace(kv.first, std::move(info));\n        cluster_info.apps_skew[kv.first] = get_skew(info.replicas_count);\n    }\n\n    for (const auto &kv : nodes) {\n        const node_state &ns = kv.second;\n        node_migration_info info;\n        get_node_migration_info(ns, apps, info);\n        cluster_info.nodes_info.emplace(kv.first, std::move(info));\n\n        auto count = get_partition_count(ns, type, -1);\n        cluster_info.replicas_count[kv.first] = count;\n    }\n\n    cluster_info.type = type;\n    return true;\n}\n\nbool cluster_balance_policy::get_app_migration_info(std::shared_ptr<app_state> app,\n                                                    const node_mapper &nodes,\n                                                    const balance_type type,\n                                                    app_migration_info &info)\n{\n    info.app_id = app->app_id;\n    info.app_name = app->app_name;\n    info.partitions.resize(app->partitions.size());\n    for (auto i = 0; i < app->partitions.size(); ++i) {\n        std::map<rpc_address, partition_status::type> pstatus_map;\n        pstatus_map[app->partitions[i].primary] = partition_status::PS_PRIMARY;\n        if (app->partitions[i].secondaries.size() != app->partitions[i].max_replica_count - 1) {\n            // partition is unhealthy\n            return false;\n        }\n        for (const auto &addr : app->partitions[i].secondaries) {\n  
          pstatus_map[addr] = partition_status::PS_SECONDARY;\n        }\n        info.partitions[i] = pstatus_map;\n    }\n\n    for (const auto &it : nodes) {\n        const node_state &ns = it.second;\n        auto count = get_partition_count(ns, type, app->app_id);\n        info.replicas_count[ns.addr()] = count;\n    }\n\n    return true;\n}\n\nvoid cluster_balance_policy::get_node_migration_info(const node_state &ns,\n                                                     const app_mapper &apps,\n                                                     /*out*/ node_migration_info &info)\n{\n    info.address = ns.addr();\n    for (const auto &iter : apps) {\n        std::shared_ptr<app_state> app = iter.second;\n        for (const auto &context : app->helpers->contexts) {\n            std::string disk_tag;\n            if (!context.get_disk_tag(ns.addr(), disk_tag)) {\n                continue;\n            }\n            auto pid = context.config_owner->pid;\n            if (info.partitions.find(disk_tag) != info.partitions.end()) {\n                info.partitions[disk_tag].insert(pid);\n            } else {\n                partition_set pset;\n                pset.insert(pid);\n                info.partitions.emplace(disk_tag, pset);\n            }\n        }\n    }\n}\n\nbool cluster_balance_policy::get_next_move(const cluster_migration_info &cluster_info,\n                                           const partition_set &selected_pid,\n                                           /*out*/ move_info &next_move)\n{\n    // key-app skew, value-app id\n    std::multimap<uint32_t, int32_t> app_skew_multimap = utils::flip_map(cluster_info.apps_skew);\n    auto max_app_skew = app_skew_multimap.rbegin()->first;\n    if (max_app_skew == 0) {\n        ddebug_f(\"every app is balanced and any move will unbalance a app\");\n        return false;\n    }\n\n    auto server_skew = get_skew(cluster_info.replicas_count);\n    if (max_app_skew <= 1 && server_skew <= 1) {\n        
ddebug_f(\"every app is balanced and the cluster as a whole is balanced\");\n        return false;\n    }\n\n    /**\n     * Among the apps with maximum skew, attempt to pick a app where there is\n     * a move that improves the app skew and the cluster skew, if possible. If\n     * not, attempt to pick a move that improves the app skew.\n     **/\n    std::set<rpc_address> cluster_min_count_nodes;\n    std::set<rpc_address> cluster_max_count_nodes;\n    get_min_max_set(cluster_info.replicas_count, cluster_min_count_nodes, cluster_max_count_nodes);\n\n    bool found = false;\n    auto app_range = app_skew_multimap.equal_range(max_app_skew);\n    for (auto iter = app_range.first; iter != app_range.second; ++iter) {\n        auto app_id = iter->second;\n        auto it = cluster_info.apps_info.find(app_id);\n        if (it == cluster_info.apps_info.end()) {\n            continue;\n        }\n        auto app_map = it->second.replicas_count;\n        std::set<rpc_address> app_min_count_nodes;\n        std::set<rpc_address> app_max_count_nodes;\n        get_min_max_set(app_map, app_min_count_nodes, app_max_count_nodes);\n\n        /**\n         * Compute the intersection of the replica servers most loaded for the app\n         * with the replica servers most loaded overall, and likewise for least loaded.\n         * These are our ideal candidates for moving from and to, respectively.\n         **/\n        std::set<rpc_address> app_cluster_min_set =\n            utils::get_intersection(app_min_count_nodes, cluster_min_count_nodes);\n        std::set<rpc_address> app_cluster_max_set =\n            utils::get_intersection(app_max_count_nodes, cluster_max_count_nodes);\n\n        /**\n         * Do not move replicas of a balanced app if the least (most) loaded\n         * servers overall do not intersect the servers hosting the least (most)\n         * replicas of the app. 
Moving a replica in that case might keep the\n         * cluster skew the same or make it worse while keeping the app balanced.\n         **/\n        std::multimap<uint32_t, rpc_address> app_count_multimap = utils::flip_map(app_map);\n        if (app_count_multimap.rbegin()->first <= app_count_multimap.begin()->first + 1 &&\n            (app_cluster_min_set.empty() || app_cluster_max_set.empty())) {\n            ddebug_f(\"do not move replicas of a balanced app({}) if the least (most) loaded \"\n                     \"servers overall do not intersect the servers hosting the least (most) \"\n                     \"replicas of the app\",\n                     app_id);\n            continue;\n        }\n\n        if (pick_up_move(cluster_info,\n                         app_cluster_max_set.empty() ? app_max_count_nodes : app_cluster_max_set,\n                         app_cluster_min_set.empty() ? app_min_count_nodes : app_cluster_min_set,\n                         app_id,\n                         selected_pid,\n                         next_move)) {\n            found = true;\n            break;\n        }\n    }\n\n    return found;\n}\n\ntemplate <typename S>\nauto select_random(const S &s, size_t n)\n{\n    auto it = std::begin(s);\n    std::advance(it, n);\n    return it;\n}\n\nbool cluster_balance_policy::pick_up_move(const cluster_migration_info &cluster_info,\n                                          const std::set<rpc_address> &max_nodes,\n                                          const std::set<rpc_address> &min_nodes,\n                                          const int32_t app_id,\n                                          const partition_set &selected_pid,\n                                          /*out*/ move_info &move_info)\n{\n    std::set<app_disk_info> max_load_disk_set;\n    get_max_load_disk_set(cluster_info, max_nodes, app_id, max_load_disk_set);\n    if (max_load_disk_set.empty()) {\n        return false;\n    }\n    auto index = rand() % 
max_load_disk_set.size();\n    auto max_load_disk = *select_random(max_load_disk_set, index);\n    ddebug_f(\"most load disk({}) on node({}) is picked, has {} partition\",\n             max_load_disk.node.to_string(),\n             max_load_disk.disk_tag,\n             max_load_disk.partitions.size());\n    for (const auto &node_addr : min_nodes) {\n        gpid picked_pid;\n        if (pick_up_partition(\n                cluster_info, node_addr, max_load_disk.partitions, selected_pid, picked_pid)) {\n            move_info.pid = picked_pid;\n            move_info.source_node = max_load_disk.node;\n            move_info.source_disk_tag = max_load_disk.disk_tag;\n            move_info.target_node = node_addr;\n            move_info.type = cluster_info.type;\n            ddebug_f(\"partition[{}] will migrate from {} to {}\",\n                     picked_pid,\n                     max_load_disk.node.to_string(),\n                     node_addr.to_string());\n            return true;\n        }\n    }\n    ddebug_f(\"can not find a partition(app_id={}) from random max load disk(node={}, disk={})\",\n             app_id,\n             max_load_disk.node.to_string(),\n             max_load_disk.disk_tag);\n    return false;\n}\n\nvoid cluster_balance_policy::get_max_load_disk_set(\n    const cluster_migration_info &cluster_info,\n    const std::set<rpc_address> &max_nodes,\n    const int32_t app_id,\n    /*out*/ std::set<app_disk_info> &max_load_disk_set)\n{\n    // key: partition count (app_disk_info.partitions.size())\n    // value: app_disk_info structure\n    std::multimap<uint32_t, app_disk_info> app_disk_info_multimap;\n    for (const auto &node_addr : max_nodes) {\n        // key: disk_tag\n        // value: partition set for app(app id=app_id) in node(addr=node_addr)\n        std::map<std::string, partition_set> disk_partitions =\n            get_disk_partitions_map(cluster_info, node_addr, app_id);\n        for (const auto &kv : disk_partitions) {\n            
app_disk_info info;\n            info.app_id = app_id;\n            info.node = node_addr;\n            info.disk_tag = kv.first;\n            info.partitions = kv.second;\n            app_disk_info_multimap.insert(\n                std::pair<uint32_t, app_disk_info>(kv.second.size(), info));\n        }\n    }\n    auto range = app_disk_info_multimap.equal_range(app_disk_info_multimap.rbegin()->first);\n    for (auto iter = range.first; iter != range.second; ++iter) {\n        max_load_disk_set.insert(iter->second);\n    }\n}\n\nstd::map<std::string, partition_set> cluster_balance_policy::get_disk_partitions_map(\n    const cluster_migration_info &cluster_info, const rpc_address &addr, const int32_t app_id)\n{\n    std::map<std::string, partition_set> disk_partitions;\n    auto app_iter = cluster_info.apps_info.find(app_id);\n    auto node_iter = cluster_info.nodes_info.find(addr);\n    if (app_iter == cluster_info.apps_info.end() || node_iter == cluster_info.nodes_info.end()) {\n        return disk_partitions;\n    }\n\n    auto status = cluster_info.type == balance_type::COPY_SECONDARY ? 
partition_status::PS_SECONDARY\n                                                                    : partition_status::PS_PRIMARY;\n    auto app_partition = app_iter->second.partitions;\n    auto disk_partition = node_iter->second.partitions;\n    for (const auto &kv : disk_partition) {\n        auto disk_tag = kv.first;\n        for (const auto &pid : kv.second) {\n            if (pid.get_app_id() != app_id) {\n                continue;\n            }\n            auto status_map = app_partition[pid.get_partition_index()];\n            auto iter = status_map.find(addr);\n            if (iter != status_map.end() && iter->second == status) {\n                disk_partitions[disk_tag].insert(pid);\n            }\n        }\n    }\n    return disk_partitions;\n}\n\nbool cluster_balance_policy::pick_up_partition(const cluster_migration_info &cluster_info,\n                                               const rpc_address &min_node_addr,\n                                               const partition_set &max_load_partitions,\n                                               const partition_set &selected_pid,\n                                               /*out*/ gpid &picked_pid)\n{\n    bool found = false;\n    for (const auto &pid : max_load_partitions) {\n        auto iter = cluster_info.apps_info.find(pid.get_app_id());\n        if (iter == cluster_info.apps_info.end()) {\n            continue;\n        }\n\n        // partition has already in mirgration list\n        if (selected_pid.find(pid) != selected_pid.end()) {\n            continue;\n        }\n\n        // partition has already been primary or secondary on min_node\n        app_migration_info info = iter->second;\n        if (info.get_partition_status(pid.get_partition_index(), min_node_addr) !=\n            partition_status::PS_INACTIVE) {\n            continue;\n        }\n\n        picked_pid = pid;\n        found = true;\n        break;\n    }\n    return found;\n}\n\nbool 
cluster_balance_policy::apply_move(const move_info &move,\n                                        /*out*/ partition_set &selected_pids,\n                                        /*out*/ migration_list &list,\n                                        /*out*/ cluster_migration_info &cluster_info)\n{\n    int32_t app_id = move.pid.get_app_id();\n    rpc_address source = move.source_node, target = move.target_node;\n    if (cluster_info.apps_skew.find(app_id) == cluster_info.apps_skew.end() ||\n        cluster_info.replicas_count.find(source) == cluster_info.replicas_count.end() ||\n        cluster_info.replicas_count.find(target) == cluster_info.replicas_count.end() ||\n        cluster_info.apps_info.find(app_id) == cluster_info.apps_info.end()) {\n        return false;\n    }\n\n    app_migration_info app_info = cluster_info.apps_info[app_id];\n    if (app_info.partitions.size() <= move.pid.get_partition_index() ||\n        app_info.replicas_count.find(source) == app_info.replicas_count.end() ||\n        app_info.replicas_count.find(target) == app_info.replicas_count.end()) {\n        return false;\n    }\n    app_info.replicas_count[source]--;\n    app_info.replicas_count[target]++;\n\n    auto &pmap = app_info.partitions[move.pid.get_partition_index()];\n    rpc_address primary_addr;\n    for (const auto &kv : pmap) {\n        if (kv.second == partition_status::PS_PRIMARY) {\n            primary_addr = kv.first;\n        }\n    }\n    auto status = cluster_info.type == balance_type::COPY_SECONDARY ? 
partition_status::PS_SECONDARY\n                                                                    : partition_status::PS_PRIMARY;\n    auto iter = pmap.find(source);\n    if (iter == pmap.end() || iter->second != status) {\n        return false;\n    }\n    pmap.erase(source);\n    pmap[target] = status;\n\n    auto iters = cluster_info.nodes_info.find(source);\n    auto itert = cluster_info.nodes_info.find(target);\n    if (iters == cluster_info.nodes_info.end() || itert == cluster_info.nodes_info.end()) {\n        return false;\n    }\n    node_migration_info node_source = iters->second;\n    node_migration_info node_target = itert->second;\n    auto it = node_source.partitions.find(move.source_disk_tag);\n    if (it == node_source.partitions.end()) {\n        return false;\n    }\n    it->second.erase(move.pid);\n    node_target.future_partitions.insert(move.pid);\n\n    // add into migration list and selected_pid\n    partition_configuration pc;\n    pc.pid = move.pid;\n    pc.primary = primary_addr;\n    list[move.pid] = generate_balancer_request(*_global_view->apps, pc, move.type, source, target);\n    _migration_result->emplace(\n        move.pid, generate_balancer_request(*_global_view->apps, pc, move.type, source, target));\n    selected_pids.insert(move.pid);\n\n    cluster_info.apps_skew[app_id] = get_skew(app_info.replicas_count);\n    cluster_info.apps_info[app_id] = app_info;\n    cluster_info.nodes_info[source] = node_source;\n    cluster_info.nodes_info[target] = node_target;\n    cluster_info.replicas_count[source]--;\n    cluster_info.replicas_count[target]++;\n    return true;\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/cluster_balance_policy.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"load_balance_policy.h\"\n\nnamespace dsn {\nnamespace replication {\nuint32_t get_partition_count(const node_state &ns, balance_type type, int32_t app_id);\nuint32_t get_skew(const std::map<rpc_address, uint32_t> &count_map);\nvoid get_min_max_set(const std::map<rpc_address, uint32_t> &node_count_map,\n                     /*out*/ std::set<rpc_address> &min_set,\n                     /*out*/ std::set<rpc_address> &max_set);\n\nclass cluster_balance_policy : public load_balance_policy\n{\npublic:\n    cluster_balance_policy(meta_service *svc);\n    ~cluster_balance_policy() = default;\n\n    void balance(bool checker, const meta_view *global_view, migration_list *list);\n\nprivate:\n    struct cluster_migration_info;\n    struct app_migration_info;\n    struct node_migration_info;\n    struct move_info;\n    struct app_disk_info;\n    bool cluster_replica_balance(const meta_view *global_view,\n                                 const balance_type type,\n                                 /*out*/ migration_list &list);\n    bool do_cluster_replica_balance(const meta_view *global_view,\n                                    
const balance_type type,\n                                    /*out*/ migration_list &list);\n    bool get_cluster_migration_info(const meta_view *global_view,\n                                    const balance_type type,\n                                    /*out*/ cluster_migration_info &cluster_info);\n    bool get_app_migration_info(std::shared_ptr<app_state> app,\n                                const node_mapper &nodes,\n                                const balance_type type,\n                                /*out*/ app_migration_info &info);\n    void get_node_migration_info(const node_state &ns,\n                                 const app_mapper &all_apps,\n                                 /*out*/ node_migration_info &info);\n    bool get_next_move(const cluster_migration_info &cluster_info,\n                       const partition_set &selected_pid,\n                       /*out*/ move_info &next_move);\n    bool pick_up_move(const cluster_migration_info &cluster_info,\n                      const std::set<rpc_address> &max_nodes,\n                      const std::set<rpc_address> &min_nodes,\n                      const int32_t app_id,\n                      const partition_set &selected_pid,\n                      /*out*/ move_info &move_info);\n    void get_max_load_disk_set(const cluster_migration_info &cluster_info,\n                               const std::set<rpc_address> &max_nodes,\n                               const int32_t app_id,\n                               /*out*/ std::set<app_disk_info> &max_load_disk_set);\n    std::map<std::string, partition_set> get_disk_partitions_map(\n        const cluster_migration_info &cluster_info, const rpc_address &addr, const int32_t app_id);\n    bool pick_up_partition(const cluster_migration_info &cluster_info,\n                           const rpc_address &min_node_addr,\n                           const partition_set &max_load_partitions,\n                           const partition_set &selected_pid,\n 
                          /*out*/ gpid &picked_pid);\n    bool apply_move(const move_info &move,\n                    /*out*/ partition_set &selected_pids,\n                    /*out*/ migration_list &list,\n                    /*out*/ cluster_migration_info &cluster_info);\n\n    struct app_migration_info\n    {\n        int32_t app_id;\n        std::string app_name;\n        std::vector<std::map<rpc_address, partition_status::type>> partitions;\n        std::map<rpc_address, uint32_t> replicas_count;\n        bool operator<(const app_migration_info &another) const\n        {\n            if (app_id < another.app_id)\n                return true;\n            return false;\n        }\n        bool operator==(const app_migration_info &another) const\n        {\n            return app_id == another.app_id;\n        }\n        partition_status::type get_partition_status(int32_t pidx, rpc_address addr)\n        {\n            for (const auto &kv : partitions[pidx]) {\n                if (kv.first == addr) {\n                    return kv.second;\n                }\n            }\n            return partition_status::PS_INACTIVE;\n        }\n    };\n\n    struct node_migration_info\n    {\n        rpc_address address;\n        // key-disk tag, value-partition set\n        std::map<std::string, partition_set> partitions;\n        partition_set future_partitions;\n        bool operator<(const node_migration_info &another) const\n        {\n            return address < another.address;\n        }\n        bool operator==(const node_migration_info &another) const\n        {\n            return address == another.address;\n        }\n    };\n\n    struct cluster_migration_info\n    {\n        balance_type type;\n        std::map<int32_t, uint32_t> apps_skew;\n        std::map<int32_t, app_migration_info> apps_info;\n        std::map<rpc_address, node_migration_info> nodes_info;\n        std::map<rpc_address, uint32_t> replicas_count;\n    };\n\n    struct app_disk_info\n    
{\n        int32_t app_id;\n        rpc_address node;\n        std::string disk_tag;\n        partition_set partitions;\n        bool operator==(const app_disk_info &another) const\n        {\n            return app_id == another.app_id && node == another.node && disk_tag == another.disk_tag;\n        }\n        bool operator<(const app_disk_info &another) const\n        {\n            if (app_id < another.app_id || (app_id == another.app_id && node < another.node) ||\n                (app_id == another.app_id && node == another.node && disk_tag < another.disk_tag))\n                return true;\n            return false;\n        }\n    };\n\n    struct move_info\n    {\n        gpid pid;\n        rpc_address source_node;\n        std::string source_disk_tag;\n        rpc_address target_node;\n        balance_type type;\n    };\n\n    FRIEND_TEST(cluster_balance_policy, app_migration_info);\n    FRIEND_TEST(cluster_balance_policy, node_migration_info);\n    FRIEND_TEST(cluster_balance_policy, get_skew);\n    FRIEND_TEST(cluster_balance_policy, get_partition_count);\n    FRIEND_TEST(cluster_balance_policy, get_app_migration_info);\n    FRIEND_TEST(cluster_balance_policy, get_node_migration_info);\n    FRIEND_TEST(cluster_balance_policy, get_disk_partitions_map);\n    FRIEND_TEST(cluster_balance_policy, get_max_load_disk_set);\n    FRIEND_TEST(cluster_balance_policy, apply_move);\n    FRIEND_TEST(cluster_balance_policy, pick_up_partition);\n    FRIEND_TEST(cluster_balance_policy, execute_balance);\n};\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/distributed_lock_service_simple.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     a simple version of distributed lock service for development\n *\n * Revision history:\n *     2015-11-04, @imzhenyu (Zhenyu.Guo@microsoft.com), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/tool-api/async_calls.h>\n\n#include \"common/replication_common.h\"\n#include \"distributed_lock_service_simple.h\"\n\nnamespace dsn {\nnamespace dist {\nDEFINE_TASK_CODE(LPC_DIST_LOCK_SVC_RANDOM_EXPIRE, TASK_PRIORITY_COMMON, THREAD_POOL_META_SERVER)\n\nstatic void __lock_cb_bind_and_enqueue(task_ptr lock_task,\n                                       error_code err,\n                                       const std::string &owner,\n                                       uint64_t 
version,\n                                       int delay_milliseconds = 0)\n{\n    auto t = dynamic_cast<lock_future *>(lock_task.get());\n    t->enqueue_with(err, owner, version, delay_milliseconds);\n}\n\nvoid distributed_lock_service_simple::random_lock_lease_expire(const std::string &lock_id)\n{\n    // TODO: let's test without failure first\n    return;\n\n    std::string owner;\n    uint64_t version;\n    lock_wait_info next;\n    task_ptr lease_callback;\n\n    {\n        zauto_lock l(_lock);\n        auto it = _dlocks.find(lock_id);\n        if (it != _dlocks.end()) {\n            if (it->second.owner != \"\") {\n                owner = it->second.owner;\n                version = it->second.version;\n                lease_callback = it->second.lease_callback;\n\n                if (it->second.pending_list.size() > 0) {\n                    next = it->second.pending_list.front();\n\n                    it->second.owner = next.owner;\n                    it->second.version++;\n                    it->second.lease_callback = next.lease_callback;\n                    it->second.pending_list.pop_front();\n                } else {\n                    next.owner = \"\";\n                    it->second.owner = \"\";\n                    it->second.version++;\n                    it->second.lease_callback = nullptr;\n                }\n            } else\n                return;\n        } else {\n            dsn_task_cancel_current_timer();\n            return;\n        }\n    }\n\n    __lock_cb_bind_and_enqueue(lease_callback, ERR_EXPIRED, owner, version, 0);\n\n    if (next.owner != \"\") {\n        version++;\n        error_code err = ERR_OK;\n        __lock_cb_bind_and_enqueue(next.grant_callback, err, next.owner, version, 0);\n    }\n}\n\nerror_code distributed_lock_service_simple::initialize(const std::vector<std::string> & /*argc*/)\n{\n    return ERR_OK;\n}\n\nstd::pair<task_ptr, task_ptr>\ndistributed_lock_service_simple::lock(const std::string 
&lock_id,\n                                      const std::string &myself_id,\n                                      task_code lock_cb_code,\n                                      const lock_callback &lock_cb,\n                                      task_code lease_expire_code,\n                                      const lock_callback &lease_expire_callback,\n                                      const lock_options &opt)\n{\n    task_ptr grant_cb(new lock_future(lock_cb_code, lock_cb, 0));\n    task_ptr lease_cb(new lock_future(lease_expire_code, lease_expire_callback, 0));\n\n    error_code err;\n    std::string cowner;\n    uint64_t version = 0;\n    bool is_new = false;\n\n    {\n        zauto_lock l(_lock);\n        auto it = _dlocks.find(lock_id);\n        if (it == _dlocks.end()) {\n            if (!opt.create_if_not_exist)\n                err = ERR_OBJECT_NOT_FOUND;\n            else {\n                lock_info li;\n                li.owner = myself_id;\n                li.version = 1;\n                li.lease_callback = lease_cb;\n                _dlocks.insert(locks::value_type(lock_id, li));\n\n                err = ERR_OK;\n                cowner = myself_id;\n                version = 1;\n                is_new = true;\n            }\n        } else {\n            if (it->second.owner != \"\") {\n                if (it->second.owner == myself_id) {\n                    err = ERR_RECURSIVE_LOCK;\n                    cowner = myself_id;\n                    version = it->second.version;\n                } else {\n                    err = ERR_IO_PENDING;\n\n                    lock_wait_info wi;\n                    wi.grant_callback = grant_cb;\n                    wi.lease_callback = lease_cb;\n                    wi.owner = myself_id;\n                    it->second.pending_list.push_back(wi);\n                }\n            } else {\n                it->second.lease_callback = lease_cb;\n                it->second.owner = myself_id;\n              
  it->second.version++;\n\n                err = ERR_OK;\n                cowner = myself_id;\n                version = it->second.version;\n            }\n        }\n    }\n\n    if (is_new) {\n        tasking::enqueue_timer(LPC_DIST_LOCK_SVC_RANDOM_EXPIRE,\n                               &_tracker,\n                               [=]() { random_lock_lease_expire(lock_id); },\n                               std::chrono::minutes(5),\n                               0,\n                               std::chrono::seconds(1));\n    }\n\n    if (err != ERR_IO_PENDING) {\n        __lock_cb_bind_and_enqueue(grant_cb, err, cowner, version);\n    }\n\n    return std::pair<task_ptr, task_ptr>(grant_cb, lease_cb);\n}\n\ntask_ptr distributed_lock_service_simple::cancel_pending_lock(const std::string &lock_id,\n                                                              const std::string &myself_id,\n                                                              task_code cb_code,\n                                                              const lock_callback &cb)\n{\n    error_code err;\n    std::string cowner;\n    uint64_t version;\n\n    {\n        zauto_lock l(_lock);\n        auto it = _dlocks.find(lock_id);\n        if (it == _dlocks.end()) {\n            err = ERR_OBJECT_NOT_FOUND;\n            cowner = \"\";\n            version = 0;\n        } else {\n            cowner = it->second.owner;\n            version = it->second.version;\n            err = ERR_OBJECT_NOT_FOUND;\n            for (auto it2 = it->second.pending_list.begin(); it2 != it->second.pending_list.end();\n                 it2++) {\n                auto &w = *it2;\n                if (w.owner == myself_id) {\n                    err = ERR_OK;\n                    it->second.pending_list.erase(it2);\n                    break;\n                }\n            }\n        }\n    }\n\n    return tasking::enqueue(cb_code, nullptr, [=]() { cb(err, cowner, version); });\n}\n\ntask_ptr 
distributed_lock_service_simple::unlock(const std::string &lock_id,\n                                                 const std::string &myself_id,\n                                                 bool destroy,\n                                                 task_code cb_code,\n                                                 const err_callback &cb)\n{\n    error_code err;\n    lock_wait_info next;\n    uint64_t next_version = 0;\n\n    {\n        zauto_lock l(_lock);\n        auto it = _dlocks.find(lock_id);\n        if (it == _dlocks.end()) {\n            err = ERR_OBJECT_NOT_FOUND;\n        } else {\n            if (it->second.owner != myself_id) {\n                err = ERR_HOLD_BY_OTHERS;\n            } else {\n                err = ERR_OK;\n\n                if (it->second.pending_list.size() > 0) {\n                    next = it->second.pending_list.front();\n                    next_version = it->second.version++;\n                    it->second.owner = next.owner;\n                    it->second.lease_callback = next.lease_callback;\n                    it->second.pending_list.pop_front();\n                } else {\n                    next.owner = \"\";\n                    it->second.owner = \"\";\n                    it->second.lease_callback = nullptr;\n                    it->second.version++;\n                }\n            }\n        }\n    }\n\n    auto t = tasking::enqueue(cb_code, nullptr, [=]() { cb(err); });\n\n    if (next.owner != \"\") {\n        error_code err = ERR_OK;\n        __lock_cb_bind_and_enqueue(next.grant_callback, err, next.owner, next_version, 0);\n    }\n\n    return t;\n}\n\ntask_ptr distributed_lock_service_simple::query_lock(const std::string &lock_id,\n                                                     task_code cb_code,\n                                                     const lock_callback &cb)\n{\n    error_code err;\n    std::string cowner;\n    uint64_t version = 0;\n\n    {\n        zauto_lock l(_lock);\n      
  auto it = _dlocks.find(lock_id);\n        if (it == _dlocks.end()) {\n            err = ERR_OBJECT_NOT_FOUND;\n        } else {\n            err = ERR_OK;\n            cowner = it->second.owner;\n            version = it->second.version;\n        }\n    }\n\n    return tasking::enqueue(cb_code, nullptr, [=]() { cb(err, cowner, version); });\n}\n\nerror_code distributed_lock_service_simple::query_cache(const std::string &lock_id,\n                                                        /*out*/ std::string &owner,\n                                                        /*out*/ uint64_t &version)\n{\n    error_code err;\n    {\n        zauto_lock l(_lock);\n        auto it = _dlocks.find(lock_id);\n        if (it == _dlocks.end()) {\n            err = ERR_OBJECT_NOT_FOUND;\n        } else {\n            err = ERR_OK;\n            owner = it->second.owner;\n            version = it->second.version;\n        }\n    }\n    return err;\n}\n}\n}\n"
  },
  {
    "path": "src/meta/distributed_lock_service_simple.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     a simple version of distributed lock service for development\n *\n * Revision history:\n *     2015-11-04, @imzhenyu (Zhenyu.Guo@microsoft.com), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/dist/distributed_lock_service.h>\n\nnamespace dsn {\nnamespace dist {\nclass distributed_lock_service_simple : public distributed_lock_service\n{\npublic:\n    virtual ~distributed_lock_service_simple() { _tracker.cancel_outstanding_tasks(); }\n    // no parameter need\n    virtual error_code initialize(const std::vector<std::string> &args) override;\n    virtual error_code finalize() override { return ERR_OK; }\n\n    virtual 
std::pair<task_ptr, task_ptr> lock(const std::string &lock_id,\n                                               const std::string &myself_id,\n                                               task_code lock_cb_code,\n                                               const lock_callback &lock_cb,\n                                               task_code lease_expire_code,\n                                               const lock_callback &lease_expire_callback,\n                                               const lock_options &opt) override;\n\n    virtual task_ptr cancel_pending_lock(const std::string &lock_id,\n                                         const std::string &myself_id,\n                                         task_code cb_code,\n                                         const lock_callback &cb) override;\n\n    virtual task_ptr unlock(const std::string &lock_id,\n                            const std::string &myself_id,\n                            bool destroy,\n                            task_code cb_code,\n                            const err_callback &cb) override;\n\n    virtual task_ptr\n    query_lock(const std::string &lock_id, task_code cb_code, const lock_callback &cb) override;\n\n    virtual error_code query_cache(const std::string &lock_id,\n                                   /*out*/ std::string &owner,\n                                   /*out*/ uint64_t &version) override;\n\nprivate:\n    void random_lock_lease_expire(const std::string &lock_id);\n\nprivate:\n    struct lock_wait_info\n    {\n        task_ptr grant_callback;\n        task_ptr lease_callback;\n        std::string owner;\n    };\n\n    struct lock_info\n    {\n        std::string owner;\n        uint64_t version;\n        task_ptr lease_callback;\n        std::list<lock_wait_info> pending_list;\n    };\n\n    typedef std::unordered_map<std::string, lock_info> locks;\n\n    zlock _lock;\n    locks _dlocks; // lock -> owner\n\n    dsn::task_tracker _tracker;\n};\n}\n}\n"
  },
  {
    "path": "src/meta/dump_file.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     A simple dump file implementation for meta server, which can be used to dump meta's\n * server-state\n *\n * Revision history:\n *     2015-12-10, Weijie Sun(sunweijie at xiaomi.com), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n#pragma once\n\n#include <dsn/utility/safe_strerror_posix.h>\n#include <dsn/service_api_c.h>\n#include <dsn/service_api_cpp.h>\n#include <dsn/utility/crc.h>\n#include <cstdio>\n#include <cerrno>\n#include <iostream>\n\n#define log_error_and_return(buffer, length)                                                       \\\n    do {                                                                                           \\\n        
::dsn::utils::safe_strerror_r(errno, buffer, length);                                      \\\n        derror(\"append file failed, reason(%s)\", buffer);                                          \\\n        return -1;                                                                                 \\\n    } while (0)\n\nstruct block_header\n{\n    uint32_t length;\n    uint32_t crc32;\n};\n\nclass dump_file\n{\npublic:\n    ~dump_file() { fclose(_file_handle); }\n\n    static std::shared_ptr<dump_file> open_file(const char *filename, bool is_write)\n    {\n        std::shared_ptr<dump_file> res(new dump_file());\n        res->_filename = filename;\n        if (is_write)\n            res->_file_handle = fopen(filename, \"wb\");\n        else\n            res->_file_handle = fopen(filename, \"rb\");\n        res->_is_write = is_write;\n\n        if (res->_file_handle == nullptr)\n            return nullptr;\n        return res;\n    }\n\n    int append_buffer(const char *data, uint32_t data_length)\n    {\n        static __thread char msg_buffer[128];\n\n        dassert(_is_write, \"call append when open file with read mode\");\n\n        block_header hdr = {data_length, 0};\n        hdr.crc32 = dsn::utils::crc32_calc(data, data_length, _crc);\n        _crc = hdr.crc32;\n        size_t len = fwrite(&hdr, sizeof(hdr), 1, _file_handle);\n        if (len < 1) {\n            log_error_and_return(msg_buffer, 128);\n        }\n\n        len = 0;\n        while (len < data_length) {\n            size_t cnt = fwrite(data + len, 1, data_length - len, _file_handle);\n            if (len + cnt < data_length && errno != EINTR) {\n                log_error_and_return(msg_buffer, 128);\n            }\n            len += cnt;\n        }\n        return 0;\n    }\n    int append_buffer(const dsn::blob &data) { return append_buffer(data.data(), data.length()); }\n    int append_buffer(const std::string &data) { return append_buffer(data.c_str(), data.size()); }\n    int 
read_next_buffer(/*out*/ dsn::blob &output)\n    {\n        static __thread char msg_buffer[128];\n        dassert(!_is_write, \"call read next buffer when open file with write mode\");\n\n        block_header hdr;\n        size_t len = fread(&hdr, sizeof(hdr), 1, _file_handle);\n        if (len < 1) {\n            if (feof(_file_handle))\n                return 0;\n            else {\n                log_error_and_return(msg_buffer, 128);\n            }\n        }\n\n        std::shared_ptr<char> ptr(dsn::utils::make_shared_array<char>(hdr.length));\n        char *raw_mem = ptr.get();\n        len = 0;\n        while (len < hdr.length) {\n            size_t cnt = fread(raw_mem + len, 1, hdr.length - len, _file_handle);\n            if (len + cnt < hdr.length) {\n                if (feof(_file_handle)) {\n                    derror(\"unexpected file end, start offset of this block (%u)\",\n                           ftell(_file_handle) - len - sizeof(hdr));\n                    return -1;\n                } else if (errno != EINTR) {\n                    log_error_and_return(msg_buffer, 128);\n                }\n            }\n            len += cnt;\n        }\n        _crc = dsn::utils::crc32_calc(raw_mem, len, _crc);\n        if (_crc != hdr.crc32) {\n            derror(\"file %s data error, block offset(%ld)\",\n                   _filename.c_str(),\n                   ftell(_file_handle) - hdr.length - sizeof(hdr));\n            return -1;\n        }\n\n        output.assign(ptr, 0, hdr.length);\n        return 1;\n    }\n\nprivate:\n    dump_file() : _file_handle(nullptr), _crc(0) {}\n    bool _is_write; // true for write, false for read\n    FILE *_file_handle;\n    std::string _filename;\n    uint32_t _crc;\n};\n"
  },
  {
    "path": "src/meta/duplication/duplication_info.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"duplication_info.h\"\n#include \"meta/meta_data.h\"\n\n#include <rapidjson/prettywriter.h>\n#include <dsn/dist/fmt_logging.h>\n\nnamespace dsn {\nnamespace replication {\n\n/*extern*/ void json_encode(dsn::json::JsonWriter &out, const duplication_status::type &s)\n{\n    json::json_encode(out, duplication_status_to_string(s));\n}\n\n/*extern*/ bool json_decode(const dsn::json::JsonObject &in, duplication_status::type &s)\n{\n    static const std::map<std::string, duplication_status::type>\n        _duplication_status_NAMES_TO_VALUES = {\n            {\"DS_INIT\", duplication_status::DS_INIT},\n            {\"DS_PREPARE\", duplication_status::DS_PREPARE},\n            {\"DS_APP\", duplication_status::DS_APP},\n            {\"DS_LOG\", duplication_status::DS_LOG},\n            {\"DS_PAUSE\", duplication_status::DS_PAUSE},\n            {\"DS_REMOVED\", duplication_status::DS_REMOVED},\n        };\n\n    std::string name;\n    json::json_decode(in, name);\n    auto it = _duplication_status_NAMES_TO_VALUES.find(name);\n    if (it != _duplication_status_NAMES_TO_VALUES.end()) {\n        s = it->second;\n        return true;\n    }\n    
derror_f(\"unexpected duplication_status name: {}\", name);\n\n    // for forward compatibility issue, duplication of unexpected status\n    // will be marked as invisible.\n    s = duplication_status::DS_REMOVED;\n    return false;\n}\n\n/*extern*/ void json_encode(dsn::json::JsonWriter &out, const duplication_fail_mode::type &fmode)\n{\n    json::json_encode(out, duplication_fail_mode_to_string(fmode));\n}\n\n/*extern*/ bool json_decode(const dsn::json::JsonObject &in, duplication_fail_mode::type &fmode)\n{\n    static const std::map<std::string, duplication_fail_mode::type>\n        _duplication_fail_mode_NAMES_TO_VALUES = {\n            {\"FAIL_SLOW\", duplication_fail_mode::FAIL_SLOW},\n            {\"FAIL_SKIP\", duplication_fail_mode::FAIL_SKIP},\n            {\"FAIL_FAST\", duplication_fail_mode::FAIL_FAST},\n        };\n\n    std::string name;\n    json::json_decode(in, name);\n    auto it = _duplication_fail_mode_NAMES_TO_VALUES.find(name);\n    if (it != _duplication_fail_mode_NAMES_TO_VALUES.end()) {\n        fmode = it->second;\n        return true;\n    }\n    derror_f(\"unexpected duplication_fail_mode name: {}\", name);\n    // marked as default value.\n    fmode = duplication_fail_mode::FAIL_SLOW;\n    return false;\n}\n\n// lock held\nerror_code duplication_info::alter_status(duplication_status::type to_status,\n                                          duplication_fail_mode::type to_fail_mode)\n{\n    if (_is_altering) {\n        return ERR_BUSY;\n    }\n\n    if (_status == duplication_status::DS_REMOVED) {\n        return ERR_OBJECT_NOT_FOUND;\n    }\n\n    if (!is_valid_alteration(to_status)) {\n        return ERR_INVALID_PARAMETERS;\n    }\n\n    if (_status == to_status && _fail_mode == to_fail_mode) {\n        return ERR_OK;\n    }\n\n    zauto_write_lock l(_lock);\n    _is_altering = true;\n    _next_status = to_status;\n    _next_fail_mode = to_fail_mode;\n    return ERR_OK;\n}\n\nvoid duplication_info::init_progress(int partition_index, 
decree d)\n{\n    zauto_write_lock l(_lock);\n\n    auto &p = _progress[partition_index];\n    p.volatile_decree = p.stored_decree = d;\n    p.is_inited = true;\n}\n\nbool duplication_info::alter_progress(int partition_index,\n                                      const duplication_confirm_entry &confirm_entry)\n{\n    zauto_write_lock l(_lock);\n\n    partition_progress &p = _progress[partition_index];\n    if (!p.is_inited) {\n        return false;\n    }\n    if (p.is_altering) {\n        return false;\n    }\n\n    p.checkpoint_prepared = confirm_entry.checkpoint_prepared;\n    if (p.volatile_decree < confirm_entry.confirmed_decree) {\n        p.volatile_decree = confirm_entry.confirmed_decree;\n    }\n    if (p.volatile_decree != p.stored_decree) {\n        // progress update is not supposed to be too frequent.\n        if (dsn_now_ms() > p.last_progress_update_ms + PROGRESS_UPDATE_PERIOD_MS) {\n            p.is_altering = true;\n            p.last_progress_update_ms = dsn_now_ms();\n            return true;\n        }\n    }\n    return false;\n}\n\nvoid duplication_info::persist_progress(int partition_index)\n{\n    zauto_write_lock l(_lock);\n\n    auto &p = _progress[partition_index];\n    dassert_dup(p.is_altering, this, \"partition_index: {}\", partition_index);\n    p.is_altering = false;\n    p.stored_decree = p.volatile_decree;\n}\n\nvoid duplication_info::persist_status()\n{\n    zauto_write_lock l(_lock);\n\n    if (!_is_altering) {\n        derror_dup(this, \"callers never write a duplication that is not altering to meta store\");\n        return;\n    }\n    ddebug_dup(this,\n               \"change duplication status from {} to {} successfully [app_id: {}]\",\n               duplication_status_to_string(_status),\n               duplication_status_to_string(_next_status),\n               app_id);\n\n    _is_altering = false;\n    _status = _next_status;\n    _next_status = duplication_status::DS_INIT;\n    _fail_mode = 
_next_fail_mode;\n}\n\nstd::string duplication_info::to_string() const\n{\n    return duplication_entry_to_string(to_duplication_entry());\n}\n\nblob duplication_info::to_json_blob() const\n{\n    json_helper copy;\n    copy.create_timestamp_ms = create_timestamp_ms;\n    copy.remote = follower_cluster_name;\n    copy.status = _next_status;\n    copy.fail_mode = _next_fail_mode;\n    return json::json_forwarder<json_helper>::encode(copy);\n}\n\nvoid duplication_info::report_progress_if_time_up()\n{\n    // progress report is not supposed to be too frequent.\n    if (dsn_now_ms() > _last_progress_report_ms + PROGRESS_REPORT_PERIOD_MS) {\n        _last_progress_report_ms = dsn_now_ms();\n        ddebug_f(\"duplication report: {}\", to_string());\n    }\n}\n\nduplication_info_s_ptr duplication_info::decode_from_blob(dupid_t dup_id,\n                                                          int32_t app_id,\n                                                          const std::string &app_name,\n                                                          int32_t partition_count,\n                                                          std::string store_path,\n                                                          const blob &json)\n{\n    json_helper info;\n    if (!json::json_forwarder<json_helper>::decode(json, info)) {\n        return nullptr;\n    }\n    std::vector<rpc_address> meta_list;\n    if (!dsn::replication::replica_helper::load_meta_servers(\n            meta_list, duplication_constants::kClustersSectionName.c_str(), info.remote.c_str())) {\n        return nullptr;\n    }\n\n    auto dup = std::make_shared<duplication_info>(dup_id,\n                                                  app_id,\n                                                  app_name,\n                                                  partition_count,\n                                                  info.create_timestamp_ms,\n                                                  
std::move(info.remote),\n                                                  std::move(meta_list),\n                                                  std::move(store_path));\n    dup->_status = info.status;\n    dup->_fail_mode = info.fail_mode;\n    return dup;\n}\n\nvoid duplication_info::append_if_valid_for_query(\n    const app_state &app,\n    /*out*/ std::vector<duplication_entry> &entry_list) const\n{\n    zauto_read_lock l(_lock);\n\n    entry_list.emplace_back(to_duplication_entry());\n    duplication_entry &ent = entry_list.back();\n    // the confirmed decree is not useful for displaying\n    // the overall state of duplication\n    ent.__isset.progress = false;\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/duplication/duplication_info.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/dist/replication/replication_types.h>\n#include <dsn/dist/replication/replication_other_types.h>\n#include <dsn/dist/replication/duplication_common.h>\n#include <dsn/cpp/json_helper.h>\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/dist/fmt_logging.h>\n\n#include <utility>\n#include <fmt/format.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass app_state;\n\nclass duplication_info;\nusing duplication_info_s_ptr = std::shared_ptr<duplication_info>;\n\n/// This class is thread-safe.\nclass duplication_info\n{\npublic:\n    /// \\see meta_duplication_service::new_dup_from_init\n    /// \\see duplication_info::decode_from_blob\n    duplication_info(dupid_t dupid,\n                     int32_t appid,\n                     std::string app_name,\n                     int32_t partition_count,\n                     uint64_t create_now_ms,\n                     std::string follower_cluster_name,\n                     std::vector<rpc_address> &&follower_cluster_metas,\n                     std::string meta_store_path)\n        : id(dupid),\n          app_id(appid),\n          app_name(std::move(app_name)),\n          
partition_count(partition_count),\n          follower_cluster_name(std::move(follower_cluster_name)),\n          follower_cluster_metas(std::move(follower_cluster_metas)),\n          store_path(std::move(meta_store_path)),\n          create_timestamp_ms(create_now_ms)\n    {\n        for (int i = 0; i < partition_count; i++) {\n            _progress[i] = {};\n        }\n    }\n\n    error_code start(bool is_duplicating_checkpoint = true)\n    {\n        if (is_duplicating_checkpoint) {\n            return alter_status(duplication_status::DS_PREPARE);\n        }\n        dwarn_f(\"you now create duplication[{}[{}.{}]] without duplicating checkpoint\",\n                id,\n                follower_cluster_name,\n                app_name);\n        return alter_status(duplication_status::DS_LOG);\n    }\n\n    // error will be returned if this state transition is not allowed.\n    error_code\n    alter_status(duplication_status::type to_status,\n                 duplication_fail_mode::type to_fail_mode = duplication_fail_mode::FAIL_SLOW);\n\n    // call this function after data has been persisted on meta storage.\n    void persist_status();\n\n    // not thread-safe\n    duplication_status::type status() const { return _status; }\n    duplication_fail_mode::type fail_mode() const { return _fail_mode; }\n\n    // if this duplication is in valid status.\n    bool is_invalid_status() const { return is_duplication_status_invalid(_status); }\n\n    bool is_valid_alteration(duplication_status::type to_status) const\n    {\n        return to_status == _status || (to_status == duplication_status::DS_PREPARE &&\n                                        _status == duplication_status::DS_INIT) ||\n               (to_status == duplication_status::DS_APP &&\n                _status == duplication_status::DS_PREPARE) ||\n               (to_status == duplication_status::DS_LOG &&\n                (_status == duplication_status::DS_PAUSE || _status == duplication_status::DS_APP ||\n  
               _status == duplication_status::DS_INIT)) ||\n               (to_status == duplication_status::DS_PAUSE &&\n                _status == duplication_status::DS_LOG) ||\n               (to_status == duplication_status::DS_REMOVED);\n    };\n\n    ///\n    /// alter_progress -> persist_progress\n    ///\n\n    // Returns: false if `confirm_entry` is not supposed to be persisted,\n    //          maybe because meta storage is busy or `confirm_entry` is stale.\n    bool alter_progress(int partition_index, const duplication_confirm_entry &confirm_entry);\n\n    void persist_progress(int partition_index);\n\n    void init_progress(int partition_index, decree confirmed);\n\n    // Generates a json blob to be stored in meta storage.\n    // The status in json is `next_status`.\n    blob to_json_blob() const;\n\n    /// \\see meta_duplication_service::recover_from_meta_state\n    static duplication_info_s_ptr decode_from_blob(dupid_t dup_id,\n                                                   int32_t app_id,\n                                                   const std::string &app_name,\n                                                   int32_t partition_count,\n                                                   std::string store_path,\n                                                   const blob &json);\n\n    // duplication_query_rpc is handled in THREAD_POOL_META_SERVER,\n    // which is not thread safe for read.\n    void append_if_valid_for_query(const app_state &app,\n                                   /*out*/ std::vector<duplication_entry> &entry_list) const;\n\n    duplication_entry to_duplication_entry() const\n    {\n        duplication_entry entry;\n        entry.dupid = id;\n        entry.create_ts = create_timestamp_ms;\n        entry.remote = follower_cluster_name;\n        entry.status = _status;\n        entry.__set_fail_mode(_fail_mode);\n        entry.__isset.progress = true;\n        for (const auto &kv : _progress) {\n            if 
(!kv.second.is_inited) {\n                continue;\n            }\n            entry.progress[kv.first] = kv.second.stored_decree;\n        }\n        return entry;\n    }\n\n    bool all_checkpoint_has_prepared()\n    {\n        int prepared = 0;\n        bool completed =\n            std::all_of(_progress.begin(),\n                        _progress.end(),\n                        [&](std::pair<int, partition_progress> item) -> bool {\n                            prepared = item.second.checkpoint_prepared ? prepared + 1 : prepared;\n                            return item.second.checkpoint_prepared;\n                        });\n        if (!completed) {\n            dwarn_f(\"replica checkpoint still running: {}/{}\", prepared, _progress.size());\n        }\n        return completed;\n    }\n\n    void report_progress_if_time_up();\n\n    // This function should only be used for testing.\n    // Not thread-safe.\n    bool is_altering() const { return _is_altering; }\n\n    // Test util\n    bool equals_to(const duplication_info &rhs) const { return to_string() == rhs.to_string(); }\n\n    // To json encoded string.\n    std::string to_string() const;\n\nprivate:\n    friend class duplication_info_test;\n    friend class meta_duplication_service_test;\n\n    // Whether there's ongoing meta storage update.\n    bool _is_altering{false};\n\n    mutable zrwlock_nr _lock;\n\n    static constexpr int PROGRESS_UPDATE_PERIOD_MS = 5000;          // 5s\n    static constexpr int PROGRESS_REPORT_PERIOD_MS = 1000 * 60 * 5; // 5min\n\n    struct partition_progress\n    {\n        int64_t volatile_decree{invalid_decree};\n        int64_t stored_decree{invalid_decree};\n        bool is_altering{false};\n        uint64_t last_progress_update_ms{0};\n        bool is_inited{false};\n        bool checkpoint_prepared{false};\n    };\n\n    // partition_idx => progress\n    std::map<int, partition_progress> _progress;\n\n    uint64_t _last_progress_report_ms{0};\n\n    
duplication_status::type _status{duplication_status::DS_INIT};\n    duplication_status::type _next_status{duplication_status::DS_INIT};\n\n    duplication_fail_mode::type _fail_mode{duplication_fail_mode::FAIL_SLOW};\n    duplication_fail_mode::type _next_fail_mode{duplication_fail_mode::FAIL_SLOW};\n    struct json_helper\n    {\n        std::string remote;\n        duplication_status::type status;\n        int64_t create_timestamp_ms;\n        duplication_fail_mode::type fail_mode;\n\n        DEFINE_JSON_SERIALIZATION(remote, status, create_timestamp_ms, fail_mode);\n    };\n\npublic:\n    const dupid_t id{0};\n    const int32_t app_id{0};\n    const std::string app_name;\n    const int32_t partition_count{0};\n\n    const std::string follower_cluster_name;\n    const std::vector<rpc_address> follower_cluster_metas;\n    const std::string store_path; // store path on meta service = get_duplication_path(app, dupid)\n    const uint64_t create_timestamp_ms{0}; // the time when this dup is created.\n};\n\nextern void json_encode(dsn::json::JsonWriter &out, const duplication_status::type &s);\n\nextern bool json_decode(const dsn::json::JsonObject &in, duplication_status::type &s);\n\nextern void json_encode(dsn::json::JsonWriter &out, const duplication_fail_mode::type &s);\n\nextern bool json_decode(const dsn::json::JsonObject &in, duplication_fail_mode::type &s);\n\n// Macros for writing log message prefixed by appid and dupid.\n#define ddebug_dup(_dup_, ...)                                                                     \\\n    ddebug_f(\"[a{}d{}] {}\", _dup_->app_id, _dup_->id, fmt::format(__VA_ARGS__));\n#define dwarn_dup(_dup_, ...)                                                                      \\\n    dwarn_f(\"[a{}d{}] {}\", _dup_->app_id, _dup_->id, fmt::format(__VA_ARGS__));\n#define derror_dup(_dup_, ...)                                                                     
\\\n    derror_f(\"[a{}d{}] {}\", _dup_->app_id, _dup_->id, fmt::format(__VA_ARGS__));\n#define dfatal_dup(_dup_, ...)                                                                     \\\n    dfatal_f(\"[a{}d{}] {}\", _dup_->app_id, _dup_->id, fmt::format(__VA_ARGS__));\n#define dassert_dup(_pred_, _dup_, ...)                                                            \\\n    dassert_f(_pred_, \"[a{}d{}] {}\", _dup_->app_id, _dup_->id, fmt::format(__VA_ARGS__));\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/duplication/meta_duplication_service.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/dist/replication/duplication_common.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/common.h>\n#include <dsn/utility/chrono_literals.h>\n#include <dsn/utility/string_conv.h>\n#include <dsn/tool-api/group_address.h>\n\n#include \"meta/meta_service.h\"\n#include \"meta_duplication_service.h\"\n#include \"dsn/utility/fail_point.h\"\n\nnamespace dsn {\nnamespace replication {\n\nusing namespace literals::chrono_literals;\n\n// ThreadPool(READ): THREAD_POOL_META_SERVER\nvoid meta_duplication_service::query_duplication_info(const duplication_query_request &request,\n                                                      duplication_query_response &response)\n{\n    ddebug_f(\"query duplication info for app: {}\", request.app_name);\n\n    response.err = ERR_OK;\n    {\n        zauto_read_lock l(app_lock());\n        std::shared_ptr<app_state> app = _state->get_app(request.app_name);\n        if (!app || app->status != app_status::AS_AVAILABLE) {\n            response.err = ERR_APP_NOT_EXIST;\n        } else {\n            response.appid = app->app_id;\n            for (auto &dup_id_to_info : app->duplications) {\n             
   const duplication_info_s_ptr &dup = dup_id_to_info.second;\n                dup->append_if_valid_for_query(*app, response.entry_list);\n            }\n        }\n    }\n}\n\n// ThreadPool(WRITE): THREAD_POOL_META_STATE\nvoid meta_duplication_service::modify_duplication(duplication_modify_rpc rpc)\n{\n    const auto &request = rpc.request();\n    auto &response = rpc.response();\n\n    ddebug_f(\"modify duplication({}) to [status={},fail_mode={}] for app({})\",\n             request.dupid,\n             request.__isset.status ? duplication_status_to_string(request.status) : \"nil\",\n             request.__isset.fail_mode ? duplication_fail_mode_to_string(request.fail_mode) : \"nil\",\n             request.app_name);\n\n    dupid_t dupid = request.dupid;\n\n    std::shared_ptr<app_state> app = _state->get_app(request.app_name);\n    if (!app || app->status != app_status::AS_AVAILABLE) {\n        response.err = ERR_APP_NOT_EXIST;\n        return;\n    }\n\n    auto it = app->duplications.find(dupid);\n    if (it == app->duplications.end()) {\n        response.err = ERR_OBJECT_NOT_FOUND;\n        return;\n    }\n\n    duplication_info_s_ptr dup = it->second;\n    auto to_status = request.__isset.status ? request.status : dup->status();\n    auto to_fail_mode = request.__isset.fail_mode ? 
request.fail_mode : dup->fail_mode();\n    response.err = dup->alter_status(to_status, to_fail_mode);\n    if (response.err != ERR_OK) {\n        return;\n    }\n    if (!dup->is_altering()) {\n        return;\n    }\n\n    // validation passed\n    do_modify_duplication(app, dup, rpc);\n}\n\n// ThreadPool(WRITE): THREAD_POOL_META_STATE\nvoid meta_duplication_service::do_modify_duplication(std::shared_ptr<app_state> &app,\n                                                     duplication_info_s_ptr &dup,\n                                                     duplication_modify_rpc &rpc)\n{\n    if (rpc.request().status == duplication_status::DS_REMOVED) {\n        _meta_svc->get_meta_storage()->delete_node_recursively(\n            std::string(dup->store_path), [rpc, this, app, dup]() {\n                dup->persist_status();\n                rpc.response().err = ERR_OK;\n                rpc.response().appid = app->app_id;\n\n                if (rpc.request().status == duplication_status::DS_REMOVED) {\n                    zauto_write_lock l(app_lock());\n                    app->duplications.erase(dup->id);\n                    refresh_duplicating_no_lock(app);\n                }\n            });\n        return;\n    }\n    // store the duplication in requested status.\n    blob value = dup->to_json_blob();\n    _meta_svc->get_meta_storage()->set_data(\n        std::string(dup->store_path), std::move(value), [rpc, app, dup]() {\n            dup->persist_status();\n            rpc.response().err = ERR_OK;\n            rpc.response().appid = app->app_id;\n        });\n}\n\n// This call will not recreate if the duplication\n// with the same app name and remote end point already exists.\n// ThreadPool(WRITE): THREAD_POOL_META_STATE\nvoid meta_duplication_service::add_duplication(duplication_add_rpc rpc)\n{\n    const auto &request = rpc.request();\n    auto &response = rpc.response();\n\n    ddebug_f(\"add duplication for app({}), remote cluster name is {}\",\n         
    request.app_name,\n             request.remote_cluster_name);\n\n    response.err = ERR_OK;\n\n    if (request.remote_cluster_name == get_current_cluster_name()) {\n        response.err = ERR_INVALID_PARAMETERS;\n        response.__set_hint(\"illegal operation: adding duplication to itself\");\n        return;\n    }\n    auto remote_cluster_id = get_duplication_cluster_id(request.remote_cluster_name);\n    if (!remote_cluster_id.is_ok()) {\n        response.err = ERR_INVALID_PARAMETERS;\n        response.__set_hint(fmt::format(\"get_duplication_cluster_id({}) failed, error: {}\",\n                                        request.remote_cluster_name,\n                                        remote_cluster_id.get_error()));\n        return;\n    }\n\n    std::vector<rpc_address> meta_list;\n    if (!dsn::replication::replica_helper::load_meta_servers(\n            meta_list,\n            duplication_constants::kClustersSectionName.c_str(),\n            request.remote_cluster_name.c_str())) {\n        response.err = ERR_INVALID_PARAMETERS;\n        response.__set_hint(fmt::format(\"failed to find cluster[{}] address in config [{}]\",\n                                        request.remote_cluster_name,\n                                        duplication_constants::kClustersSectionName));\n        return;\n    }\n\n    auto app = _state->get_app(request.app_name);\n    if (!app || app->status != app_status::AS_AVAILABLE) {\n        response.err = ERR_APP_NOT_EXIST;\n        return;\n    }\n    duplication_info_s_ptr dup;\n    for (const auto &ent : app->duplications) {\n        auto it = ent.second;\n        if (it->follower_cluster_name == request.remote_cluster_name) {\n            dup = ent.second;\n            break;\n        }\n    }\n    if (!dup) {\n        dup = new_dup_from_init(request.remote_cluster_name, std::move(meta_list), app);\n    }\n    do_add_duplication(app, dup, rpc);\n}\n\n// ThreadPool(WRITE): THREAD_POOL_META_STATE\nvoid 
meta_duplication_service::do_add_duplication(std::shared_ptr<app_state> &app,\n                                                  duplication_info_s_ptr &dup,\n                                                  duplication_add_rpc &rpc)\n{\n    const auto err = dup->start(rpc.request().is_duplicating_checkpoint);\n    if (dsn_unlikely(err != ERR_OK)) {\n        derror_f(\"start dup[{}({})] failed: err = {}\", app->app_name, dup->id, err.to_string());\n        return;\n    }\n    blob value = dup->to_json_blob();\n\n    std::queue<std::string> nodes({get_duplication_path(*app), std::to_string(dup->id)});\n    _meta_svc->get_meta_storage()->create_node_recursively(\n        std::move(nodes), std::move(value), [app, this, dup, rpc]() mutable {\n            ddebug_dup(dup,\n                       \"add duplication successfully [app_name: {}, follower: {}]\",\n                       app->app_name,\n                       dup->follower_cluster_name);\n\n            // The duplication starts only after it's been persisted.\n            dup->persist_status();\n\n            auto &resp = rpc.response();\n            resp.err = ERR_OK;\n            resp.appid = app->app_id;\n            resp.dupid = dup->id;\n\n            zauto_write_lock l(app_lock());\n            refresh_duplicating_no_lock(app);\n        });\n}\n\n/// get all available apps on node `ns`\nvoid meta_duplication_service::get_all_available_app(\n    const node_state &ns, std::map<int32_t, std::shared_ptr<app_state>> &app_map) const\n{\n    ns.for_each_partition([this, &ns, &app_map](const gpid &pid) -> bool {\n        if (ns.served_as(pid) != partition_status::PS_PRIMARY) {\n            return true;\n        }\n\n        std::shared_ptr<app_state> app = _state->get_app(pid.get_app_id());\n        if (!app || app->status != app_status::AS_AVAILABLE) {\n            return true;\n        }\n\n        // must have duplication\n        if (app->duplications.empty()) {\n            return true;\n        }\n\n       
 if (app_map.find(app->app_id) == app_map.end()) {\n            app_map.emplace(std::make_pair(pid.get_app_id(), std::move(app)));\n        }\n        return true;\n    });\n}\n\n// ThreadPool(WRITE): THREAD_POOL_META_STATE\nvoid meta_duplication_service::duplication_sync(duplication_sync_rpc rpc)\n{\n    auto &request = rpc.request();\n    auto &response = rpc.response();\n    response.err = ERR_OK;\n\n    node_state *ns = get_node_state(_state->_nodes, request.node, false);\n    if (ns == nullptr) {\n        dwarn_f(\"node({}) is not found in meta server\", request.node.to_string());\n        response.err = ERR_OBJECT_NOT_FOUND;\n        return;\n    }\n\n    std::map<int32_t, std::shared_ptr<app_state>> app_map;\n    get_all_available_app(*ns, app_map);\n    for (const auto &kv : app_map) {\n        int32_t app_id = kv.first;\n        const auto &app = kv.second;\n\n        for (const auto &kv2 : app->duplications) {\n            dupid_t dup_id = kv2.first;\n            const auto &dup = kv2.second;\n            if (dup->is_invalid_status()) {\n                continue;\n            }\n\n            if (dup->status() < duplication_status::DS_LOG && dup->all_checkpoint_has_prepared()) {\n                if (dup->status() == duplication_status::DS_PREPARE) {\n                    create_follower_app_for_duplication(dup, app);\n                } else if (dup->status() == duplication_status::DS_APP) {\n                    check_follower_app_if_create_completed(dup);\n                }\n            }\n\n            response.dup_map[app_id][dup_id] = dup->to_duplication_entry();\n\n            // report progress periodically for each duplications\n            dup->report_progress_if_time_up();\n        }\n    }\n\n    /// update progress\n    for (const auto &kv : request.confirm_list) {\n        gpid gpid = kv.first;\n\n        auto it = app_map.find(gpid.get_app_id());\n        if (it == app_map.end()) {\n            // app is unsynced\n            // Since 
duplication-sync separates with config-sync, it's not guaranteed to have the\n            // latest state. duplication-sync has a loose consistency requirement.\n            continue;\n        }\n        std::shared_ptr<app_state> &app = it->second;\n\n        for (const duplication_confirm_entry &confirm : kv.second) {\n            auto it2 = app->duplications.find(confirm.dupid);\n            if (it2 == app->duplications.end()) {\n                // dup is unsynced\n                continue;\n            }\n\n            duplication_info_s_ptr &dup = it2->second;\n            if (dup->is_invalid_status()) {\n                continue;\n            }\n            do_update_partition_confirmed(dup, rpc, gpid.get_partition_index(), confirm);\n        }\n    }\n}\n\nvoid meta_duplication_service::create_follower_app_for_duplication(\n    const std::shared_ptr<duplication_info> &dup, const std::shared_ptr<app_state> &app)\n{\n    configuration_create_app_request request;\n    request.app_name = app->app_name;\n    request.options.app_type = app->app_type;\n    request.options.partition_count = app->partition_count;\n    request.options.replica_count = app->max_replica_count;\n    request.options.success_if_exist = false;\n    request.options.envs = app->envs;\n    request.options.is_stateful = app->is_stateful;\n\n    // add envs for follower table, which will use it know itself is `follower` and load master info\n    // - env map:\n    // `kDuplicationEnvMasterClusterKey=>{master_cluster_name}`\n    // `kDuplicationEnvMasterMetasKey=>{master_meta_list}`\n    request.options.envs.emplace(duplication_constants::kDuplicationEnvMasterClusterKey,\n                                 get_current_cluster_name());\n    request.options.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey,\n                                 _meta_svc->get_meta_list_string());\n\n    rpc_address meta_servers;\n    meta_servers.assign_group(dup->follower_cluster_name.c_str());\n    
meta_servers.group_address()->add_list(dup->follower_cluster_metas);\n\n    dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_CREATE_APP);\n    dsn::marshall(msg, request);\n    rpc::call(\n        meta_servers,\n        msg,\n        _meta_svc->tracker(),\n        [=](error_code err, configuration_create_app_response &&resp) mutable {\n            FAIL_POINT_INJECT_NOT_RETURN_F(\"update_app_request_ok\",\n                                           [&](string_view s) -> void { err = ERR_OK; });\n            error_code create_err = err == ERR_OK ? resp.err : err;\n            error_code update_err = ERR_NO_NEED_OPERATE;\n\n            FAIL_POINT_INJECT_NOT_RETURN_F(\"persist_dup_status_failed\",\n                                           [&](string_view s) -> void { create_err = ERR_OK; });\n            if (create_err == ERR_OK) {\n                update_err = dup->alter_status(duplication_status::DS_APP);\n            }\n\n            FAIL_POINT_INJECT_F(\"persist_dup_status_failed\",\n                                [&](string_view s) -> void { return; });\n            if (update_err == ERR_OK) {\n                blob value = dup->to_json_blob();\n                // Note: this function is `async`, it may not be persisted completed\n                // after executing, now using `_is_altering` to judge whether `updating` or\n                // `completed`, if `_is_altering`, dup->alter_status() will return `ERR_BUSY`\n                _meta_svc->get_meta_storage()->set_data(std::string(dup->store_path),\n                                                        std::move(value),\n                                                        [=]() { dup->persist_status(); });\n            } else {\n                derror_f(\"created follower app[{}.{}] to trigger duplicate checkpoint failed: \"\n                         \"duplication_status = {}, create_err = {}, update_err = {}\",\n                         dup->follower_cluster_name,\n                         
dup->app_name,\n                         duplication_status_to_string(dup->status()),\n                         create_err.to_string(),\n                         update_err.to_string());\n            }\n        });\n}\n\nvoid meta_duplication_service::check_follower_app_if_create_completed(\n    const std::shared_ptr<duplication_info> &dup)\n{\n    rpc_address meta_servers;\n    meta_servers.assign_group(dup->follower_cluster_name.c_str());\n    meta_servers.group_address()->add_list(dup->follower_cluster_metas);\n\n    configuration_query_by_index_request meta_config_request;\n    meta_config_request.app_name = dup->app_name;\n\n    dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX);\n    dsn::marshall(msg, meta_config_request);\n    rpc::call(meta_servers,\n              msg,\n              _meta_svc->tracker(),\n              [=](error_code err, configuration_query_by_index_response &&resp) mutable {\n                  FAIL_POINT_INJECT_NOT_RETURN_F(\"create_app_ok\", [&](string_view s) -> void {\n                      err = ERR_OK;\n                      int count = dup->partition_count;\n                      while (count-- > 0) {\n                          partition_configuration p;\n                          p.primary = rpc_address(\"127.0.0.1\", 34801);\n                          p.secondaries.emplace_back(rpc_address(\"127.0.0.2\", 34801));\n                          p.secondaries.emplace_back(rpc_address(\"127.0.0.3\", 34801));\n                          resp.partitions.emplace_back(p);\n                      }\n                  });\n\n                  // - ERR_INCONSISTENT_STATE: partition count of response isn't equal with local\n                  // - ERR_INACTIVE_STATE: the follower table hasn't been healthy\n                  error_code query_err = err == ERR_OK ? 
resp.err : err;\n                  if (query_err == ERR_OK) {\n                      if (resp.partitions.size() != dup->partition_count) {\n                          query_err = ERR_INCONSISTENT_STATE;\n                      } else {\n                          for (const auto &partition : resp.partitions) {\n                              if (partition.primary.is_invalid()) {\n                                  query_err = ERR_INACTIVE_STATE;\n                                  break;\n                              }\n\n                              if (partition.secondaries.empty()) {\n                                  query_err = ERR_NOT_ENOUGH_MEMBER;\n                                  break;\n                              }\n\n                              for (const auto &secondary : partition.secondaries) {\n                                  if (secondary.is_invalid()) {\n                                      query_err = ERR_INACTIVE_STATE;\n                                      break;\n                                  }\n                              }\n                          }\n                      }\n                  }\n\n                  error_code update_err = ERR_NO_NEED_OPERATE;\n                  if (query_err == ERR_OK) {\n                      update_err = dup->alter_status(duplication_status::DS_LOG);\n                  }\n\n                  FAIL_POINT_INJECT_F(\"persist_dup_status_failed\",\n                                      [&](string_view s) -> void { return; });\n                  if (update_err == ERR_OK) {\n                      blob value = dup->to_json_blob();\n                      // Note: this function is `async`, it may not be persisted completed\n                      // after executing, now using `_is_altering` to judge whether `updating` or\n                      // `completed`, if `_is_altering`, dup->alter_status() will return `ERR_BUSY`\n                      
_meta_svc->get_meta_storage()->set_data(std::string(dup->store_path),\n                                                              std::move(value),\n                                                              [dup]() { dup->persist_status(); });\n                  } else {\n                      derror_f(\"query follower app[{}.{}] replica configuration completed, result: \"\n                               \"duplication_status = {}, query_err = {}, update_err = {}\",\n                               dup->follower_cluster_name,\n                               dup->app_name,\n                               duplication_status_to_string(dup->status()),\n                               query_err.to_string(),\n                               update_err);\n                  }\n              });\n}\n\nvoid meta_duplication_service::do_update_partition_confirmed(\n    duplication_info_s_ptr &dup,\n    duplication_sync_rpc &rpc,\n    int32_t partition_idx,\n    const duplication_confirm_entry &confirm_entry)\n{\n    if (dup->alter_progress(partition_idx, confirm_entry)) {\n        std::string path = get_partition_path(dup, std::to_string(partition_idx));\n        blob value = blob::create_from_bytes(std::to_string(confirm_entry.confirmed_decree));\n\n        _meta_svc->get_meta_storage()->get_data(std::string(path), [=](const blob &data) mutable {\n            if (data.length() == 0) {\n                _meta_svc->get_meta_storage()->create_node(\n                    std::string(path), std::move(value), [=]() mutable {\n                        dup->persist_progress(partition_idx);\n                        rpc.response().dup_map[dup->app_id][dup->id].progress[partition_idx] =\n                            confirm_entry.confirmed_decree;\n                    });\n            } else {\n                _meta_svc->get_meta_storage()->set_data(\n                    std::string(path), std::move(value), [=]() mutable {\n                        dup->persist_progress(partition_idx);\n 
                       rpc.response().dup_map[dup->app_id][dup->id].progress[partition_idx] =\n                            confirm_entry.confirmed_decree;\n                    });\n            }\n\n            // duplication_sync_rpc will finally be replied when confirmed points\n            // of all partitions are stored.\n        });\n    }\n}\n\nstd::shared_ptr<duplication_info>\nmeta_duplication_service::new_dup_from_init(const std::string &follower_cluster_name,\n                                            std::vector<rpc_address> &&follower_cluster_metas,\n                                            std::shared_ptr<app_state> &app) const\n{\n    duplication_info_s_ptr dup;\n\n    // use current time to identify this duplication.\n    auto dupid = static_cast<dupid_t>(dsn_now_ms() / 1000);\n    {\n        zauto_write_lock l(app_lock());\n\n        // hold write lock here to ensure that dupid is unique\n        while (app->duplications.find(dupid) != app->duplications.end())\n            dupid++;\n\n        std::string dup_path = get_duplication_path(*app, std::to_string(dupid));\n        dup = std::make_shared<duplication_info>(dupid,\n                                                 app->app_id,\n                                                 app->app_name,\n                                                 app->partition_count,\n                                                 dsn_now_ms(),\n                                                 follower_cluster_name,\n                                                 std::move(follower_cluster_metas),\n                                                 std::move(dup_path));\n        for (int32_t i = 0; i < app->partition_count; i++) {\n            dup->init_progress(i, invalid_decree);\n        }\n\n        app->duplications.emplace(dup->id, dup);\n    }\n\n    return dup;\n}\n\n// ThreadPool(WRITE): THREAD_POOL_META_STATE\nvoid meta_duplication_service::recover_from_meta_state()\n{\n    ddebug_f(\"recovering 
duplication states from meta storage\");\n\n    // /<app>/duplication/<dupid>/<partition_idx>\n    //                       |         |-> confirmed_decree\n    //                       |\n    //                       |-> json of dup info\n\n    for (const auto &kv : _state->_exist_apps) {\n        std::shared_ptr<app_state> app = kv.second;\n        if (app->status != app_status::AS_AVAILABLE) {\n            continue;\n        }\n\n        _meta_svc->get_meta_storage()->get_children(\n            get_duplication_path(*app),\n            [this, app](bool node_exists, const std::vector<std::string> &dup_id_list) {\n                if (!node_exists) {\n                    // if there's no duplication\n                    return;\n                }\n                for (const std::string &raw_dup_id : dup_id_list) {\n                    dupid_t dup_id;\n                    if (!buf2int32(raw_dup_id, dup_id)) {\n                        // unlikely\n                        derror_f(\"invalid duplication path: {}\",\n                                 get_duplication_path(*app, raw_dup_id));\n                        return;\n                    }\n                    do_restore_duplication(dup_id, app);\n                }\n            });\n    }\n}\n\n// ThreadPool(WRITE): THREAD_POOL_META_STATE\nvoid meta_duplication_service::do_restore_duplication_progress(\n    const duplication_info_s_ptr &dup, const std::shared_ptr<app_state> &app)\n{\n    for (int partition_idx = 0; partition_idx < app->partition_count; partition_idx++) {\n        std::string str_pidx = std::to_string(partition_idx);\n\n        // <app_path>/duplication/<dup_id>/<partition_index>\n        std::string partition_path = get_partition_path(dup, str_pidx);\n\n        _meta_svc->get_meta_storage()->get_data(\n            std::move(partition_path), [dup, partition_idx](const blob &value) {\n                // value is confirmed_decree encoded in string.\n\n                if (value.size() == 0) {\n           
         // not found\n                    dup->init_progress(partition_idx, invalid_decree);\n                    return;\n                }\n\n                int64_t confirmed_decree = invalid_decree;\n                if (!buf2int64(value, confirmed_decree)) {\n                    derror_dup(dup,\n                               \"invalid confirmed_decree {} on partition_idx {}\",\n                               value.to_string(),\n                               partition_idx);\n                    return; // fail fast\n                }\n\n                dup->init_progress(partition_idx, confirmed_decree);\n\n                ddebug_dup(dup,\n                           \"initialize progress from metastore [partition_idx: {}, confirmed: {}]\",\n                           partition_idx,\n                           confirmed_decree);\n            });\n    }\n}\n\n// ThreadPool(WRITE): THREAD_POOL_META_STATE\nvoid meta_duplication_service::do_restore_duplication(dupid_t dup_id,\n                                                      std::shared_ptr<app_state> app)\n{\n    std::string store_path = get_duplication_path(*app, std::to_string(dup_id));\n\n    // restore duplication info from json\n    _meta_svc->get_meta_storage()->get_data(\n        std::string(store_path),\n        [ dup_id, this, app = std::move(app), store_path ](const blob &json) {\n            zauto_write_lock l(app_lock());\n\n            auto dup = duplication_info::decode_from_blob(\n                dup_id, app->app_id, app->app_name, app->partition_count, store_path, json);\n            if (nullptr == dup) {\n                derror_f(\"failed to decode json \\\"{}\\\" on path {}\", json.to_string(), store_path);\n                return; // fail fast\n            }\n            if (!dup->is_invalid_status()) {\n                app->duplications[dup->id] = dup;\n                refresh_duplicating_no_lock(app);\n\n                // restore progress\n                
do_restore_duplication_progress(dup, app);\n            }\n        });\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/duplication/meta_duplication_service.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"meta/server_state.h\"\n#include \"meta/meta_data.h\"\n\nnamespace dsn {\nnamespace replication {\n\n/// On meta storage, duplication info are stored in the following layout:\n///\n///   <app_path>/duplication/<dup_id> -> {\n///                                         \"remote\": ...,\n///                                         \"status\": ...,\n///                                         \"create_timestamp_ms\": ...,\n///                                      }\n///\n///   <app_path>/duplication/<dup_id>/<partition_index> -> <confirmed_decree>\n///\n/// Each app has an attribute called \"duplicating\" which indicates\n/// whether this app should prevent its unconfirmed WAL from being compacted.\n///\n\n/// Ref-Issue: https://github.com/apache/incubator-pegasus/issues/892\nclass meta_duplication_service\n{\npublic:\n    meta_duplication_service(server_state *ss, meta_service *ms) : _state(ss), _meta_svc(ms)\n    {\n        dassert(_state, \"_state should not be null\");\n        dassert(_meta_svc, \"_meta_svc should not be null\");\n    }\n\n    /// See replication.thrift for possible errors for each rpc.\n\n    void 
query_duplication_info(const duplication_query_request &, duplication_query_response &);\n\n    void add_duplication(duplication_add_rpc rpc);\n\n    void modify_duplication(duplication_modify_rpc rpc);\n\n    void duplication_sync(duplication_sync_rpc rpc);\n\n    // Recover from meta state storage.\n    void recover_from_meta_state();\n\nprivate:\n    void do_add_duplication(std::shared_ptr<app_state> &app,\n                            duplication_info_s_ptr &dup,\n                            duplication_add_rpc &rpc);\n\n    void do_modify_duplication(std::shared_ptr<app_state> &app,\n                               duplication_info_s_ptr &dup,\n                               duplication_modify_rpc &rpc);\n\n    void do_restore_duplication(dupid_t dup_id, std::shared_ptr<app_state> app);\n\n    void do_restore_duplication_progress(const duplication_info_s_ptr &dup,\n                                         const std::shared_ptr<app_state> &app);\n\n    void get_all_available_app(const node_state &ns,\n                               std::map<int32_t, std::shared_ptr<app_state>> &app_map) const;\n\n    void do_update_partition_confirmed(duplication_info_s_ptr &dup,\n                                       duplication_sync_rpc &rpc,\n                                       int32_t partition_idx,\n                                       const duplication_confirm_entry &confirm_entry);\n\n    void create_follower_app_for_duplication(const std::shared_ptr<duplication_info> &dup,\n                                             const std::shared_ptr<app_state> &app);\n    void check_follower_app_if_create_completed(const std::shared_ptr<duplication_info> &dup);\n\n    // Get zk path for duplication.\n    std::string get_duplication_path(const app_state &app) const\n    {\n        return _state->get_app_path(app) + \"/duplication\";\n    }\n    std::string get_duplication_path(const app_state &app, const std::string &dupid) const\n    {\n        return 
get_duplication_path(app) + \"/\" + dupid;\n    }\n    static std::string get_partition_path(const duplication_info_s_ptr &dup,\n                                          const std::string &partition_idx)\n    {\n        return dup->store_path + \"/\" + partition_idx;\n    }\n\n    // Create a new duplication from INIT state.\n    // Thread-Safe\n    std::shared_ptr<duplication_info>\n    new_dup_from_init(const std::string &follower_cluster_name,\n                      std::vector<rpc_address> &&follower_cluster_metas,\n                      std::shared_ptr<app_state> &app) const;\n\n    // get lock to protect access of app table\n    zrwlock_nr &app_lock() const { return _state->_lock; }\n\n    // `duplicating` will be set to true if any dup is valid among app->duplications.\n    // ensure app_lock (write lock) is held before calling this function\n    static void refresh_duplicating_no_lock(const std::shared_ptr<app_state> &app)\n    {\n        for (const auto &kv : app->duplications) {\n            const auto &dup = kv.second;\n            if (!dup->is_invalid_status()) {\n                app->__set_duplicating(true);\n                return;\n            }\n        }\n        app->__set_duplicating(false);\n    }\n\nprivate:\n    friend class meta_duplication_service_test;\n\n    server_state *_state;\n\n    meta_service *_meta_svc;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/greedy_load_balancer.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <iostream>\n#include <queue>\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/utility/math.h>\n#include <dsn/utility/utils.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/fail_point.h>\n#include \"greedy_load_balancer.h\"\n#include \"meta_data.h\"\n#include \"meta_admin_types.h\"\n#include \"app_balance_policy.h\"\n#include \"cluster_balance_policy.h\"\n\nnamespace dsn {\nnamespace replication {\nDSN_DEFINE_bool(\"meta_server\", balance_cluster, false, \"whether to enable cluster balancer\");\nDSN_TAG_VARIABLE(balance_cluster, FT_MUTABLE);\n\nDSN_DECLARE_uint64(min_live_node_count_for_unfreeze);\n\ngreedy_load_balancer::greedy_load_balancer(meta_service *_svc)\n    : server_load_balancer(_svc), 
_get_balance_operation_count(nullptr)\n{\n    _app_balance_policy = dsn::make_unique<app_balance_policy>(_svc);\n    _cluster_balance_policy = dsn::make_unique<cluster_balance_policy>(_svc);\n\n    ::memset(t_operation_counters, 0, sizeof(t_operation_counters));\n\n    // init perf counters\n    _balance_operation_count.init_app_counter(\"eon.greedy_balancer\",\n                                              \"balance_operation_count\",\n                                              COUNTER_TYPE_NUMBER,\n                                              \"balance operation count to be done\");\n    _recent_balance_move_primary_count.init_app_counter(\n        \"eon.greedy_balancer\",\n        \"recent_balance_move_primary_count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"move primary count by balancer in the recent period\");\n    _recent_balance_copy_primary_count.init_app_counter(\n        \"eon.greedy_balancer\",\n        \"recent_balance_copy_primary_count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"copy primary count by balancer in the recent period\");\n    _recent_balance_copy_secondary_count.init_app_counter(\n        \"eon.greedy_balancer\",\n        \"recent_balance_copy_secondary_count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"copy secondary count by balancer in the recent period\");\n}\n\ngreedy_load_balancer::~greedy_load_balancer() { unregister_ctrl_commands(); }\n\nvoid greedy_load_balancer::register_ctrl_commands()\n{\n    _get_balance_operation_count = dsn::command_manager::instance().register_command(\n        {\"meta.lb.get_balance_operation_count\"},\n        \"meta.lb.get_balance_operation_count [total | move_pri | copy_pri | copy_sec | detail]\",\n        \"get balance operation count\",\n        [this](const std::vector<std::string> &args) { return get_balance_operation_count(args); });\n}\n\nvoid greedy_load_balancer::unregister_ctrl_commands()\n{\n    
UNREGISTER_VALID_HANDLER(_get_balance_operation_count);\n}\n\nstd::string greedy_load_balancer::get_balance_operation_count(const std::vector<std::string> &args)\n{\n    if (args.empty()) {\n        return std::string(\"total=\" + std::to_string(t_operation_counters[ALL_COUNT]));\n    }\n\n    if (args[0] == \"total\") {\n        return std::string(\"total=\" + std::to_string(t_operation_counters[ALL_COUNT]));\n    }\n\n    std::string result(\"unknown\");\n    if (args[0] == \"move_pri\")\n        result = std::string(\"move_pri=\" + std::to_string(t_operation_counters[MOVE_PRI_COUNT]));\n    else if (args[0] == \"copy_pri\")\n        result = std::string(\"copy_pri=\" + std::to_string(t_operation_counters[COPY_PRI_COUNT]));\n    else if (args[0] == \"copy_sec\")\n        result = std::string(\"copy_sec=\" + std::to_string(t_operation_counters[COPY_SEC_COUNT]));\n    else if (args[0] == \"detail\")\n        result = std::string(\"move_pri=\" + std::to_string(t_operation_counters[MOVE_PRI_COUNT]) +\n                             \",copy_pri=\" + std::to_string(t_operation_counters[COPY_PRI_COUNT]) +\n                             \",copy_sec=\" + std::to_string(t_operation_counters[COPY_SEC_COUNT]) +\n                             \",total=\" + std::to_string(t_operation_counters[ALL_COUNT]));\n    else\n        result = std::string(\"ERR: invalid arguments\");\n\n    return result;\n}\n\nvoid greedy_load_balancer::score(meta_view view, double &primary_stddev, double &total_stddev)\n{\n    // Calculate stddev of primary and partition count for current meta-view\n    std::vector<uint32_t> primary_count;\n    std::vector<uint32_t> partition_count;\n\n    primary_stddev = 0.0;\n    total_stddev = 0.0;\n\n    bool primary_partial_sample = false;\n    bool partial_sample = false;\n\n    for (auto iter = view.nodes->begin(); iter != view.nodes->end(); ++iter) {\n        const node_state &node = iter->second;\n        if (node.alive()) {\n            if 
(node.partition_count() != 0) {\n                primary_count.emplace_back(node.primary_count());\n                partition_count.emplace_back(node.partition_count());\n            }\n        } else {\n            if (node.primary_count() != 0) {\n                primary_partial_sample = true;\n            }\n            if (node.partition_count() != 0) {\n                partial_sample = true;\n            }\n        }\n    }\n\n    if (primary_count.size() <= 1 || partition_count.size() <= 1)\n        return;\n\n    primary_stddev = utils::mean_stddev(primary_count, primary_partial_sample);\n    total_stddev = utils::mean_stddev(partition_count, partial_sample);\n}\n\nbool greedy_load_balancer::all_replica_infos_collected(const node_state &ns)\n{\n    dsn::rpc_address n = ns.addr();\n    return ns.for_each_partition([this, n](const dsn::gpid &pid) {\n        config_context &cc = *get_config_context(*(t_global_view->apps), pid);\n        if (cc.find_from_serving(n) == cc.serving.end()) {\n            ddebug(\"meta server hasn't collected gpid(%d.%d)'s info of %s\",\n                   pid.get_app_id(),\n                   pid.get_partition_index(),\n                   n.to_string());\n            return false;\n        }\n        return true;\n    });\n}\n\nvoid greedy_load_balancer::greedy_balancer(const bool balance_checker)\n{\n    dassert(t_alive_nodes >= FLAGS_min_live_node_count_for_unfreeze,\n            \"too few nodes will be freezed\");\n\n    for (auto &kv : *(t_global_view->nodes)) {\n        node_state &ns = kv.second;\n        if (!all_replica_infos_collected(ns)) {\n            return;\n        }\n    }\n\n    load_balance_policy *balance_policy = nullptr;\n    if (!FLAGS_balance_cluster) {\n        balance_policy = _app_balance_policy.get();\n    } else if (!balance_checker) {\n        balance_policy = _cluster_balance_policy.get();\n    }\n    if (balance_policy != nullptr) {\n        balance_policy->balance(balance_checker, t_global_view, 
t_migration_result);\n    }\n}\n\nbool greedy_load_balancer::balance(meta_view view, migration_list &list)\n{\n    ddebug(\"balancer round\");\n    list.clear();\n\n    t_alive_nodes = view.nodes->size();\n    t_global_view = &view;\n    t_migration_result = &list;\n    t_migration_result->clear();\n\n    greedy_balancer(false);\n    return !t_migration_result->empty();\n}\n\nbool greedy_load_balancer::check(meta_view view, migration_list &list)\n{\n    ddebug(\"balance checker round\");\n    list.clear();\n\n    t_alive_nodes = view.nodes->size();\n    t_global_view = &view;\n    t_migration_result = &list;\n    t_migration_result->clear();\n\n    greedy_balancer(true);\n    return !t_migration_result->empty();\n}\n\nvoid greedy_load_balancer::report(const dsn::replication::migration_list &list,\n                                  bool balance_checker)\n{\n    int counters[MAX_COUNT];\n    ::memset(counters, 0, sizeof(counters));\n\n    counters[ALL_COUNT] = list.size();\n    for (const auto &action : list) {\n        switch (action.second.get()->balance_type) {\n        case balancer_request_type::move_primary:\n            counters[MOVE_PRI_COUNT]++;\n            break;\n        case balancer_request_type::copy_primary:\n            counters[COPY_PRI_COUNT]++;\n            break;\n        case balancer_request_type::copy_secondary:\n            counters[COPY_SEC_COUNT]++;\n            break;\n        default:\n            dassert(false, \"\");\n        }\n    }\n    ::memcpy(t_operation_counters, counters, sizeof(counters));\n\n    // update perf counters\n    _balance_operation_count->set(list.size());\n    if (!balance_checker) {\n        _recent_balance_move_primary_count->add(counters[MOVE_PRI_COUNT]);\n        _recent_balance_copy_primary_count->add(counters[COPY_PRI_COUNT]);\n        _recent_balance_copy_secondary_count->add(counters[COPY_SEC_COUNT]);\n    }\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/greedy_load_balancer.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     A greedy load balancer based on Dijkstra & Ford-Fulkerson\n *\n * Revision history:\n *     2016-02-03, Weijie Sun, first version\n */\n\n#pragma once\n\n#include \"server_load_balancer.h\"\n\nnamespace dsn {\nnamespace replication {\nclass load_balance_policy;\n\nclass greedy_load_balancer : public server_load_balancer\n{\npublic:\n    explicit greedy_load_balancer(meta_service *svc);\n    ~greedy_load_balancer() override;\n    bool balance(meta_view view, migration_list &list) override;\n    bool check(meta_view view, migration_list &list) override;\n    void report(const migration_list &list, bool balance_checker) override;\n    void score(meta_view view, double &primary_stddev, double &total_stddev) 
override;\n\n    void register_ctrl_commands() override;\n    void unregister_ctrl_commands() override;\n\n    std::string get_balance_operation_count(const std::vector<std::string> &args) override;\n\nprivate:\n    enum operation_counters\n    {\n        MOVE_PRI_COUNT = 0,\n        COPY_PRI_COUNT = 1,\n        COPY_SEC_COUNT = 2,\n        ALL_COUNT = 3,\n        MAX_COUNT = 4\n    };\n\n    // these variables are temporarily assigned by interface \"balance\"\n    const meta_view *t_global_view;\n    migration_list *t_migration_result;\n    int t_alive_nodes;\n    int t_operation_counters[MAX_COUNT];\n\n    std::unique_ptr<load_balance_policy> _app_balance_policy;\n    std::unique_ptr<load_balance_policy> _cluster_balance_policy;\n\n    dsn_handle_t _get_balance_operation_count;\n\n    // perf counters\n    perf_counter_wrapper _balance_operation_count;\n    perf_counter_wrapper _recent_balance_move_primary_count;\n    perf_counter_wrapper _recent_balance_copy_primary_count;\n    perf_counter_wrapper _recent_balance_copy_secondary_count;\n\nprivate:\n    void greedy_balancer(bool balance_checker);\n    bool all_replica_infos_collected(const node_state &ns);\n};\n\ninline configuration_proposal_action\nnew_proposal_action(const rpc_address &target, const rpc_address &node, config_type::type type)\n{\n    configuration_proposal_action act;\n    act.__set_target(target);\n    act.__set_node(node);\n    act.__set_type(type);\n    return act;\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/load_balance_policy.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"load_balance_policy.h\"\n#include \"greedy_load_balancer.h\"\n\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/fail_point.h>\n\nnamespace dsn {\nnamespace replication {\nDSN_DECLARE_uint64(min_live_node_count_for_unfreeze);\n\nvoid dump_disk_load(app_id id, const rpc_address &node, bool only_primary, const disk_load &load)\n{\n    std::ostringstream load_string;\n    load_string << std::endl << \"<<<<<<<<<<\" << std::endl;\n    load_string << \"load for \" << node.to_string() << \", \"\n                << \"app id: \" << id;\n    if (only_primary) {\n        load_string << \", only for primary\";\n    }\n    load_string << std::endl;\n\n    for (const auto &kv : load) {\n        load_string << kv.first << \": \" << kv.second << std::endl;\n    }\n    load_string << \">>>>>>>>>>\";\n    dinfo(\"%s\", load_string.str().c_str());\n}\n\nbool calc_disk_load(node_mapper &nodes,\n                    const app_mapper &apps,\n                    app_id id,\n                    const rpc_address &node,\n                    bool only_primary,\n                    /*out*/ disk_load 
&load)\n{\n    load.clear();\n    const node_state *ns = get_node_state(nodes, node, false);\n    dassert(ns != nullptr, \"can't find node(%s) from node_state\", node.to_string());\n\n    auto add_one_replica_to_disk_load = [&](const gpid &pid) {\n        dinfo(\"add gpid(%d.%d) to node(%s) disk load\",\n              pid.get_app_id(),\n              pid.get_partition_index(),\n              node.to_string());\n        const config_context &cc = *get_config_context(apps, pid);\n        auto iter = cc.find_from_serving(node);\n        if (iter == cc.serving.end()) {\n            dwarn(\"can't collect gpid(%d.%d)'s info from %s, which should be primary\",\n                  pid.get_app_id(),\n                  pid.get_partition_index(),\n                  node.to_string());\n            return false;\n        } else {\n            load[iter->disk_tag]++;\n            return true;\n        }\n    };\n\n    if (only_primary) {\n        bool result = ns->for_each_primary(id, add_one_replica_to_disk_load);\n        dump_disk_load(id, node, only_primary, load);\n        return result;\n    } else {\n        bool result = ns->for_each_partition(id, add_one_replica_to_disk_load);\n        dump_disk_load(id, node, only_primary, load);\n        return result;\n    }\n}\n\nstd::unordered_map<dsn::rpc_address, disk_load>\nget_node_loads(const std::shared_ptr<app_state> &app,\n               const app_mapper &apps,\n               node_mapper &nodes,\n               bool only_primary)\n{\n    std::unordered_map<dsn::rpc_address, disk_load> node_loads;\n    for (auto iter = nodes.begin(); iter != nodes.end(); ++iter) {\n        if (!calc_disk_load(\n                nodes, apps, app->app_id, iter->first, only_primary, node_loads[iter->first])) {\n            dwarn_f(\"stop the balancer as some replica infos aren't collected, node({}), app({})\",\n                    iter->first.to_string(),\n                    app->get_logname());\n            return node_loads;\n        }\n    
}\n    return node_loads;\n}\n\nconst std::string &get_disk_tag(const app_mapper &apps, const rpc_address &node, const gpid &pid)\n{\n    const config_context &cc = *get_config_context(apps, pid);\n    auto iter = cc.find_from_serving(node);\n    dassert(iter != cc.serving.end(),\n            \"can't find disk tag of gpid(%d.%d) for %s\",\n            pid.get_app_id(),\n            pid.get_partition_index(),\n            node.to_string());\n    return iter->disk_tag;\n}\n\nstd::shared_ptr<configuration_balancer_request>\ngenerate_balancer_request(const app_mapper &apps,\n                          const partition_configuration &pc,\n                          const balance_type &type,\n                          const rpc_address &from,\n                          const rpc_address &to)\n{\n    FAIL_POINT_INJECT_F(\"generate_balancer_request\", [](string_view name) { return nullptr; });\n\n    configuration_balancer_request result;\n    result.gpid = pc.pid;\n\n    std::string ans;\n    switch (type) {\n    case balance_type::MOVE_PRIMARY:\n        ans = \"move_primary\";\n        result.balance_type = balancer_request_type::move_primary;\n        result.action_list.emplace_back(\n            new_proposal_action(from, from, config_type::CT_DOWNGRADE_TO_SECONDARY));\n        result.action_list.emplace_back(\n            new_proposal_action(to, to, config_type::CT_UPGRADE_TO_PRIMARY));\n        break;\n    case balance_type::COPY_PRIMARY:\n        ans = \"copy_primary\";\n        result.balance_type = balancer_request_type::copy_primary;\n        result.action_list.emplace_back(\n            new_proposal_action(from, to, config_type::CT_ADD_SECONDARY_FOR_LB));\n        result.action_list.emplace_back(\n            new_proposal_action(from, from, config_type::CT_DOWNGRADE_TO_SECONDARY));\n        result.action_list.emplace_back(\n            new_proposal_action(to, to, config_type::CT_UPGRADE_TO_PRIMARY));\n        result.action_list.emplace_back(new_proposal_action(to, 
from, config_type::CT_REMOVE));\n        break;\n    case balance_type::COPY_SECONDARY:\n        ans = \"copy_secondary\";\n        result.balance_type = balancer_request_type::copy_secondary;\n        result.action_list.emplace_back(\n            new_proposal_action(pc.primary, to, config_type::CT_ADD_SECONDARY_FOR_LB));\n        result.action_list.emplace_back(\n            new_proposal_action(pc.primary, from, config_type::CT_REMOVE));\n        break;\n    default:\n        dassert(false, \"\");\n    }\n    ddebug(\"generate balancer: %d.%d %s from %s of disk_tag(%s) to %s\",\n           pc.pid.get_app_id(),\n           pc.pid.get_partition_index(),\n           ans.c_str(),\n           from.to_string(),\n           get_disk_tag(apps, from, pc.pid).c_str(),\n           to.to_string());\n    return std::make_shared<configuration_balancer_request>(std::move(result));\n}\n\nload_balance_policy::load_balance_policy(meta_service *svc)\n    : _svc(svc), _ctrl_balancer_ignored_apps(nullptr)\n{\n    register_ctrl_commands();\n}\n\nload_balance_policy::~load_balance_policy() { unregister_ctrl_commands(); }\n\nvoid load_balance_policy::init(const meta_view *global_view, migration_list *list)\n{\n    _global_view = global_view;\n    _migration_result = list;\n    const node_mapper &nodes = *_global_view->nodes;\n    _alive_nodes = nodes.size();\n    number_nodes(nodes);\n}\n\nbool load_balance_policy::primary_balance(const std::shared_ptr<app_state> &app,\n                                          bool only_move_primary)\n{\n    dassert(_alive_nodes >= FLAGS_min_live_node_count_for_unfreeze,\n            \"too few alive nodes will lead to freeze\");\n    ddebug_f(\"primary balancer for app({}:{})\", app->app_name, app->app_id);\n\n    auto graph = ford_fulkerson::builder(app, *_global_view->nodes, address_id).build();\n    if (nullptr == graph) {\n        dinfo_f(\"the primaries are balanced for app({}:{})\", app->app_name, app->app_id);\n        return true;\n    }\n\n    
auto path = graph->find_shortest_path();\n    if (path != nullptr) {\n        dinfo_f(\"{} primaries are flew\", path->_flow.back());\n        return move_primary(std::move(path));\n    } else {\n        ddebug_f(\"we can't make the server load more balanced by moving primaries to secondaries\");\n        if (!only_move_primary) {\n            return copy_primary(app, graph->have_less_than_average());\n        } else {\n            ddebug_f(\"stop to copy primary for app({}) coz it is disabled\", app->get_logname());\n            return true;\n        }\n    }\n}\n\nbool load_balance_policy::copy_primary(const std::shared_ptr<app_state> &app,\n                                       bool still_have_less_than_average)\n{\n    node_mapper &nodes = *(_global_view->nodes);\n    const app_mapper &apps = *_global_view->apps;\n    int replicas_low = app->partition_count / _alive_nodes;\n\n    std::unique_ptr<copy_replica_operation> operation = dsn::make_unique<copy_primary_operation>(\n        app, apps, nodes, address_vec, address_id, still_have_less_than_average, replicas_low);\n    return operation->start(_migration_result);\n}\n\nbool load_balance_policy::move_primary(std::unique_ptr<flow_path> path)\n{\n    // used to calculate the primary disk loads of each server.\n    // disk_load[disk_tag] means how many primaries on this \"disk_tag\".\n    // IF disk_load.find(disk_tag) == disk_load.end(), means 0\n    disk_load loads[2];\n    disk_load *prev_load = &loads[0];\n    disk_load *current_load = &loads[1];\n    node_mapper &nodes = *(_global_view->nodes);\n    const app_mapper &apps = *(_global_view->apps);\n\n    int current = path->_prev.back();\n    if (!calc_disk_load(\n            nodes, apps, path->_app->app_id, address_vec[current], true, *current_load)) {\n        dwarn_f(\"stop move primary as some replica infos aren't collected, node({}), app({})\",\n                address_vec[current].to_string(),\n                path->_app->get_logname());\n        return 
false;\n    }\n\n    int plan_moving = path->_flow.back();\n    while (path->_prev[current] != 0) {\n        rpc_address from = address_vec[path->_prev[current]];\n        rpc_address to = address_vec[current];\n        if (!calc_disk_load(nodes, apps, path->_app->app_id, from, true, *prev_load)) {\n            dwarn_f(\"stop move primary as some replica infos aren't collected, node({}), app({})\",\n                    from.to_string(),\n                    path->_app->get_logname());\n            return false;\n        }\n\n        start_moving_primary(path->_app, from, to, plan_moving, prev_load, current_load);\n\n        current = path->_prev[current];\n        std::swap(current_load, prev_load);\n    }\n    return true;\n}\n\nvoid load_balance_policy::start_moving_primary(const std::shared_ptr<app_state> &app,\n                                               const rpc_address &from,\n                                               const rpc_address &to,\n                                               int plan_moving,\n                                               disk_load *prev_load,\n                                               disk_load *current_load)\n{\n    std::list<dsn::gpid> potential_moving = calc_potential_moving(app, from, to);\n    auto potential_moving_size = potential_moving.size();\n    dassert_f(plan_moving <= potential_moving_size,\n              \"from({}) to({}) plan({}), can_move({})\",\n              from.to_string(),\n              to.to_string(),\n              plan_moving,\n              potential_moving_size);\n\n    while (plan_moving-- > 0) {\n        dsn::gpid selected = select_moving(potential_moving, prev_load, current_load, from, to);\n\n        const partition_configuration &pc = app->partitions[selected.get_partition_index()];\n        auto balancer_result = _migration_result->emplace(\n            selected,\n            generate_balancer_request(\n                *_global_view->apps, pc, balance_type::MOVE_PRIMARY, from, 
to));\n        dassert_f(balancer_result.second, \"gpid({}) already inserted as an action\", selected);\n\n        --(*prev_load)[get_disk_tag(*_global_view->apps, from, selected)];\n        ++(*current_load)[get_disk_tag(*_global_view->apps, to, selected)];\n    }\n}\n\nstd::list<dsn::gpid> load_balance_policy::calc_potential_moving(\n    const std::shared_ptr<app_state> &app, const rpc_address &from, const rpc_address &to)\n{\n    std::list<dsn::gpid> potential_moving;\n    const node_state &ns = _global_view->nodes->find(from)->second;\n    ns.for_each_primary(app->app_id, [&](const gpid &pid) {\n        const partition_configuration &pc = app->partitions[pid.get_partition_index()];\n        if (is_secondary(pc, to)) {\n            potential_moving.push_back(pid);\n        }\n        return true;\n    });\n    return potential_moving;\n}\n\ndsn::gpid load_balance_policy::select_moving(std::list<dsn::gpid> &potential_moving,\n                                             disk_load *prev_load,\n                                             disk_load *current_load,\n                                             rpc_address from,\n                                             rpc_address to)\n{\n    std::list<dsn::gpid>::iterator selected = potential_moving.end();\n    int max = std::numeric_limits<int>::min();\n\n    for (auto it = potential_moving.begin(); it != potential_moving.end(); ++it) {\n        int load_difference = (*prev_load)[get_disk_tag(*_global_view->apps, from, *it)] -\n                              (*current_load)[get_disk_tag(*_global_view->apps, to, *it)];\n        if (load_difference > max) {\n            max = load_difference;\n            selected = it;\n        }\n    }\n\n    dassert_f(selected != potential_moving.end(),\n              \"can't find gpid to move from({}) to({})\",\n              from.to_string(),\n              to.to_string());\n    auto res = *selected;\n    potential_moving.erase(selected);\n    return res;\n}\n\nbool 
load_balance_policy::execute_balance(\n    const app_mapper &apps,\n    bool balance_checker,\n    bool balance_in_turn,\n    bool only_move_primary,\n    const std::function<bool(const std::shared_ptr<app_state> &, bool)> &balance_operation)\n{\n    for (const auto &kv : apps) {\n        const std::shared_ptr<app_state> &app = kv.second;\n        if (is_ignored_app(kv.first)) {\n            ddebug_f(\"skip to do balance for the ignored app[{}]\", app->get_logname());\n            continue;\n        }\n        if (app->status != app_status::AS_AVAILABLE || app->is_bulk_loading || app->splitting())\n            continue;\n\n        bool enough_information = balance_operation(app, only_move_primary);\n        if (!enough_information) {\n            // Even if we don't have enough info for current app,\n            // the decisions made by previous apps are kept.\n            // t_migration_result->empty();\n            return false;\n        }\n        if (!balance_checker) {\n            if (!_migration_result->empty()) {\n                if (balance_in_turn) {\n                    ddebug(\"stop to handle more apps after we found some actions for %s\",\n                           app->get_logname());\n                    return false;\n                }\n            }\n        }\n    }\n    return true;\n}\n\nvoid load_balance_policy::register_ctrl_commands()\n{\n    static std::once_flag flag;\n    std::call_once(flag, [&]() {\n        _ctrl_balancer_ignored_apps = dsn::command_manager::instance().register_command(\n            {\"meta.lb.ignored_app_list\"},\n            \"meta.lb.ignored_app_list <get|set|clear> [app_id1,app_id2..]\",\n            \"get, set and clear balancer ignored_app_list\",\n            [this](const std::vector<std::string> &args) {\n                return remote_command_balancer_ignored_app_ids(args);\n            });\n    });\n}\n\nvoid load_balance_policy::unregister_ctrl_commands()\n{\n    
UNREGISTER_VALID_HANDLER(_ctrl_balancer_ignored_apps);\n}\n\nstd::string\nload_balance_policy::remote_command_balancer_ignored_app_ids(const std::vector<std::string> &args)\n{\n    static const std::string invalid_arguments(\"invalid arguments\");\n    if (args.empty()) {\n        return invalid_arguments;\n    }\n    if (args[0] == \"set\") {\n        return set_balancer_ignored_app_ids(args);\n    }\n    if (args[0] == \"get\") {\n        return get_balancer_ignored_app_ids();\n    }\n    if (args[0] == \"clear\") {\n        return clear_balancer_ignored_app_ids();\n    }\n    return invalid_arguments;\n}\n\nstd::string load_balance_policy::set_balancer_ignored_app_ids(const std::vector<std::string> &args)\n{\n    static const std::string invalid_arguments(\"invalid arguments\");\n    if (args.size() != 2) {\n        return invalid_arguments;\n    }\n\n    std::vector<std::string> app_ids;\n    dsn::utils::split_args(args[1].c_str(), app_ids, ',');\n    if (app_ids.empty()) {\n        return invalid_arguments;\n    }\n\n    std::set<app_id> app_list;\n    for (const std::string &app_id_str : app_ids) {\n        app_id app;\n        if (!dsn::buf2int32(app_id_str, app)) {\n            return invalid_arguments;\n        }\n        app_list.insert(app);\n    }\n\n    dsn::zauto_write_lock l(_balancer_ignored_apps_lock);\n    _balancer_ignored_apps = std::move(app_list);\n    return \"set ok\";\n}\n\nstd::string load_balance_policy::get_balancer_ignored_app_ids()\n{\n    std::stringstream oss;\n    dsn::zauto_read_lock l(_balancer_ignored_apps_lock);\n    if (_balancer_ignored_apps.empty()) {\n        return \"no ignored apps\";\n    }\n    oss << \"ignored_app_id_list: \";\n    std::copy(_balancer_ignored_apps.begin(),\n              _balancer_ignored_apps.end(),\n              std::ostream_iterator<app_id>(oss, \",\"));\n    std::string app_ids = oss.str();\n    app_ids[app_ids.size() - 1] = '\\0';\n    return app_ids;\n}\n\nstd::string 
load_balance_policy::clear_balancer_ignored_app_ids()\n{\n    dsn::zauto_write_lock l(_balancer_ignored_apps_lock);\n    _balancer_ignored_apps.clear();\n    return \"clear ok\";\n}\n\nbool load_balance_policy::is_ignored_app(app_id app_id)\n{\n    dsn::zauto_read_lock l(_balancer_ignored_apps_lock);\n    return _balancer_ignored_apps.find(app_id) != _balancer_ignored_apps.end();\n}\n\nvoid load_balance_policy::number_nodes(const node_mapper &nodes)\n{\n    int current_id = 1;\n\n    address_id.clear();\n    address_vec.resize(_alive_nodes + 2);\n    for (auto iter = nodes.begin(); iter != nodes.end(); ++iter) {\n        dassert(!iter->first.is_invalid() && !iter->second.addr().is_invalid(), \"invalid address\");\n        dassert(iter->second.alive(), \"dead node\");\n\n        address_id[iter->first] = current_id;\n        address_vec[current_id] = iter->first;\n        ++current_id;\n    }\n}\n\nford_fulkerson::ford_fulkerson(const std::shared_ptr<app_state> &app,\n                               const node_mapper &nodes,\n                               const std::unordered_map<dsn::rpc_address, int> &address_id,\n                               uint32_t higher_count,\n                               uint32_t lower_count,\n                               int replicas_low)\n    : _app(app),\n      _nodes(nodes),\n      _address_id(address_id),\n      _higher_count(higher_count),\n      _lower_count(lower_count),\n      _replicas_low(replicas_low)\n{\n    make_graph();\n}\n\n// using Dijkstra to find shortest path\nstd::unique_ptr<flow_path> ford_fulkerson::find_shortest_path()\n{\n    std::vector<bool> visit(_graph_nodes, false);\n    std::vector<int> flow(_graph_nodes, 0);\n    std::vector<int> prev(_graph_nodes, -1);\n    flow[0] = INT_MAX;\n    while (!visit.back()) {\n        auto pos = select_node(visit, flow);\n        if (pos == -1) {\n            break;\n        }\n        update_flow(pos, visit, _network, flow, prev);\n    }\n\n    if (visit.back() && 
flow.back() != 0) {\n        return dsn::make_unique<struct flow_path>(_app, std::move(flow), std::move(prev));\n    } else {\n        return nullptr;\n    }\n}\n\nvoid ford_fulkerson::make_graph()\n{\n    _graph_nodes = _nodes.size() + 2;\n    _network.resize(_graph_nodes, std::vector<int>(_graph_nodes, 0));\n    for (const auto &node : _nodes) {\n        int node_id = _address_id.at(node.first);\n        add_edge(node_id, node.second);\n        update_decree(node_id, node.second);\n    }\n    handle_corner_case();\n}\n\nvoid ford_fulkerson::add_edge(int node_id, const node_state &ns)\n{\n    int primary_count = ns.primary_count(_app->app_id);\n    if (primary_count > _replicas_low) {\n        _network[0][node_id] = primary_count - _replicas_low;\n    } else {\n        _network[node_id].back() = _replicas_low - primary_count;\n    }\n}\n\nvoid ford_fulkerson::update_decree(int node_id, const node_state &ns)\n{\n    ns.for_each_primary(_app->app_id, [&, this](const gpid &pid) {\n        const partition_configuration &pc = _app->partitions[pid.get_partition_index()];\n        for (const auto &secondary : pc.secondaries) {\n            auto i = _address_id.find(secondary);\n            dassert_f(i != _address_id.end(),\n                      \"invalid secondary address, address = {}\",\n                      secondary.to_string());\n            _network[node_id][i->second]++;\n        }\n        return true;\n    });\n}\n\nvoid ford_fulkerson::handle_corner_case()\n{\n    // Suppose you have an 8-shard app in a cluster with 3 nodes(which name are node1, node2,\n    // node3). 
The distribution of primaries among these nodes is as follows:\n    // node1 : [0, 1, 2, 3]\n    // node2 : [4, 5]\n    // node3 : [6, 7]\n    // This is obviously unbalanced.\n    // But if we don't handle this corner case, primary migration will not be triggered\n    if (_higher_count > 0 && _lower_count == 0) {\n        for (int i = 0; i != _graph_nodes; ++i) {\n            if (_network[0][i] > 0)\n                --_network[0][i];\n            else\n                ++_network[i][_graph_nodes - 1];\n        }\n    }\n}\n\nint ford_fulkerson::select_node(std::vector<bool> &visit, const std::vector<int> &flow)\n{\n    auto pos = max_value_pos(visit, flow);\n    if (pos != -1) {\n        visit[pos] = true;\n    }\n    return pos;\n}\n\nint ford_fulkerson::max_value_pos(const std::vector<bool> &visit, const std::vector<int> &flow)\n{\n    int pos = -1, max_value = 0;\n    for (auto i = 0; i != _graph_nodes; ++i) {\n        if (!visit[i] && flow[i] > max_value) {\n            pos = i;\n            max_value = flow[i];\n        }\n    }\n    return pos;\n}\n\nvoid ford_fulkerson::update_flow(int pos,\n                                 const std::vector<bool> &visit,\n                                 const std::vector<std::vector<int>> &network,\n                                 std::vector<int> &flow,\n                                 std::vector<int> &prev)\n{\n    for (auto i = 0; i != _graph_nodes; ++i) {\n        if (visit[i]) {\n            continue;\n        }\n\n        auto min = std::min(flow[pos], network[pos][i]);\n        if (min > flow[i]) {\n            flow[i] = min;\n            prev[i] = pos;\n        }\n    }\n}\n\ncopy_replica_operation::copy_replica_operation(\n    const std::shared_ptr<app_state> app,\n    const app_mapper &apps,\n    node_mapper &nodes,\n    const std::vector<dsn::rpc_address> &address_vec,\n    const std::unordered_map<dsn::rpc_address, int> &address_id)\n    : _app(app), _apps(apps), _nodes(nodes), _address_vec(address_vec), 
_address_id(address_id)\n{\n}\n\nbool copy_replica_operation::start(migration_list *result)\n{\n    init_ordered_address_ids();\n    _node_loads = get_node_loads(_app, _apps, _nodes, only_copy_primary());\n    if (_node_loads.size() != _nodes.size()) {\n        return false;\n    }\n\n    while (true) {\n        if (!can_continue()) {\n            break;\n        }\n\n        gpid selected_pid = select_partition(result);\n        if (selected_pid.get_app_id() != -1) {\n            copy_once(selected_pid, result);\n            update_ordered_address_ids();\n        } else {\n            _ordered_address_ids.erase(--_ordered_address_ids.end());\n        }\n    }\n    return true;\n}\n\nconst partition_set *copy_replica_operation::get_all_partitions()\n{\n    int id_max = *_ordered_address_ids.rbegin();\n    const node_state &ns = _nodes.find(_address_vec[id_max])->second;\n    const partition_set *partitions = ns.partitions(_app->app_id, only_copy_primary());\n    return partitions;\n}\n\ngpid copy_replica_operation::select_max_load_gpid(const partition_set *partitions,\n                                                  migration_list *result)\n{\n    int id_max = *_ordered_address_ids.rbegin();\n    const disk_load &load_on_max = _node_loads.at(_address_vec[id_max]);\n\n    gpid selected_pid(-1, -1);\n    int max_load = -1;\n    for (const gpid &pid : *partitions) {\n        if (!can_select(pid, result)) {\n            continue;\n        }\n\n        const std::string &disk_tag = get_disk_tag(_apps, _address_vec[id_max], pid);\n        auto load = load_on_max.at(disk_tag);\n        if (load > max_load) {\n            selected_pid = pid;\n            max_load = load;\n        }\n    }\n    return selected_pid;\n}\n\nvoid copy_replica_operation::copy_once(gpid selected_pid, migration_list *result)\n{\n    auto from = _address_vec[*_ordered_address_ids.rbegin()];\n    auto to = _address_vec[*_ordered_address_ids.begin()];\n\n    auto pc = 
_app->partitions[selected_pid.get_partition_index()];\n    auto request = generate_balancer_request(_apps, pc, get_balance_type(), from, to);\n    result->emplace(selected_pid, request);\n}\n\nvoid copy_replica_operation::update_ordered_address_ids()\n{\n    int id_min = *_ordered_address_ids.begin();\n    int id_max = *_ordered_address_ids.rbegin();\n    --_partition_counts[id_max];\n    ++_partition_counts[id_min];\n\n    _ordered_address_ids.erase(_ordered_address_ids.begin());\n    _ordered_address_ids.erase(--_ordered_address_ids.end());\n\n    _ordered_address_ids.insert(id_max);\n    _ordered_address_ids.insert(id_min);\n}\n\nvoid copy_replica_operation::init_ordered_address_ids()\n{\n    _partition_counts.resize(_address_vec.size(), 0);\n    for (const auto &iter : _nodes) {\n        auto id = _address_id.at(iter.first);\n        _partition_counts[id] = get_partition_count(iter.second);\n    }\n\n    std::set<int, std::function<bool(int left, int right)>> ordered_queue(\n        [this](int left, int right) {\n            return _partition_counts[left] != _partition_counts[right]\n                       ? 
_partition_counts[left] < _partition_counts[right]\n                       : left < right;\n        });\n    for (const auto &iter : _nodes) {\n        auto id = _address_id.at(iter.first);\n        ordered_queue.insert(id);\n    }\n    _ordered_address_ids.swap(ordered_queue);\n}\n\ngpid copy_replica_operation::select_partition(migration_list *result)\n{\n    const partition_set *partitions = get_all_partitions();\n\n    int id_max = *_ordered_address_ids.rbegin();\n    const node_state &ns = _nodes.find(_address_vec[id_max])->second;\n    dassert_f(partitions != nullptr && !partitions->empty(),\n              \"max load({}) shouldn't empty\",\n              ns.addr().to_string());\n\n    return select_max_load_gpid(partitions, result);\n}\n\ncopy_primary_operation::copy_primary_operation(\n    const std::shared_ptr<app_state> app,\n    const app_mapper &apps,\n    node_mapper &nodes,\n    const std::vector<dsn::rpc_address> &address_vec,\n    const std::unordered_map<dsn::rpc_address, int> &address_id,\n    bool have_lower_than_average,\n    int replicas_low)\n    : copy_replica_operation(app, apps, nodes, address_vec, address_id)\n{\n    _have_lower_than_average = have_lower_than_average;\n    _replicas_low = replicas_low;\n}\n\nint copy_primary_operation::get_partition_count(const node_state &ns) const\n{\n    return ns.primary_count(_app->app_id);\n}\n\nbool copy_primary_operation::can_select(gpid pid, migration_list *result)\n{\n    return result->find(pid) == result->end();\n}\n\nbool copy_primary_operation::can_continue()\n{\n    int id_min = *_ordered_address_ids.begin();\n    if (_have_lower_than_average && _partition_counts[id_min] >= _replicas_low) {\n        ddebug_f(\"{}: stop the copy due to primaries on all nodes will reach low later.\",\n                 _app->get_logname());\n        return false;\n    }\n\n    int id_max = *_ordered_address_ids.rbegin();\n    if (!_have_lower_than_average && _partition_counts[id_max] - _partition_counts[id_min] 
<= 1) {\n        ddebug_f(\"{}: stop the copy due to the primary will be balanced later.\",\n                 _app->get_logname());\n        return false;\n    }\n    return true;\n}\n\nenum balance_type copy_primary_operation::get_balance_type() { return balance_type::COPY_PRIMARY; }\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/load_balance_policy.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"meta_data.h\"\n\nnamespace dsn {\nnamespace replication {\n// disk_tag->primary_count/total_count_on_this_disk\ntypedef std::map<std::string, int> disk_load;\n\nenum class balance_type\n{\n    COPY_PRIMARY = 0,\n    COPY_SECONDARY,\n    MOVE_PRIMARY,\n    INVALID,\n};\nENUM_BEGIN(balance_type, balance_type::INVALID)\nENUM_REG(balance_type::COPY_PRIMARY)\nENUM_REG(balance_type::COPY_SECONDARY)\nENUM_REG(balance_type::MOVE_PRIMARY)\nENUM_END(balance_type)\n\nbool calc_disk_load(node_mapper &nodes,\n                    const app_mapper &apps,\n                    app_id id,\n                    const rpc_address &node,\n                    bool only_primary,\n                    /*out*/ disk_load &load);\nconst std::string &get_disk_tag(const app_mapper &apps, const rpc_address &node, const gpid &pid);\nstd::shared_ptr<configuration_balancer_request>\ngenerate_balancer_request(const app_mapper &apps,\n                          const partition_configuration &pc,\n                          const balance_type &type,\n                          const rpc_address &from,\n                          const rpc_address 
&to);\n\nstruct flow_path;\nclass meta_service;\nclass load_balance_policy\n{\npublic:\n    load_balance_policy(meta_service *svc);\n    virtual ~load_balance_policy() = 0;\n\n    virtual void balance(bool checker, const meta_view *global_view, migration_list *list) = 0;\n\nprotected:\n    void init(const meta_view *global_view, migration_list *list);\n    bool is_ignored_app(app_id app_id);\n\n    bool execute_balance(\n        const app_mapper &apps,\n        bool balance_checker,\n        bool balance_in_turn,\n        bool only_move_primary,\n        const std::function<bool(const std::shared_ptr<app_state> &, bool)> &balance_operation);\n    bool primary_balance(const std::shared_ptr<app_state> &app, bool only_move_primary);\n    bool move_primary(std::unique_ptr<flow_path> path);\n    bool copy_primary(const std::shared_ptr<app_state> &app, bool still_have_less_than_average);\n\n    meta_service *_svc;\n    const meta_view *_global_view;\n    migration_list *_migration_result;\n    int _alive_nodes;\n    // this is used to assign an integer id for every node\n    // and these are generated from the above data, which are temporary too\n    std::unordered_map<dsn::rpc_address, int> address_id;\n    std::vector<dsn::rpc_address> address_vec;\n\n    // the app set which won't be re-balanced\n    dsn::zrwlock_nr _balancer_ignored_apps_lock; // {\n    std::set<app_id> _balancer_ignored_apps;\n    // }\n    dsn_handle_t _ctrl_balancer_ignored_apps;\n\nprivate:\n    void start_moving_primary(const std::shared_ptr<app_state> &app,\n                              const rpc_address &from,\n                              const rpc_address &to,\n                              int plan_moving,\n                              disk_load *prev_load,\n                              disk_load *current_load);\n    std::list<dsn::gpid> calc_potential_moving(const std::shared_ptr<app_state> &app,\n                                               const rpc_address &from,\n                   
                            const rpc_address &to);\n    dsn::gpid select_moving(std::list<dsn::gpid> &potential_moving,\n                            disk_load *prev_load,\n                            disk_load *current_load,\n                            rpc_address from,\n                            rpc_address to);\n    void number_nodes(const node_mapper &nodes);\n\n    std::string remote_command_balancer_ignored_app_ids(const std::vector<std::string> &args);\n    std::string set_balancer_ignored_app_ids(const std::vector<std::string> &args);\n    std::string get_balancer_ignored_app_ids();\n    std::string clear_balancer_ignored_app_ids();\n\n    void register_ctrl_commands();\n    void unregister_ctrl_commands();\n\n    FRIEND_TEST(cluster_balance_policy, calc_potential_moving);\n};\n\nstruct flow_path\n{\n    flow_path(const std::shared_ptr<app_state> &app,\n              std::vector<int> &&flow,\n              std::vector<int> &&prev)\n        : _app(app), _flow(std::move(flow)), _prev(std::move(prev))\n    {\n    }\n\n    const std::shared_ptr<app_state> &_app;\n    std::vector<int> _flow, _prev;\n};\n\n// Ford Fulkerson is used for primary balance.\n// For more details: https://levy5307.github.io/blog/pegasus-balancer/\nclass ford_fulkerson\n{\npublic:\n    ford_fulkerson() = delete;\n    ford_fulkerson(const std::shared_ptr<app_state> &app,\n                   const node_mapper &nodes,\n                   const std::unordered_map<dsn::rpc_address, int> &address_id,\n                   uint32_t higher_count,\n                   uint32_t lower_count,\n                   int replicas_low);\n\n    // using Dijkstra's algorithm to find the shortest path\n    std::unique_ptr<flow_path> find_shortest_path();\n    bool have_less_than_average() const { return _lower_count != 0; }\n\n    class builder\n    {\n    public:\n        builder(const std::shared_ptr<app_state> &app,\n                const node_mapper &nodes,\n                const std::unordered_map<dsn::rpc_address, int> 
&address_id)\n            : _app(app), _nodes(nodes), _address_id(address_id)\n        {\n        }\n\n        std::unique_ptr<ford_fulkerson> build()\n        {\n            auto nodes_count = _nodes.size();\n            int replicas_low = _app->partition_count / nodes_count;\n            int replicas_high = (_app->partition_count + nodes_count - 1) / nodes_count;\n\n            uint32_t higher_count = 0, lower_count = 0;\n            for (const auto &node : _nodes) {\n                int primary_count = node.second.primary_count(_app->app_id);\n                if (primary_count > replicas_high) {\n                    higher_count++;\n                } else if (primary_count < replicas_low) {\n                    lower_count++;\n                }\n            }\n\n            if (0 == higher_count && 0 == lower_count) {\n                return nullptr;\n            }\n            return dsn::make_unique<ford_fulkerson>(\n                _app, _nodes, _address_id, higher_count, lower_count, replicas_low);\n        }\n\n    private:\n        const std::shared_ptr<app_state> &_app;\n        const node_mapper &_nodes;\n        const std::unordered_map<dsn::rpc_address, int> &_address_id;\n    };\n\nprivate:\n    void make_graph();\n    void add_edge(int node_id, const node_state &ns);\n    void update_decree(int node_id, const node_state &ns);\n    void handle_corner_case();\n\n    int select_node(std::vector<bool> &visit, const std::vector<int> &flow);\n    int max_value_pos(const std::vector<bool> &visit, const std::vector<int> &flow);\n    void update_flow(int pos,\n                     const std::vector<bool> &visit,\n                     const std::vector<std::vector<int>> &network,\n                     std::vector<int> &flow,\n                     std::vector<int> &prev);\n\n    const std::shared_ptr<app_state> &_app;\n    const node_mapper &_nodes;\n    const std::unordered_map<dsn::rpc_address, int> &_address_id;\n    uint32_t _higher_count;\n    uint32_t 
_lower_count;\n    int _replicas_low;\n    size_t _graph_nodes;\n    std::vector<std::vector<int>> _network;\n\n    FRIEND_TEST(ford_fulkerson, add_edge);\n    FRIEND_TEST(ford_fulkerson, update_decree);\n    FRIEND_TEST(ford_fulkerson, find_shortest_path);\n    FRIEND_TEST(ford_fulkerson, max_value_pos);\n    FRIEND_TEST(ford_fulkerson, select_node);\n};\n\nclass copy_replica_operation\n{\npublic:\n    copy_replica_operation(const std::shared_ptr<app_state> app,\n                           const app_mapper &apps,\n                           node_mapper &nodes,\n                           const std::vector<dsn::rpc_address> &address_vec,\n                           const std::unordered_map<dsn::rpc_address, int> &address_id);\n    virtual ~copy_replica_operation() = default;\n\n    bool start(migration_list *result);\n\nprotected:\n    void init_ordered_address_ids();\n    virtual int get_partition_count(const node_state &ns) const = 0;\n\n    gpid select_partition(migration_list *result);\n    const partition_set *get_all_partitions();\n    gpid select_max_load_gpid(const partition_set *partitions, migration_list *result);\n    void copy_once(gpid selected_pid, migration_list *result);\n    void update_ordered_address_ids();\n    virtual bool only_copy_primary() = 0;\n    virtual bool can_select(gpid pid, migration_list *result) = 0;\n    virtual bool can_continue() = 0;\n    virtual balance_type get_balance_type() = 0;\n\n    std::set<int, std::function<bool(int left, int right)>> _ordered_address_ids;\n    const std::shared_ptr<app_state> _app;\n    const app_mapper &_apps;\n    node_mapper &_nodes;\n    const std::vector<dsn::rpc_address> &_address_vec;\n    const std::unordered_map<dsn::rpc_address, int> &_address_id;\n    std::unordered_map<dsn::rpc_address, disk_load> _node_loads;\n    std::vector<int> _partition_counts;\n\n    FRIEND_TEST(copy_primary_operation, misc);\n    FRIEND_TEST(copy_replica_operation, get_all_partitions);\n};\n\nclass 
copy_primary_operation : public copy_replica_operation\n{\npublic:\n    copy_primary_operation(const std::shared_ptr<app_state> app,\n                           const app_mapper &apps,\n                           node_mapper &nodes,\n                           const std::vector<dsn::rpc_address> &address_vec,\n                           const std::unordered_map<dsn::rpc_address, int> &address_id,\n                           bool have_lower_than_average,\n                           int replicas_low);\n    ~copy_primary_operation() = default;\n\nprivate:\n    int get_partition_count(const node_state &ns) const;\n\n    bool only_copy_primary() { return true; }\n    bool can_select(gpid pid, migration_list *result);\n    bool can_continue();\n    enum balance_type get_balance_type();\n\n    bool _have_lower_than_average;\n    int _replicas_low;\n\n    FRIEND_TEST(copy_primary_operation, misc);\n    FRIEND_TEST(copy_primary_operation, can_select);\n    FRIEND_TEST(copy_primary_operation, only_copy_primary);\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_backup_service.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/http/http_server.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/output_utils.h>\n#include <dsn/utils/time_utils.h>\n\n#include \"block_service/block_service_manager.h\"\n#include \"common/backup_common.h\"\n#include \"meta_backup_service.h\"\n#include \"meta_service.h\"\n#include \"server_state.h\"\n\nnamespace dsn {\nnamespace replication {\n\n// TODO: backup_service and policy_context should need two locks, its own _lock and server_state's\n// _lock this maybe lead to deadlock, should refactor this\n\nvoid policy_context::start_backup_app_meta_unlocked(int32_t app_id)\n{\n    server_state *state = _backup_service->get_state();\n    dsn::blob buffer;\n    bool app_available = false;\n    {\n        zauto_read_lock l;\n        state->lock_read(l);\n        const std::shared_ptr<app_state> &app = state->get_app(app_id);\n        if (app != nullptr && app->status == app_status::AS_AVAILABLE) {\n            app_available = true;\n            // do not persistent envs to backup file\n            if (app->envs.empty()) {\n                buffer = 
dsn::json::json_forwarder<app_info>::encode(*app);\n            } else {\n                app_state tmp = *app;\n                tmp.envs.clear();\n                buffer = dsn::json::json_forwarder<app_info>::encode(tmp);\n            }\n        }\n    }\n\n    // if app is dropped when app is under backuping, we just skip backup this app this time, and\n    // also we will not write backup-finish-flag on fds\n    if (!app_available) {\n        dwarn(\"%s: can't encode app_info for app(%d), perhaps removed, treat it as backup finished\",\n              _backup_sig.c_str(),\n              app_id);\n        auto iter = _progress.unfinished_partitions_per_app.find(app_id);\n        dassert(iter != _progress.unfinished_partitions_per_app.end(),\n                \"%s: can't find app(%d) in unfished_map\",\n                _backup_sig.c_str(),\n                app_id);\n        _progress.is_app_skipped[app_id] = true;\n        int total_partitions = iter->second;\n        for (int32_t pidx = 0; pidx < total_partitions; ++pidx) {\n            update_partition_progress_unlocked(\n                gpid(app_id, pidx), cold_backup_constant::PROGRESS_FINISHED, dsn::rpc_address());\n        }\n        return;\n    }\n\n    dist::block_service::create_file_request create_file_req;\n    create_file_req.ignore_metadata = true;\n    create_file_req.file_name = cold_backup::get_app_metadata_file(_backup_service->backup_root(),\n                                                                   _policy.app_names.at(app_id),\n                                                                   app_id,\n                                                                   _cur_backup.backup_id);\n\n    dsn::error_code err;\n    dist::block_service::block_file_ptr remote_file;\n    // here we can use synchronous way coz create_file with ignored metadata is very fast\n    _block_service\n        ->create_file(create_file_req,\n                      TASK_CODE_EXEC_INLINED,\n                    
  [&err, &remote_file](const dist::block_service::create_file_response &resp) {\n                          err = resp.err;\n                          remote_file = resp.file_handle;\n                      })\n        ->wait();\n    if (err != dsn::ERR_OK) {\n        derror(\"%s: create file %s failed, restart this backup later\",\n               _backup_sig.c_str(),\n               create_file_req.file_name.c_str());\n        tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                         &_tracker,\n                         [this, app_id]() {\n                             zauto_lock l(_lock);\n                             start_backup_app_meta_unlocked(app_id);\n                         },\n                         0,\n                         _backup_service->backup_option().block_retry_delay_ms);\n        return;\n    }\n    dassert(remote_file != nullptr,\n            \"%s: create file(%s) succeed, but can't get handle\",\n            _backup_sig.c_str(),\n            create_file_req.file_name.c_str());\n\n    remote_file->write(\n        dist::block_service::write_request{buffer},\n        LPC_DEFAULT_CALLBACK,\n        [this, remote_file, buffer, app_id](const dist::block_service::write_response &resp) {\n            if (resp.err == dsn::ERR_OK) {\n                dassert(resp.written_size == buffer.length(),\n                        \"write %s length not match, source(%u), actual(%llu)\",\n                        remote_file->file_name().c_str(),\n                        buffer.length(),\n                        resp.written_size);\n                {\n                    zauto_lock l(_lock);\n                    ddebug(\"%s: successfully backup app metadata to %s\",\n                           _policy.policy_name.c_str(),\n                           remote_file->file_name().c_str());\n                    start_backup_app_partitions_unlocked(app_id);\n                }\n            } else if (resp.err == ERR_FS_INTERNAL) {\n                zauto_lock 
l(_lock);\n                _is_backup_failed = true;\n                derror_f(\"write {} failed, err = {}, don't try again when got this error.\",\n                         remote_file->file_name(),\n                         resp.err.to_string());\n                return;\n            } else {\n                dwarn(\"write %s failed, reason(%s), try it later\",\n                      remote_file->file_name().c_str(),\n                      resp.err.to_string());\n                tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                                 &_tracker,\n                                 [this, app_id]() {\n                                     zauto_lock l(_lock);\n                                     start_backup_app_meta_unlocked(app_id);\n                                 },\n                                 0,\n                                 _backup_service->backup_option().block_retry_delay_ms);\n            }\n        },\n        &_tracker);\n}\n\nvoid policy_context::start_backup_app_partitions_unlocked(int32_t app_id)\n{\n    auto iter = _progress.unfinished_partitions_per_app.find(app_id);\n    dassert(iter != _progress.unfinished_partitions_per_app.end(),\n            \"%s: can't find app(%d) in unfinished apps\",\n            _backup_sig.c_str(),\n            app_id);\n    for (int32_t i = 0; i < iter->second; ++i) {\n        start_backup_partition_unlocked(gpid(app_id, i));\n    }\n}\n\nvoid policy_context::write_backup_app_finish_flag_unlocked(int32_t app_id,\n                                                           dsn::task_ptr write_callback)\n{\n    if (_progress.is_app_skipped[app_id]) {\n        dwarn(\"app is unavaliable, skip write finish flag for this app(app_id = %d)\", app_id);\n        if (write_callback != nullptr) {\n            write_callback->enqueue();\n        }\n        return;\n    }\n\n    backup_flag flag;\n    flag.total_checkpoint_size = 0;\n\n    for (const auto &pair : _progress.app_chkpt_size[app_id]) {\n        
flag.total_checkpoint_size += pair.second;\n    }\n\n    dsn::error_code err;\n    dist::block_service::block_file_ptr remote_file;\n\n    dist::block_service::create_file_request create_file_req;\n    create_file_req.ignore_metadata = true;\n    create_file_req.file_name =\n        cold_backup::get_app_backup_status_file(_backup_service->backup_root(),\n                                                _policy.app_names.at(app_id),\n                                                app_id,\n                                                _cur_backup.backup_id);\n    // here we can use synchronous way coz create_file with ignored metadata is very fast\n    _block_service\n        ->create_file(create_file_req,\n                      TASK_CODE_EXEC_INLINED,\n                      [&err, &remote_file](const dist::block_service::create_file_response &resp) {\n                          err = resp.err;\n                          remote_file = resp.file_handle;\n                      })\n        ->wait();\n\n    if (err != ERR_OK) {\n        derror(\"%s: create file %s failed, restart this backup later\",\n               _backup_sig.c_str(),\n               create_file_req.file_name.c_str());\n        tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                         &_tracker,\n                         [this, app_id, write_callback]() {\n                             zauto_lock l(_lock);\n                             write_backup_app_finish_flag_unlocked(app_id, write_callback);\n                         },\n                         0,\n                         _backup_service->backup_option().block_retry_delay_ms);\n        return;\n    }\n\n    dassert(remote_file != nullptr,\n            \"%s: create file(%s) succeed, but can't get handle\",\n            _backup_sig.c_str(),\n            create_file_req.file_name.c_str());\n    if (remote_file->get_size() > 0) {\n        // we only focus whether app_backup_status file is exist, so ignore app_backup_status file's\n        // 
context\n        ddebug(\"app(%d) already write finish-flag on block service\", app_id);\n        if (write_callback != nullptr) {\n            write_callback->enqueue();\n        }\n        return;\n    }\n\n    blob buf = ::dsn::json::json_forwarder<backup_flag>::encode(flag);\n\n    remote_file->write(\n        dist::block_service::write_request{buf},\n        LPC_DEFAULT_CALLBACK,\n        [this, app_id, write_callback, remote_file](\n            const dist::block_service::write_response &resp) {\n            if (resp.err == ERR_OK) {\n                ddebug(\"app(%d) finish backup and write finish-flag on block service succeed\",\n                       app_id);\n                if (write_callback != nullptr) {\n                    write_callback->enqueue();\n                }\n            } else if (resp.err == ERR_FS_INTERNAL) {\n                zauto_lock l(_lock);\n                _is_backup_failed = true;\n                derror_f(\"write {} failed, err = {}, don't try again when got this error.\",\n                         remote_file->file_name(),\n                         resp.err.to_string());\n                return;\n            } else {\n                dwarn(\"write %s failed, reason(%s), try it later\",\n                      remote_file->file_name().c_str(),\n                      resp.err.to_string());\n                tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                                 &_tracker,\n                                 [this, app_id, write_callback]() {\n                                     zauto_lock l(_lock);\n                                     write_backup_app_finish_flag_unlocked(app_id, write_callback);\n                                 },\n                                 0,\n                                 _backup_service->backup_option().block_retry_delay_ms);\n            }\n        });\n}\n\nvoid policy_context::finish_backup_app_unlocked(int32_t app_id)\n{\n    ddebug(\"%s: finish backup for app(%d), 
progress(%d)\",\n           _backup_sig.c_str(),\n           app_id,\n           _progress.unfinished_apps);\n    if (--_progress.unfinished_apps == 0) {\n        ddebug(\"%s: finish current backup for all apps\", _backup_sig.c_str());\n        _cur_backup.end_time_ms = dsn_now_ms();\n\n        task_ptr write_backup_info_callback =\n            tasking::create_task(LPC_DEFAULT_CALLBACK, &_tracker, [this]() {\n                task_ptr start_a_new_backup =\n                    tasking::create_task(LPC_DEFAULT_CALLBACK, &_tracker, [this]() {\n                        zauto_lock l(_lock);\n                        auto iter = _backup_history.emplace(_cur_backup.backup_id, _cur_backup);\n                        dassert(iter.second,\n                                \"%s: backup_id(%lld) already in the backup_history\",\n                                _policy.policy_name.c_str(),\n                                _cur_backup.backup_id);\n                        _cur_backup.start_time_ms = 0;\n                        _cur_backup.end_time_ms = 0;\n                        ddebug(\"%s: finish an old backup, try to start a new one\",\n                               _backup_sig.c_str());\n                        issue_new_backup_unlocked();\n                    });\n                sync_backup_to_remote_storage_unlocked(_cur_backup, start_a_new_backup, false);\n            });\n        write_backup_info_unlocked(_cur_backup, write_backup_info_callback);\n    }\n}\n\nvoid policy_context::write_backup_info_unlocked(const backup_info &b_info,\n                                                dsn::task_ptr write_callback)\n{\n    dsn::error_code err;\n    dist::block_service::block_file_ptr remote_file;\n\n    dist::block_service::create_file_request create_file_req;\n    create_file_req.ignore_metadata = true;\n    create_file_req.file_name =\n        cold_backup::get_backup_info_file(_backup_service->backup_root(), b_info.backup_id);\n    // here we can use synchronous way coz 
create_file with ignored metadata is very fast\n    _block_service\n        ->create_file(create_file_req,\n                      TASK_CODE_EXEC_INLINED,\n                      [&err, &remote_file](const dist::block_service::create_file_response &resp) {\n                          err = resp.err;\n                          remote_file = resp.file_handle;\n                      })\n        ->wait();\n\n    if (err != ERR_OK) {\n        derror(\"%s: create file %s failed, restart this backup later\",\n               _backup_sig.c_str(),\n               create_file_req.file_name.c_str());\n        tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                         &_tracker,\n                         [this, b_info, write_callback]() {\n                             zauto_lock l(_lock);\n                             write_backup_info_unlocked(b_info, write_callback);\n                         },\n                         0,\n                         _backup_service->backup_option().block_retry_delay_ms);\n        return;\n    }\n\n    dassert(remote_file != nullptr,\n            \"%s: create file(%s) succeed, but can't get handle\",\n            _backup_sig.c_str(),\n            create_file_req.file_name.c_str());\n\n    blob buf = dsn::json::json_forwarder<backup_info>::encode(b_info);\n\n    remote_file->write(\n        dist::block_service::write_request{buf},\n        LPC_DEFAULT_CALLBACK,\n        [this, b_info, write_callback, remote_file](\n            const dist::block_service::write_response &resp) {\n            if (resp.err == ERR_OK) {\n                ddebug(\"policy(%s) write backup_info to cold backup media succeed\",\n                       _policy.policy_name.c_str());\n                if (write_callback != nullptr) {\n                    write_callback->enqueue();\n                }\n            } else if (resp.err == ERR_FS_INTERNAL) {\n                zauto_lock l(_lock);\n                _is_backup_failed = true;\n                derror_f(\"write {} 
failed, err = {}, don't try again when got this error.\",\n                         remote_file->file_name(),\n                         resp.err.to_string());\n                return;\n            } else {\n                dwarn(\"write %s failed, reason(%s), try it later\",\n                      remote_file->file_name().c_str(),\n                      resp.err.to_string());\n                tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                                 &_tracker,\n                                 [this, b_info, write_callback]() {\n                                     zauto_lock l(_lock);\n                                     write_backup_info_unlocked(b_info, write_callback);\n                                 },\n                                 0,\n                                 _backup_service->backup_option().block_retry_delay_ms);\n            }\n        });\n}\n\nbool policy_context::update_partition_progress_unlocked(gpid pid,\n                                                        int32_t progress,\n                                                        const rpc_address &source)\n{\n    int32_t &local_progress = _progress.partition_progress[pid];\n    if (local_progress == cold_backup_constant::PROGRESS_FINISHED) {\n        dwarn_f(\"{}: backup of partition {} has been finished, ignore the backup response from {} \",\n                _backup_sig,\n                pid.to_string(),\n                source.to_string());\n        return true;\n    }\n\n    if (progress < local_progress) {\n        dwarn_f(\"{}: local backup progress {} is larger than progress {} from server {} for \"\n                \"partition {}, perhaps it's primary has changed\",\n                _backup_sig,\n                local_progress,\n                progress,\n                source.to_string(),\n                pid.to_string());\n    }\n\n    local_progress = progress;\n    dinfo_f(\n        \"{}: update partition {} backup progress to {}.\", _backup_sig, 
pid.to_string(), progress);\n    if (local_progress == cold_backup_constant::PROGRESS_FINISHED) {\n        ddebug_f(\"{}: finish backup for partition {}, the app has {} unfinished backup \"\n                 \"partition now.\",\n                 _backup_sig,\n                 pid.to_string(),\n                 _progress.unfinished_partitions_per_app[pid.get_app_id()]);\n\n        // update the progress-chain: partition => app => current_backup_instance\n        if (--_progress.unfinished_partitions_per_app[pid.get_app_id()] == 0) {\n            dsn::task_ptr task_after_write_finish_flag =\n                tasking::create_task(LPC_DEFAULT_CALLBACK, &_tracker, [this, pid]() {\n                    zauto_lock l(_lock);\n                    finish_backup_app_unlocked(pid.get_app_id());\n                });\n            write_backup_app_finish_flag_unlocked(pid.get_app_id(), task_after_write_finish_flag);\n        }\n    }\n    return local_progress == cold_backup_constant::PROGRESS_FINISHED;\n}\n\nvoid policy_context::record_partition_checkpoint_size_unlock(const gpid &pid, int64_t size)\n{\n    _progress.app_chkpt_size[pid.get_app_id()][pid.get_partition_index()] = size;\n}\n\nvoid policy_context::start_backup_partition_unlocked(gpid pid)\n{\n    dsn::rpc_address partition_primary;\n    {\n        // check app and partition status\n        zauto_read_lock l;\n        _backup_service->get_state()->lock_read(l);\n        const app_state *app = _backup_service->get_state()->get_app(pid.get_app_id()).get();\n\n        if (app == nullptr || app->status == app_status::AS_DROPPED) {\n            dwarn_f(\n                \"{}: app {} is not available, skip to backup it.\", _backup_sig, pid.get_app_id());\n            _progress.is_app_skipped[pid.get_app_id()] = true;\n            update_partition_progress_unlocked(\n                pid, cold_backup_constant::PROGRESS_FINISHED, dsn::rpc_address());\n            return;\n        }\n        partition_primary = 
app->partitions[pid.get_partition_index()].primary;\n    }\n    if (partition_primary.is_invalid()) {\n        dwarn_f(\"{}: partition {} doesn't have a primary now, retry to backup it later\",\n                _backup_sig,\n                pid.to_string());\n        tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                         &_tracker,\n                         [this, pid]() {\n                             zauto_lock l(_lock);\n                             start_backup_partition_unlocked(pid);\n                         },\n                         0,\n                         _backup_service->backup_option().reconfiguration_retry_delay_ms);\n        return;\n    }\n\n    backup_request req;\n    req.pid = pid;\n    req.policy = *(static_cast<const policy_info *>(&_policy));\n    req.backup_id = _cur_backup.backup_id;\n    req.app_name = _policy.app_names.at(pid.get_app_id());\n    dsn::message_ex *request =\n        dsn::message_ex::create_request(RPC_COLD_BACKUP, 0, pid.thread_hash());\n    dsn::marshall(request, req);\n    dsn::rpc_response_task_ptr rpc_callback = rpc::create_rpc_response_task(\n        request,\n        &_tracker,\n        [this, pid, partition_primary](error_code err, backup_response &&response) {\n            on_backup_reply(err, std::move(response), pid, partition_primary);\n        });\n    ddebug_f(\"{}: send backup command to partition {}, target_addr = {}\",\n             _backup_sig,\n             pid.to_string(),\n             partition_primary.to_string());\n    _backup_service->get_meta_service()->send_request(request, partition_primary, rpc_callback);\n}\n\nvoid policy_context::on_backup_reply(error_code err,\n                                     backup_response &&response,\n                                     gpid pid,\n                                     const rpc_address &primary)\n{\n    ddebug_f(\"{}: receive backup response for partition {} from server {}.\",\n             _backup_sig,\n             pid.to_string(),\n   
          primary.to_string());\n    if (err == dsn::ERR_OK && response.err == dsn::ERR_OK) {\n        dassert(response.policy_name == _policy.policy_name,\n                \"policy name(%s vs %s) don't match, pid(%d.%d), replica_server(%s)\",\n                _policy.policy_name.c_str(),\n                response.policy_name.c_str(),\n                pid.get_app_id(),\n                pid.get_partition_index(),\n                primary.to_string());\n        dassert(response.pid == pid,\n                \"%s: backup pid[(%d.%d) vs (%d.%d)] don't match, replica_server(%s)\",\n                _policy.policy_name.c_str(),\n                response.pid.get_app_id(),\n                response.pid.get_partition_index(),\n                pid.get_app_id(),\n                pid.get_partition_index(),\n                primary.to_string());\n        dassert(response.backup_id <= _cur_backup.backup_id,\n                \"%s: replica server(%s) has bigger backup_id(%lld), gpid(%d.%d)\",\n                _backup_sig.c_str(),\n                primary.to_string(),\n                response.backup_id,\n                pid.get_app_id(),\n                pid.get_partition_index());\n\n        if (response.backup_id < _cur_backup.backup_id) {\n            dwarn_f(\"{}: got a backup response of partition {} from server {}, whose backup id \"\n                    \"{} is smaller than current backup id {},  maybe it is a stale message\",\n                    _backup_sig,\n                    pid.to_string(),\n                    primary.to_string(),\n                    response.backup_id,\n                    _cur_backup.backup_id);\n        } else {\n            zauto_lock l(_lock);\n            record_partition_checkpoint_size_unlock(pid, response.checkpoint_total_size);\n            if (update_partition_progress_unlocked(pid, response.progress, primary)) {\n                // partition backup finished\n                return;\n            }\n        }\n    } else if (response.err == 
dsn::ERR_LOCAL_APP_FAILURE) {\n        zauto_lock l(_lock);\n        _is_backup_failed = true;\n        derror_f(\"{}: backup got error {} for partition {} from {}, don't try again when got \"\n                 \"this error.\",\n                 _backup_sig.c_str(),\n                 response.err.to_string(),\n                 pid.to_string(),\n                 primary.to_string());\n        return;\n    } else {\n        dwarn_f(\"{}: backup got error for partition {} from {}, rpc error {}, response error {}\",\n                _backup_sig.c_str(),\n                pid.to_string(),\n                primary.to_string(),\n                err.to_string(),\n                response.err.to_string());\n    }\n\n    // retry to backup the partition.\n    tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                     &_tracker,\n                     [this, pid]() {\n                         zauto_lock l(_lock);\n                         start_backup_partition_unlocked(pid);\n                     },\n                     0,\n                     _backup_service->backup_option().request_backup_period_ms);\n}\n\nvoid policy_context::initialize_backup_progress_unlocked()\n{\n    _progress.reset();\n\n    zauto_read_lock l;\n    _backup_service->get_state()->lock_read(l);\n\n    // NOTICE: the unfinished_apps is initialized with the app-set's size\n    // even if some apps are not available.\n    _progress.unfinished_apps = _cur_backup.app_ids.size();\n    for (const int32_t &app_id : _cur_backup.app_ids) {\n        const std::shared_ptr<app_state> &app = _backup_service->get_state()->get_app(app_id);\n        _progress.is_app_skipped[app_id] = true;\n        if (app == nullptr) {\n            dwarn(\"%s: app id(%d) is invalid\", _policy.policy_name.c_str(), app_id);\n        } else if (app->status != app_status::AS_AVAILABLE) {\n            dwarn(\"%s: %s is not available, status(%s)\",\n                  _policy.policy_name.c_str(),\n                  app->get_logname(),\n     
             enum_to_string(app->status));\n        } else {\n            // NOTICE: only available apps have entry in\n            // unfinished_partitions_per_app & partition_progress & app_chkpt_size\n            _progress.unfinished_partitions_per_app[app_id] = app->partition_count;\n            std::map<int, int64_t> partition_chkpt_size;\n            for (const partition_configuration &pc : app->partitions) {\n                _progress.partition_progress[pc.pid] = 0;\n                partition_chkpt_size[pc.pid.get_app_id()] = 0;\n            }\n            _progress.app_chkpt_size[app_id] = std::move(partition_chkpt_size);\n            _progress.is_app_skipped[app_id] = false;\n        }\n    }\n}\n\nvoid policy_context::prepare_current_backup_on_new_unlocked()\n{\n    // initialize the current backup structure\n    _cur_backup.backup_id = _cur_backup.start_time_ms = static_cast<int64_t>(dsn_now_ms());\n    _cur_backup.app_ids = _policy.app_ids;\n    _cur_backup.app_names = _policy.app_names;\n    _is_backup_failed = false;\n\n    initialize_backup_progress_unlocked();\n    _backup_sig =\n        _policy.policy_name + \"@\" + boost::lexical_cast<std::string>(_cur_backup.backup_id);\n}\n\nvoid policy_context::sync_backup_to_remote_storage_unlocked(const backup_info &b_info,\n                                                            task_ptr sync_callback,\n                                                            bool create_new_node)\n{\n    dsn::blob backup_data = dsn::json::json_forwarder<backup_info>::encode(b_info);\n    std::string backup_info_path =\n        _backup_service->get_backup_path(_policy.policy_name, b_info.backup_id);\n\n    auto callback = [this, b_info, sync_callback, create_new_node](dsn::error_code err) {\n        if (dsn::ERR_OK == err || (create_new_node && ERR_NODE_ALREADY_EXIST == err)) {\n            ddebug(\"%s: synced backup_info(%\" PRId64 \") to remote storage successfully,\"\n                   \" start real backup work, 
new_node_create(%s)\",\n                   _policy.policy_name.c_str(),\n                   b_info.backup_id,\n                   create_new_node ? \"true\" : \"false\");\n            if (sync_callback != nullptr) {\n                sync_callback->enqueue();\n            } else {\n                dwarn(\"%s: empty callback\", _policy.policy_name.c_str());\n            }\n        } else if (ERR_TIMEOUT == err) {\n            derror(\"%s: sync backup info(\" PRId64 \") to remote storage got timeout, retry it later\",\n                   _policy.policy_name.c_str(),\n                   b_info.backup_id);\n            tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                             &_tracker,\n                             [this, b_info, sync_callback, create_new_node]() {\n                                 zauto_lock l(_lock);\n                                 sync_backup_to_remote_storage_unlocked(\n                                     std::move(b_info), std::move(sync_callback), create_new_node);\n                             },\n                             0,\n                             _backup_service->backup_option().meta_retry_delay_ms);\n        } else {\n            dassert(false,\n                    \"%s: we can't handle this right now, error(%s)\",\n                    _backup_sig.c_str(),\n                    err.to_string());\n        }\n    };\n\n    if (create_new_node) {\n        _backup_service->get_meta_service()->get_remote_storage()->create_node(\n            backup_info_path, LPC_DEFAULT_CALLBACK, callback, backup_data, nullptr);\n    } else {\n        _backup_service->get_meta_service()->get_remote_storage()->set_data(\n            backup_info_path, backup_data, LPC_DEFAULT_CALLBACK, callback, nullptr);\n    }\n}\n\nvoid policy_context::continue_current_backup_unlocked()\n{\n    if (_policy.is_disable) {\n        ddebug_f(\"{}: policy is disabled, ignore this backup and try it later\",\n                 _policy.policy_name);\n        
tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                         &_tracker,\n                         [this]() {\n                             zauto_lock l(_lock);\n                             issue_new_backup_unlocked();\n                         },\n                         0,\n                         _backup_service->backup_option().issue_backup_interval_ms);\n        return;\n    }\n\n    for (const int32_t &app : _cur_backup.app_ids) {\n        if (_progress.unfinished_partitions_per_app.find(app) !=\n            _progress.unfinished_partitions_per_app.end()) {\n            start_backup_app_meta_unlocked(app);\n        } else {\n            dsn::task_ptr task_after_write_finish_flag =\n                tasking::create_task(LPC_DEFAULT_CALLBACK, &_tracker, [this, app]() {\n                    zauto_lock l(_lock);\n                    finish_backup_app_unlocked(app);\n                });\n            write_backup_app_finish_flag_unlocked(app, task_after_write_finish_flag);\n        }\n    }\n}\n\nbool policy_context::should_start_backup_unlocked()\n{\n    uint64_t now = dsn_now_ms();\n    uint64_t recent_backup_start_time_ms = 0;\n    if (!_backup_history.empty()) {\n        recent_backup_start_time_ms = _backup_history.rbegin()->second.start_time_ms;\n    }\n\n    // the true start time of recent backup have drifted away with the origin start time of\n    // policy,\n    // so we should take the drift into consideration; if user change the start time of the\n    // policy,\n    // we just think the change of start time as drift\n    int32_t hour = 0, min = 0, sec = 0;\n    if (recent_backup_start_time_ms == 0) {\n        //  the first time to backup, just consider the start time\n        ::dsn::utils::time_ms_to_date_time(now, hour, min, sec);\n        return _policy.start_time.should_start_backup(hour, min);\n    } else {\n        uint64_t next_backup_time_ms =\n            recent_backup_start_time_ms + _policy.backup_interval_seconds * 1000;\n        if 
(_policy.start_time.hour != 24) {\n            // user have specify the time point to start backup, so we should take the the\n            // time-drift into consideration\n\n            // compute the time-drift\n            ::dsn::utils::time_ms_to_date_time(recent_backup_start_time_ms, hour, min, sec);\n            int64_t time_dirft_ms = _policy.start_time.compute_time_drift_ms(hour, min);\n\n            if (time_dirft_ms >= 0) {\n                // hour:min(the true start time) >= policy.start_time :\n                //      1, user move up the start time of policy, such as 20:00 to 2:00, we just\n                //      think this case as time drift\n                //      2, the true start time of backup is delayed, compared the origin start time\n                //      of policy, we should process this case\n                //      3, the true start time of backup is the same with the origin start time of\n                //      policy\n                next_backup_time_ms -= time_dirft_ms;\n            } else {\n                // hour:min(the true start time) < policy.start_time:\n                //      1, user delay the start time of policy, such as 2:00 to 23:00\n                //\n                // these case has already been handled, we do nothing\n            }\n        }\n        if (next_backup_time_ms <= now) {\n            ::dsn::utils::time_ms_to_date_time(now, hour, min, sec);\n            return _policy.start_time.should_start_backup(hour, min);\n        } else {\n            return false;\n        }\n    }\n}\n\nvoid policy_context::issue_new_backup_unlocked()\n{\n    // before issue new backup, we check whether the policy is dropped\n    if (_policy.is_disable) {\n        ddebug(\"%s: policy is disabled, just ignore backup, try it later\",\n               _policy.policy_name.c_str());\n        tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                         &_tracker,\n                         [this]() {\n                             
zauto_lock l(_lock);\n                             issue_new_backup_unlocked();\n                         },\n                         0,\n                         _backup_service->backup_option().issue_backup_interval_ms);\n        return;\n    }\n\n    if (!should_start_backup_unlocked()) {\n        tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                         &_tracker,\n                         [this]() {\n                             zauto_lock l(_lock);\n                             issue_new_backup_unlocked();\n                         },\n                         0,\n                         _backup_service->backup_option().issue_backup_interval_ms);\n        ddebug(\"%s: start issue new backup %\" PRId64 \"ms later\",\n               _policy.policy_name.c_str(),\n               _backup_service->backup_option().issue_backup_interval_ms.count());\n        return;\n    }\n\n    prepare_current_backup_on_new_unlocked();\n    // if all apps are dropped, we don't issue a new backup\n    if (_progress.unfinished_partitions_per_app.empty()) {\n        // TODO: just ignore this backup and wait next backup\n        dwarn(\"%s: all apps have been dropped, ignore this backup and retry it later\",\n              _backup_sig.c_str());\n        tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                         &_tracker,\n                         [this]() {\n                             zauto_lock l(_lock);\n                             issue_new_backup_unlocked();\n                         },\n                         0,\n                         _backup_service->backup_option().issue_backup_interval_ms);\n    } else {\n        task_ptr continue_to_backup =\n            tasking::create_task(LPC_DEFAULT_CALLBACK, &_tracker, [this]() {\n                zauto_lock l(_lock);\n                continue_current_backup_unlocked();\n            });\n        sync_backup_to_remote_storage_unlocked(_cur_backup, continue_to_backup, true);\n    }\n}\n\nvoid 
policy_context::start()\n{\n    zauto_lock l(_lock);\n\n    if (_cur_backup.start_time_ms == 0) {\n        issue_new_backup_unlocked();\n    } else {\n        continue_current_backup_unlocked();\n    }\n\n    std::string counter_name = _policy.policy_name + \".recent.backup.duration(ms)\";\n    _counter_policy_recent_backup_duration_ms.init_app_counter(\n        \"eon.meta.policy\",\n        counter_name.c_str(),\n        COUNTER_TYPE_NUMBER,\n        \"policy recent backup duration time\");\n\n    issue_gc_backup_info_task_unlocked();\n    ddebug(\"%s: start gc backup info task succeed\", _policy.policy_name.c_str());\n}\n\nvoid policy_context::add_backup_history(const backup_info &info)\n{\n    zauto_lock l(_lock);\n    if (info.end_time_ms <= 0) {\n        ddebug(\"%s: encounter an unfished backup_info(%lld), start_time(%lld), continue it later\",\n               _policy.policy_name.c_str(),\n               info.backup_id,\n               info.start_time_ms);\n        dassert(_cur_backup.start_time_ms == 0,\n                \"%s: shouldn't have multiple unfinished backup instance in a policy, %lld vs %lld\",\n                _policy.policy_name.c_str(),\n                _cur_backup.backup_id,\n                info.backup_id);\n        dassert(_backup_history.empty() || info.backup_id > _backup_history.rbegin()->first,\n                \"%s: backup_id(%lld) in history larger than current(%lld)\",\n                _policy.policy_name.c_str(),\n                _backup_history.rbegin()->first,\n                info.backup_id);\n        _cur_backup = info;\n        initialize_backup_progress_unlocked();\n        _backup_sig =\n            _policy.policy_name + \"@\" + boost::lexical_cast<std::string>(_cur_backup.backup_id);\n    } else {\n        ddebug(\"%s: add backup history, id(%lld), start_time(%lld), endtime(%lld)\",\n               _policy.policy_name.c_str(),\n               info.backup_id,\n               info.start_time_ms,\n               
info.end_time_ms);\n        dassert(_cur_backup.end_time_ms == 0 || info.backup_id < _cur_backup.backup_id,\n                \"%s: backup_id(%lld) in history larger than current(%lld)\",\n                _policy.policy_name.c_str(),\n                info.backup_id,\n                _cur_backup.backup_id);\n\n        auto result_pair = _backup_history.emplace(info.backup_id, info);\n        dassert(result_pair.second,\n                \"%s: conflict backup id(%lld)\",\n                _policy.policy_name.c_str(),\n                info.backup_id);\n    }\n}\n\nstd::vector<backup_info> policy_context::get_backup_infos(int cnt)\n{\n    zauto_lock l(_lock);\n\n    std::vector<backup_info> ret;\n\n    if (cnt > 0 && _cur_backup.start_time_ms > 0) {\n        ret.emplace_back(_cur_backup);\n        cnt--;\n    }\n\n    for (auto it = _backup_history.rbegin(); it != _backup_history.rend() && cnt > 0; it++) {\n        cnt--;\n        ret.emplace_back(it->second);\n    }\n    return ret;\n}\n\nbool policy_context::is_under_backuping()\n{\n    zauto_lock l(_lock);\n    if (!_is_backup_failed && _cur_backup.start_time_ms > 0 && _cur_backup.end_time_ms <= 0) {\n        return true;\n    }\n    return false;\n}\n\nvoid policy_context::set_policy(const policy &p)\n{\n    zauto_lock l(_lock);\n\n    const std::string old_backup_provider_type = _policy.backup_provider_type;\n    _policy = p;\n    if (_policy.backup_provider_type != old_backup_provider_type) {\n        _block_service = _backup_service->get_meta_service()\n                             ->get_block_service_manager()\n                             .get_or_create_block_filesystem(_policy.backup_provider_type);\n    }\n    dassert(_block_service,\n            \"can't initialize block filesystem by provider (%s)\",\n            _policy.backup_provider_type.c_str());\n}\n\npolicy policy_context::get_policy()\n{\n    zauto_lock l(_lock);\n    return _policy;\n}\n\nvoid policy_context::gc_backup_info_unlocked(const backup_info 
&info_to_gc)\n{\n    char start_time[30] = {'\\0'};\n    char end_time[30] = {'\\0'};\n    ::dsn::utils::time_ms_to_date_time(\n        static_cast<uint64_t>(info_to_gc.start_time_ms), start_time, 30);\n    ::dsn::utils::time_ms_to_date_time(static_cast<uint64_t>(info_to_gc.end_time_ms), end_time, 30);\n    ddebug(\"%s: start to gc backup info, backup_id(%\" PRId64 \"), start_time(%s), end_time(%s)\",\n           _policy.policy_name.c_str(),\n           info_to_gc.backup_id,\n           start_time,\n           end_time);\n\n    dsn::task_ptr sync_callback =\n        ::dsn::tasking::create_task(LPC_DEFAULT_CALLBACK, &_tracker, [this, info_to_gc]() {\n            dist::block_service::remove_path_request req;\n            req.path =\n                cold_backup::get_backup_path(_backup_service->backup_root(), info_to_gc.backup_id);\n            req.recursive = true;\n            _block_service->remove_path(\n                req,\n                LPC_DEFAULT_CALLBACK,\n                [this, info_to_gc](const dist::block_service::remove_path_response &resp) {\n                    // remove dir ok or dir is not exist\n                    if (resp.err == ERR_OK || resp.err == ERR_OBJECT_NOT_FOUND) {\n                        dsn::task_ptr remove_local_backup_info_task = tasking::create_task(\n                            LPC_DEFAULT_CALLBACK, &_tracker, [this, info_to_gc]() {\n                                zauto_lock l(_lock);\n                                _backup_history.erase(info_to_gc.backup_id);\n                                issue_gc_backup_info_task_unlocked();\n                            });\n                        sync_remove_backup_info(info_to_gc, remove_local_backup_info_task);\n                    } else { // ERR_FS_INTERNAL, ERR_TIMEOUT, ERR_DIR_NOT_EMPTY\n                        dwarn(\"%s: gc backup info, id(%\" PRId64\n                              \") failed, with err = %s, just try again\",\n                              
_policy.policy_name.c_str(),\n                              info_to_gc.backup_id,\n                              resp.err.to_string());\n                        gc_backup_info_unlocked(info_to_gc);\n                    }\n                });\n        });\n    sync_backup_to_remote_storage_unlocked(info_to_gc, sync_callback, false);\n}\n\nvoid policy_context::issue_gc_backup_info_task_unlocked()\n{\n    if (_backup_history.size() > _policy.backup_history_count_to_keep) {\n        backup_info &info = _backup_history.begin()->second;\n        info.info_status = backup_info_status::type::DELETING;\n        ddebug(\"%s: start to gc backup info with id(%\" PRId64 \")\",\n               _policy.policy_name.c_str(),\n               info.backup_id);\n\n        tasking::create_task(LPC_DEFAULT_CALLBACK, &_tracker, [this, info]() {\n            gc_backup_info_unlocked(info);\n        })->enqueue();\n    } else {\n        // there is no extra backup to gc, we just issue a new task to call\n        // issue_gc_backup_info_task_unlocked later\n        dinfo(\"%s: no need to gc backup info, start it later\", _policy.policy_name.c_str());\n        tasking::create_task(LPC_DEFAULT_CALLBACK, &_tracker, [this]() {\n            zauto_lock l(_lock);\n            issue_gc_backup_info_task_unlocked();\n        })->enqueue(std::chrono::minutes(3));\n    }\n\n    // update recent backup duration time\n    uint64_t last_backup_duration_time_ms = 0;\n    if (_cur_backup.start_time_ms == 0) {\n        if (!_backup_history.empty()) {\n            const backup_info &b_info = _backup_history.rbegin()->second;\n            last_backup_duration_time_ms = (b_info.end_time_ms - b_info.start_time_ms);\n        }\n    } else if (_cur_backup.start_time_ms > 0) {\n        if (_cur_backup.end_time_ms == 0) {\n            last_backup_duration_time_ms = (dsn_now_ms() - _cur_backup.start_time_ms);\n        } else if (_cur_backup.end_time_ms > 0) {\n            last_backup_duration_time_ms = 
(_cur_backup.end_time_ms - _cur_backup.start_time_ms);\n        }\n    }\n    _counter_policy_recent_backup_duration_ms->set(last_backup_duration_time_ms);\n}\n\nvoid policy_context::sync_remove_backup_info(const backup_info &info, dsn::task_ptr sync_callback)\n{\n    std::string backup_info_path =\n        _backup_service->get_backup_path(_policy.policy_name, info.backup_id);\n    auto callback = [this, info, sync_callback](dsn::error_code err) {\n        if (err == dsn::ERR_OK || err == dsn::ERR_OBJECT_NOT_FOUND) {\n            ddebug(\"%s: sync remove backup_info on remote storage successfully, backup_id(%\" PRId64\n                   \")\",\n                   _policy.policy_name.c_str(),\n                   info.backup_id);\n            if (sync_callback != nullptr) {\n                sync_callback->enqueue();\n            }\n        } else if (err == ERR_TIMEOUT) {\n            derror(\"%s: sync remove backup info on remote storage got timeout, retry it later\",\n                   _policy.policy_name.c_str());\n            tasking::enqueue(\n                LPC_DEFAULT_CALLBACK,\n                &_tracker,\n                [this, info, sync_callback]() { sync_remove_backup_info(info, sync_callback); },\n                0,\n                _backup_service->backup_option().meta_retry_delay_ms);\n        } else {\n            dassert(false,\n                    \"%s: we can't handle this right now, error(%s)\",\n                    _policy.policy_name.c_str(),\n                    err.to_string());\n        }\n    };\n\n    _backup_service->get_meta_service()->get_remote_storage()->delete_node(\n        backup_info_path, true, LPC_DEFAULT_CALLBACK, callback, nullptr);\n}\n\nbackup_service::backup_service(meta_service *meta_svc,\n                               const std::string &policy_meta_root,\n                               const std::string &backup_root,\n                               const policy_factory &factory)\n    : _factory(factory),\n      
_meta_svc(meta_svc),\n      _policy_meta_root(policy_meta_root),\n      _backup_root(backup_root)\n{\n    _state = _meta_svc->get_server_state();\n\n    _opt.meta_retry_delay_ms = 10000_ms;\n    _opt.block_retry_delay_ms = 60000_ms;\n    _opt.app_dropped_retry_delay_ms = 600000_ms;\n    _opt.reconfiguration_retry_delay_ms = 15000_ms;\n    _opt.request_backup_period_ms = 10000_ms;\n    _opt.issue_backup_interval_ms = 300000_ms;\n\n    _in_initialize.store(true);\n}\n\nvoid backup_service::start_create_policy_meta_root(dsn::task_ptr callback)\n{\n    dinfo(\"create policy meta root(%s) on remote_storage\", _policy_meta_root.c_str());\n    _meta_svc->get_remote_storage()->create_node(\n        _policy_meta_root, LPC_DEFAULT_CALLBACK, [this, callback](dsn::error_code err) {\n            if (err == dsn::ERR_OK || err == ERR_NODE_ALREADY_EXIST) {\n                ddebug(\"create policy meta root(%s) succeed, with err(%s)\",\n                       _policy_meta_root.c_str(),\n                       err.to_string());\n                callback->enqueue();\n            } else if (err == dsn::ERR_TIMEOUT) {\n                derror(\"create policy meta root(%s) timeout, try it later\",\n                       _policy_meta_root.c_str());\n                dsn::tasking::enqueue(\n                    LPC_DEFAULT_CALLBACK,\n                    &_tracker,\n                    std::bind(&backup_service::start_create_policy_meta_root, this, callback),\n                    0,\n                    _opt.meta_retry_delay_ms);\n            } else {\n                dassert(false, \"we can't handle this error(%s) right now\", err.to_string());\n            }\n        });\n}\n\nvoid backup_service::start_sync_policies()\n{\n    // TODO: make sync_policies_from_remote_storage function to async\n    //       sync-api will leader to deadlock when the threadnum = 1 in default threadpool\n    ddebug(\"backup service start to sync policies from remote storage\");\n    dsn::error_code err = 
sync_policies_from_remote_storage();\n    if (err == dsn::ERR_OK) {\n        for (auto &policy_kv : _policy_states) {\n            ddebug(\"policy(%s) start to backup\", policy_kv.first.c_str());\n            policy_kv.second->start();\n        }\n        if (_policy_states.empty()) {\n            dwarn(\"can't sync policies from remote storage, user should config some policies\");\n        }\n        _in_initialize.store(false);\n    } else if (err == dsn::ERR_TIMEOUT) {\n        derror(\"sync policies got timeout, retry it later\");\n        dsn::tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                              &_tracker,\n                              std::bind(&backup_service::start_sync_policies, this),\n                              0,\n                              _opt.meta_retry_delay_ms);\n    } else {\n        dassert(false,\n                \"sync policies from remote storage encounter error(%s), we can't handle \"\n                \"this right now\");\n    }\n}\n\nerror_code backup_service::sync_policies_from_remote_storage()\n{\n    // policy on remote storage:\n    //      -- <root>/policy_name/backup_id_1\n    //      --                   /backup_id_2\n    error_code err = ERR_OK;\n    dsn::task_tracker tracker;\n\n    auto init_backup_info = [this, &err, &tracker](const std::string &policy_name) {\n        auto after_get_backup_info = [this, &err, policy_name](error_code ec, const blob &value) {\n            if (ec == ERR_OK) {\n                dinfo(\"sync a backup string(%s) from remote storage\", value.data());\n                backup_info tbackup_info;\n                dsn::json::json_forwarder<backup_info>::decode(value, tbackup_info);\n\n                policy_context *ptr = nullptr;\n                {\n                    zauto_lock l(_lock);\n                    auto it = _policy_states.find(policy_name);\n                    if (it == _policy_states.end()) {\n                        dassert(false,\n                                
\"before initializing the backup_info, initialize the policy first\");\n                        return;\n                    }\n                    ptr = it->second.get();\n                }\n                ptr->add_backup_history(tbackup_info);\n            } else {\n                err = ec;\n                ddebug(\"init backup_info from remote storage fail, error_code = %s\",\n                       ec.to_string());\n            }\n        };\n        std::string backup_info_root = get_policy_path(policy_name);\n\n        _meta_svc->get_remote_storage()->get_children(\n            backup_info_root,\n            LPC_DEFAULT_CALLBACK, // TASK_CODE_EXEC_INLINED,\n            [this, &err, &tracker, policy_name, after_get_backup_info](\n                error_code ec, const std::vector<std::string> &children) {\n                if (ec == ERR_OK) {\n                    if (children.size() > 0) {\n                        for (const auto &b_id : children) {\n                            int64_t backup_id = boost::lexical_cast<int64_t>(b_id);\n                            std::string backup_path = get_backup_path(policy_name, backup_id);\n                            ddebug(\"start to acquire backup_info(%\" PRId64 \") of policy(%s)\",\n                                   backup_id,\n                                   policy_name.c_str());\n                            _meta_svc->get_remote_storage()->get_data(\n                                backup_path,\n                                TASK_CODE_EXEC_INLINED,\n                                std::move(after_get_backup_info),\n                                &tracker);\n                        }\n                    } else // have not backup\n                    {\n                        ddebug(\"policy has not started a backup process, policy_name = %s\",\n                               policy_name.c_str());\n                    }\n                } else {\n                    err = ec;\n                    derror(\"get 
backup info dirs fail from remote storage, backup_dirs_root = %s, \"\n                           \"err = %s\",\n                           get_policy_path(policy_name).c_str(),\n                           ec.to_string());\n                }\n            },\n            &tracker);\n    };\n\n    auto init_one_policy =\n        [this, &err, &tracker, &init_backup_info](const std::string &policy_name) {\n            auto policy_path = get_policy_path(policy_name);\n            ddebug(\"start to acquire the context of policy(%s)\", policy_name.c_str());\n            _meta_svc->get_remote_storage()->get_data(\n                policy_path,\n                LPC_DEFAULT_CALLBACK, // TASK_CODE_EXEC_INLINED,\n                [this, &err, &init_backup_info, policy_path, policy_name](error_code ec,\n                                                                          const blob &value) {\n                    if (ec == ERR_OK) {\n                        std::shared_ptr<policy_context> policy_ctx = _factory(this);\n                        policy tpolicy;\n                        dsn::json::json_forwarder<policy>::decode(value, tpolicy);\n                        policy_ctx->set_policy(std::move(tpolicy));\n\n                        {\n                            zauto_lock l(_lock);\n                            _policy_states.insert(std::make_pair(policy_name, policy_ctx));\n                        }\n                        init_backup_info(policy_name);\n                    } else {\n                        err = ec;\n                        derror(\"init policy fail, policy_path = %s, error_code = %s\",\n                               policy_path.c_str(),\n                               ec.to_string());\n                    }\n                },\n                &tracker);\n        };\n\n    _meta_svc->get_remote_storage()->get_children(\n        _policy_meta_root,\n        LPC_DEFAULT_CALLBACK, // TASK_CODE_EXEC_INLINED,,\n        [&err, &init_one_policy](error_code ec, 
const std::vector<std::string> &children) {\n            if (ec == ERR_OK) {\n                // children's name is name of each policy\n                for (const auto &policy_name : children) {\n                    init_one_policy(policy_name);\n                }\n            } else {\n                err = ec;\n                derror(\"get policy dirs from remote storage fail, error_code = %s\", ec.to_string());\n            }\n        },\n        &tracker);\n    tracker.wait_outstanding_tasks();\n    return err;\n}\n\nvoid backup_service::start()\n{\n    dsn::task_ptr after_create_policy_meta_root =\n        tasking::create_task(LPC_DEFAULT_CALLBACK, &_tracker, [this]() { start_sync_policies(); });\n    start_create_policy_meta_root(after_create_policy_meta_root);\n}\n\nvoid backup_service::add_backup_policy(dsn::message_ex *msg)\n{\n    configuration_add_backup_policy_request request;\n    configuration_add_backup_policy_response response;\n\n    ::dsn::unmarshall(msg, request);\n    std::set<int32_t> app_ids;\n    std::map<int32_t, std::string> app_names;\n\n    // The backup interval must be greater than checkpoint reserve time.\n    // Or the next cold backup checkpoint may be cleared by the clear operation.\n    if (request.backup_interval_seconds <=\n        _meta_svc->get_options().cold_backup_checkpoint_reserve_minutes * 60) {\n        response.err = ERR_INVALID_PARAMETERS;\n        response.hint_message = fmt::format(\n            \"backup interval must be greater than cold_backup_checkpoint_reserve_minutes={}\",\n            _meta_svc->get_options().cold_backup_checkpoint_reserve_minutes);\n        _meta_svc->reply_data(msg, response);\n        msg->release_ref();\n        return;\n    }\n\n    {\n        // check app status\n        zauto_read_lock l;\n        _state->lock_read(l);\n        for (auto &app_id : request.app_ids) {\n            const std::shared_ptr<app_state> &app = _state->get_app(app_id);\n            if (app == nullptr) {\n          
      derror_f(\"app {} doesn't exist, policy {} shouldn't be added.\",\n                         app_id,\n                         request.policy_name);\n                response.err = ERR_INVALID_PARAMETERS;\n                response.hint_message = \"invalid app \" + std::to_string(app_id);\n                _meta_svc->reply_data(msg, response);\n                msg->release_ref();\n                return;\n            }\n            app_ids.insert(app_id);\n            app_names.insert(std::make_pair(app_id, app->app_name));\n        }\n    }\n\n    {\n        // check policy name\n        zauto_lock l(_lock);\n        if (!is_valid_policy_name_unlocked(request.policy_name)) {\n            response.err = ERR_INVALID_PARAMETERS;\n            response.hint_message = \"invalid policy_name: \" + request.policy_name;\n            _meta_svc->reply_data(msg, response);\n            msg->release_ref();\n            return;\n        }\n    }\n\n    // check backup provider\n    if (_meta_svc->get_block_service_manager().get_or_create_block_filesystem(\n            request.backup_provider_type) == nullptr) {\n        response.err = ERR_INVALID_PARAMETERS;\n        response.hint_message = \"invalid backup_provider_type: \" + request.backup_provider_type;\n        _meta_svc->reply_data(msg, response);\n        msg->release_ref();\n        return;\n    }\n\n    ddebug_f(\"start to add backup polciy {}.\", request.policy_name);\n    std::shared_ptr<policy_context> policy_context_ptr = _factory(this);\n    dassert(policy_context_ptr != nullptr, \"invalid policy_context\");\n    policy p;\n    p.policy_name = request.policy_name;\n    p.backup_provider_type = request.backup_provider_type;\n    p.backup_interval_seconds = request.backup_interval_seconds;\n    p.backup_history_count_to_keep = request.backup_history_count_to_keep;\n    p.start_time.parse_from(request.start_time);\n    p.app_ids = app_ids;\n    p.app_names = app_names;\n    
policy_context_ptr->set_policy(std::move(p));\n    do_add_policy(msg, policy_context_ptr, response.hint_message);\n}\n\nvoid backup_service::do_add_policy(dsn::message_ex *req,\n                                   std::shared_ptr<policy_context> p,\n                                   const std::string &hint_msg)\n{\n    policy cur_policy = p->get_policy();\n\n    std::string policy_path = get_policy_path(cur_policy.policy_name);\n    blob value = json::json_forwarder<policy>::encode(cur_policy);\n    _meta_svc->get_remote_storage()->create_node(\n        policy_path,\n        LPC_DEFAULT_CALLBACK, // TASK_CODE_EXEC_INLINED,\n        [ this, req, p, hint_msg, policy_name = cur_policy.policy_name ](error_code err) {\n            if (err == ERR_OK || err == ERR_NODE_ALREADY_EXIST) {\n                configuration_add_backup_policy_response resp;\n                resp.hint_message = hint_msg;\n                resp.err = ERR_OK;\n                ddebug(\"add backup policy succeed, policy_name = %s\", policy_name.c_str());\n\n                _meta_svc->reply_data(req, resp);\n                req->release_ref();\n                {\n                    zauto_lock l(_lock);\n                    _policy_states.insert(std::make_pair(policy_name, p));\n                }\n                p->start();\n            } else if (err == ERR_TIMEOUT) {\n                derror(\"create backup policy on remote storage timeout, retry after %\" PRId64\n                       \"(ms)\",\n                       _opt.meta_retry_delay_ms.count());\n                tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                                 &_tracker,\n                                 std::bind(&backup_service::do_add_policy, this, req, p, hint_msg),\n                                 0,\n                                 _opt.meta_retry_delay_ms);\n                return;\n            } else {\n                dassert(false,\n                        \"we can't handle this when create backup policy, 
err(%s)\",\n                        err.to_string());\n            }\n        },\n        value);\n}\n\nvoid backup_service::do_update_policy_to_remote_storage(\n    configuration_modify_backup_policy_rpc rpc,\n    const policy &p,\n    std::shared_ptr<policy_context> &p_context_ptr)\n{\n    std::string policy_path = get_policy_path(p.policy_name);\n    blob value = json::json_forwarder<policy>::encode(p);\n    _meta_svc->get_remote_storage()->set_data(\n        policy_path, value, LPC_DEFAULT_CALLBACK, [this, rpc, p, p_context_ptr](error_code err) {\n            if (err == ERR_OK) {\n                configuration_modify_backup_policy_response resp;\n                resp.err = ERR_OK;\n                ddebug(\"update backup policy to remote storage succeed, policy_name = %s\",\n                       p.policy_name.c_str());\n                p_context_ptr->set_policy(p);\n            } else if (err == ERR_TIMEOUT) {\n                derror(\"update backup policy to remote storage failed, policy_name = %s, retry \"\n                       \"after %\" PRId64 \"(ms)\",\n                       p.policy_name.c_str(),\n                       _opt.meta_retry_delay_ms.count());\n                tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                                 &_tracker,\n                                 std::bind(&backup_service::do_update_policy_to_remote_storage,\n                                           this,\n                                           rpc,\n                                           p,\n                                           p_context_ptr),\n                                 0,\n                                 _opt.meta_retry_delay_ms);\n            } else {\n                dassert(false,\n                        \"we can't handle this when create backup policy, err(%s)\",\n                        err.to_string());\n            }\n        });\n}\n\nbool backup_service::is_valid_policy_name_unlocked(const std::string &policy_name)\n{\n    
auto iter = _policy_states.find(policy_name);\n    return (iter == _policy_states.end());\n}\n\nvoid backup_service::query_backup_policy(query_backup_policy_rpc rpc)\n{\n    const configuration_query_backup_policy_request &request = rpc.request();\n    configuration_query_backup_policy_response &response = rpc.response();\n\n    response.err = ERR_OK;\n\n    std::vector<std::string> policy_names = request.policy_names;\n    if (policy_names.empty()) {\n        // default all the policy\n        zauto_lock l(_lock);\n        for (const auto &pair : _policy_states) {\n            policy_names.emplace_back(pair.first);\n        }\n    }\n    for (const auto &policy_name : policy_names) {\n        std::shared_ptr<policy_context> policy_context_ptr(nullptr);\n        {\n            zauto_lock l(_lock);\n            auto it = _policy_states.find(policy_name);\n            if (it != _policy_states.end()) {\n                policy_context_ptr = it->second;\n            }\n        }\n        if (policy_context_ptr == nullptr) {\n            if (!response.hint_msg.empty()) {\n                response.hint_msg += \"\\n\\t\";\n            }\n            response.hint_msg += std::string(\"invalid policy_name \" + policy_name);\n            continue;\n        }\n\n        policy cur_policy = policy_context_ptr->get_policy();\n        policy_entry p_entry;\n        p_entry.policy_name = cur_policy.policy_name;\n        p_entry.backup_provider_type = cur_policy.backup_provider_type;\n        p_entry.backup_interval_seconds = std::to_string(cur_policy.backup_interval_seconds);\n        p_entry.app_ids = cur_policy.app_ids;\n        p_entry.backup_history_count_to_keep = cur_policy.backup_history_count_to_keep;\n        p_entry.start_time = cur_policy.start_time.to_string();\n        p_entry.is_disable = cur_policy.is_disable;\n        response.policys.emplace_back(p_entry);\n        // acquire backup_infos\n        std::vector<backup_info> b_infos =\n            
policy_context_ptr->get_backup_infos(request.backup_info_count);\n        std::vector<backup_entry> b_entries;\n        for (const auto &b_info : b_infos) {\n            backup_entry b_entry;\n            b_entry.backup_id = b_info.backup_id;\n            b_entry.start_time_ms = b_info.start_time_ms;\n            b_entry.end_time_ms = b_info.end_time_ms;\n            b_entry.app_ids = b_info.app_ids;\n            b_entries.emplace_back(b_entry);\n        }\n        response.backup_infos.emplace_back(std::move(b_entries));\n        // policy_context_ptr.reset();\n    }\n    if (response.policys.empty()) {\n        // have not pass a valid policy_name\n        if (!policy_names.empty()) {\n            response.err = ERR_INVALID_PARAMETERS;\n        }\n    }\n\n    if (!response.hint_msg.empty()) {\n        response.__isset.hint_msg = true;\n    }\n}\n\nvoid backup_service::modify_backup_policy(configuration_modify_backup_policy_rpc rpc)\n{\n    const configuration_modify_backup_policy_request &request = rpc.request();\n    configuration_modify_backup_policy_response &response = rpc.response();\n    response.err = ERR_OK;\n\n    std::shared_ptr<policy_context> context_ptr;\n    {\n        zauto_lock _(_lock);\n        auto iter = _policy_states.find(request.policy_name);\n        if (iter == _policy_states.end()) {\n            response.err = ERR_INVALID_PARAMETERS;\n            context_ptr = nullptr;\n        } else {\n            context_ptr = iter->second;\n        }\n    }\n    if (context_ptr == nullptr) {\n        return;\n    }\n    policy cur_policy = context_ptr->get_policy();\n\n    bool is_under_backup = context_ptr->is_under_backuping();\n    bool have_modify_policy = false;\n    std::vector<int32_t> valid_app_ids_to_add;\n    std::map<int32_t, std::string> id_to_app_names;\n    if (request.__isset.add_appids) {\n        // lock the _lock of server_state to acquire verify the apps that added to policy\n        zauto_read_lock l;\n        
_state->lock_read(l);\n\n        for (const auto &appid : request.add_appids) {\n            const auto &app = _state->get_app(appid);\n            // TODO: if app is dropped, how to process\n            if (app == nullptr) {\n                dwarn(\"%s: add app to policy failed, because invalid app(%d), ignore it\",\n                      cur_policy.policy_name.c_str(),\n                      appid);\n            } else {\n                valid_app_ids_to_add.emplace_back(appid);\n                id_to_app_names.insert(std::make_pair(appid, app->app_name));\n                have_modify_policy = true;\n            }\n        }\n    }\n\n    if (request.__isset.is_disable) {\n        if (request.is_disable) {\n            if (is_under_backup) {\n                ddebug(\"%s: policy is under backuping, not allow to disable\",\n                       cur_policy.policy_name.c_str());\n                response.err = ERR_BUSY;\n            } else if (!cur_policy.is_disable) {\n                ddebug(\"%s: policy is marked to disable\", cur_policy.policy_name.c_str());\n                cur_policy.is_disable = true;\n                have_modify_policy = true;\n            } else { // cur_policy.is_disable = true\n                ddebug(\"%s: policy is already disabled\", cur_policy.policy_name.c_str());\n            }\n        } else {\n            if (cur_policy.is_disable) {\n                cur_policy.is_disable = false;\n                ddebug(\"%s: policy is marked to enable\", cur_policy.policy_name.c_str());\n                have_modify_policy = true;\n            } else {\n                ddebug(\"%s: policy is already enabled\", cur_policy.policy_name.c_str());\n                response.err = ERR_OK;\n                response.hint_message = std::string(\"policy is already enabled\");\n            }\n        }\n    }\n\n    if (request.__isset.add_appids && !valid_app_ids_to_add.empty()) {\n        for (const auto &appid : valid_app_ids_to_add) {\n            
cur_policy.app_ids.insert(appid);\n            cur_policy.app_names.insert(std::make_pair(appid, id_to_app_names.at(appid)));\n            have_modify_policy = true;\n        }\n    }\n\n    if (request.__isset.removal_appids) {\n        for (const auto &appid : request.removal_appids) {\n            if (appid > 0) {\n                cur_policy.app_ids.erase(appid);\n                ddebug(\"%s: remove app(%d) to policy\", cur_policy.policy_name.c_str(), appid);\n                have_modify_policy = true;\n            } else {\n                dwarn(\"%s: invalid app_id(%d)\", cur_policy.policy_name.c_str(), (int32_t)appid);\n            }\n        }\n    }\n\n    if (request.__isset.new_backup_interval_sec) {\n        if (request.new_backup_interval_sec > 0) {\n            ddebug(\"%s: policy will change backup interval from %\" PRId64 \"(s) to %\" PRId64 \"(s)\",\n                   cur_policy.policy_name.c_str(),\n                   cur_policy.backup_interval_seconds,\n                   request.new_backup_interval_sec);\n            cur_policy.backup_interval_seconds = request.new_backup_interval_sec;\n            have_modify_policy = true;\n        } else {\n            dwarn(\"%s: invalid backup_interval_sec(%\" PRId64 \")\",\n                  cur_policy.policy_name.c_str(),\n                  request.new_backup_interval_sec);\n        }\n    }\n\n    if (request.__isset.backup_history_count_to_keep) {\n        if (request.backup_history_count_to_keep > 0) {\n            ddebug(\"%s: policy will change backup_history_count_to_keep from (%d) to (%d)\",\n                   cur_policy.policy_name.c_str(),\n                   cur_policy.backup_history_count_to_keep,\n                   request.backup_history_count_to_keep);\n            cur_policy.backup_history_count_to_keep = request.backup_history_count_to_keep;\n            have_modify_policy = true;\n        }\n    }\n\n    if (request.__isset.start_time) {\n        backup_start_time t_start_time;\n        
if (t_start_time.parse_from(request.start_time)) {\n            ddebug(\"%s: policy change start_time from (%s) to (%s)\",\n                   cur_policy.policy_name.c_str(),\n                   cur_policy.start_time.to_string().c_str(),\n                   t_start_time.to_string().c_str());\n            cur_policy.start_time = t_start_time;\n            have_modify_policy = true;\n        }\n    }\n\n    if (have_modify_policy) {\n        do_update_policy_to_remote_storage(rpc, cur_policy, context_ptr);\n    }\n}\n\nstd::string backup_service::get_policy_path(const std::string &policy_name)\n{\n    std::stringstream ss;\n    ss << _policy_meta_root << \"/\" << policy_name;\n    return ss.str();\n}\n\nstd::string backup_service::get_backup_path(const std::string &policy_name, int64_t backup_id)\n{\n    std::stringstream ss;\n    ss << _policy_meta_root << \"/\" << policy_name << \"/\" << backup_id;\n    return ss.str();\n}\n\nvoid backup_service::start_backup_app(start_backup_app_rpc rpc)\n{\n    const start_backup_app_request &request = rpc.request();\n    start_backup_app_response &response = rpc.response();\n\n    int32_t app_id = request.app_id;\n    std::shared_ptr<backup_engine> engine = std::make_shared<backup_engine>(this);\n    error_code err = engine->init_backup(app_id);\n    if (err != ERR_OK) {\n        response.err = err;\n        response.hint_message = fmt::format(\"Backup failed: invalid app id {}.\", app_id);\n        return;\n    }\n\n    err = engine->set_block_service(request.backup_provider_type);\n    if (err != ERR_OK) {\n        response.err = err;\n        response.hint_message = fmt::format(\"Backup failed: invalid backup_provider_type {}.\",\n                                            request.backup_provider_type);\n        return;\n    }\n\n    if (request.__isset.backup_path) {\n        err = engine->set_backup_path(request.backup_path);\n        if (err != ERR_OK) {\n            response.err = err;\n            response.hint_message 
= \"Backup failed: the default backup path has already configured \"\n                                    \"in `hdfs_service`, please modify the configuration if you \"\n                                    \"want to use a specific backup path.\";\n            return;\n        }\n    }\n\n    {\n        zauto_lock l(_lock);\n        for (const auto &backup : _backup_states) {\n            if (app_id == backup->get_backup_app_id() && backup->is_in_progress()) {\n                response.err = ERR_INVALID_STATE;\n                response.hint_message =\n                    fmt::format(\"Backup failed: app {} is actively being backed up.\", app_id);\n                return;\n            }\n        }\n    }\n\n    err = engine->start();\n    if (err == ERR_OK) {\n        int64_t backup_id = engine->get_current_backup_id();\n        {\n            zauto_lock l(_lock);\n            _backup_states.emplace_back(std::move(engine));\n        }\n        response.__isset.backup_id = true;\n        response.backup_id = backup_id;\n        response.hint_message =\n            fmt::format(\"Backup succeed: metadata of app {} has been successfully backed up \"\n                        \"and backup request has been sent to replica servers.\",\n                        app_id);\n    } else {\n        response.hint_message =\n            fmt::format(\"Backup failed: could not backup metadata for app {}.\", app_id);\n    }\n    response.err = err;\n}\n\nvoid backup_service::query_backup_status(query_backup_status_rpc rpc)\n{\n    const query_backup_status_request &request = rpc.request();\n    query_backup_status_response &response = rpc.response();\n\n    int32_t app_id = request.app_id;\n    {\n        zauto_lock l(_lock);\n        for (const auto &backup : _backup_states) {\n            if (app_id == backup->get_backup_app_id() &&\n                (!request.__isset.backup_id ||\n                 request.backup_id == backup->get_current_backup_id())) {\n                
response.backup_items.emplace_back(backup->get_backup_item());\n            }\n        }\n    }\n\n    if (response.backup_items.empty()) {\n        response.err = ERR_INVALID_PARAMETERS;\n        response.hint_message = \"Backup not found, please check app_id or backup_id.\";\n        return;\n    }\n    response.__isset.backup_items = true;\n    response.hint_message = fmt::format(\n        \"There are {} available backups for app {}.\", response.backup_items.size(), app_id);\n    response.err = ERR_OK;\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_backup_service.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <cstdio>\n#include <sstream>\n#include <iomanip> // std::setfill, std::setw\n#include <functional>\n\n#include <dsn/dist/block_service.h>\n#include <dsn/http/http_server.h>\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n#include <gtest/gtest_prod.h>\n\n#include \"backup_engine.h\"\n#include \"meta_data.h\"\n#include \"meta_rpc_types.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass meta_service;\nclass server_state;\nclass backup_service;\n\nstruct backup_info_status\n{\n    enum type\n    {\n        ALIVE = 1, // backup info is preserved\n\n        DELETING = 2 // backup info is under deleting, should check whether backup checkpoint is\n                     // fully removed on backup media, then remove the backup_info on remote storage\n    };\n};\n\nstruct backup_info\n{\n    int64_t backup_id;\n    int64_t start_time_ms;\n    int64_t end_time_ms;\n\n    // \"app_ids\" is copied from policy.app_ids when\n    // a new backup is generated. 
The policy's\n    // app set may be changed, but backup_info.app_ids\n    // never change.\n    std::set<int32_t> app_ids;\n    std::map<int32_t, std::string> app_names;\n    int32_t info_status;\n    backup_info_status::type get_backup_status() const\n    {\n        return backup_info_status::type(info_status);\n    }\n    backup_info()\n        : backup_id(0), start_time_ms(0), end_time_ms(0), info_status(backup_info_status::ALIVE)\n    {\n    }\n    DEFINE_JSON_SERIALIZATION(\n        backup_id, start_time_ms, end_time_ms, app_ids, app_names, info_status)\n};\n\n// Attention: backup_start_time == 24:00 is represent no limit for start_time, 24:00 is mainly saved\n// for testing\n//\n// current, we don't support accurating to minute, only support accurating to hour, so\n// we just set minute to 0\nstruct backup_start_time\n{\n    int32_t hour;   // [0 ~24)\n    int32_t minute; // [0 ~ 60)\n    backup_start_time() : hour(0), minute(0) {}\n    backup_start_time(int32_t h, int32_t m) : hour(h), minute(m) {}\n    std::string to_string() const\n    {\n        std::stringstream ss;\n        ss << std::setw(2) << std::setfill('0') << std::to_string(hour) << \":\" << std::setw(2)\n           << std::setfill('0') << std::to_string(minute);\n        return ss.str();\n    }\n    // NOTICE: this function will modify hour and minute, if time is invalid, this func will set\n    // hour = 24, minute = 0\n    bool parse_from(const std::string &time)\n    {\n        if (::sscanf(time.c_str(), \"%d:%d\", &hour, &minute) != 2) {\n            return false;\n        } else {\n            if (hour > 24) {\n                hour = 24;\n                minute = 0;\n                return false;\n            }\n\n            if (hour == 24 && minute != 0) {\n                minute = 0;\n                return false;\n            }\n\n            if (minute >= 60) {\n                hour = 24;\n                minute = 0;\n                return false;\n            }\n        }\n        
return true;\n    }\n\n    // return the interval between new_hour:new_min and start_time,\n    // namely new_hour:new_min - start_time;\n    // unit is ms\n    int64_t compute_time_drift_ms(int32_t new_hour, int32_t new_min)\n    {\n        int64_t res = 0;\n        // unit is hour\n        res += (new_hour - hour);\n        // unit is minute\n        res *= 60;\n        res += (new_min - minute);\n        // unit is ms\n        return (res * 60 * 1000);\n    }\n\n    // judge whether we should start backup base current time\n    bool should_start_backup(int32_t cur_hour, int32_t cur_min)\n    {\n        if (hour == 24) {\n            // erase the restrict of backup_start_time, just for testing\n            return true;\n        }\n        // NOTICE : if you want more precisely, you can use cur_min to implement\n        // now, we just ignore\n        return (cur_hour == hour);\n    }\n    DEFINE_JSON_SERIALIZATION(hour, minute)\n};\n\n//\n// the backup process of meta server:\n//      1, write the app metadata to block filesystem\n//      2, tell the primary of each partition periodically to start backup until app finish backup\n//      3, receive the backup response from each primary to judge whether backup is finished\n//      4, if one app finish its backup, write a flag to block filesystem(we write a file named\n//         app_backup_status to represent the flag) to represent it has finished backup\n//      5, if policy finished backup, write the backup information (backup_info) to block filesystem\n//      6, backup is finished, we just wait to start another backup\n//\n\nclass policy : public policy_info\n{\npublic:\n    std::set<int32_t> app_ids;\n    std::map<int32_t, std::string> app_names;\n    int64_t backup_interval_seconds;\n    int32_t backup_history_count_to_keep;\n    bool is_disable;\n    backup_start_time start_time;\n    policy()\n        : app_ids(),\n          backup_interval_seconds(0),\n          backup_history_count_to_keep(6),\n          
is_disable(false),\n          start_time(24, 0) // default is 24:00, namely no limit\n    {\n    }\n\n    DEFINE_JSON_SERIALIZATION(policy_name,\n                              backup_provider_type,\n                              app_ids,\n                              app_names,\n                              backup_interval_seconds,\n                              backup_history_count_to_keep,\n                              is_disable,\n                              start_time)\n};\n\nstruct backup_progress\n{\n    int32_t unfinished_apps;\n    std::map<gpid, int32_t> partition_progress;\n    std::map<app_id, int32_t> unfinished_partitions_per_app;\n    // <app_id, <partition_id, checkpoint size>>\n    std::map<app_id, std::map<int, int64_t>> app_chkpt_size;\n    // if app is dropped when starting a new backup or under backuping, we just skip backup this app\n    std::map<app_id, bool> is_app_skipped;\n\n    backup_progress() : unfinished_apps(0) {}\n\n    void reset()\n    {\n        unfinished_apps = 0;\n        partition_progress.clear();\n        unfinished_partitions_per_app.clear();\n        app_chkpt_size.clear();\n        is_app_skipped.clear();\n    }\n};\n\nstruct backup_flag\n{\n    int64_t total_checkpoint_size;\n    DEFINE_JSON_SERIALIZATION(total_checkpoint_size)\n};\n\nclass policy_context\n{\npublic:\n    explicit policy_context(backup_service *service)\n        : _backup_service(service), _block_service(nullptr)\n    {\n    }\n    mock_virtual ~policy_context() {}\n\n    void set_policy(const policy &p);\n    policy get_policy();\n    void add_backup_history(const backup_info &info);\n    std::vector<backup_info> get_backup_infos(int cnt);\n    bool is_under_backuping();\n    mock_virtual void start();\n    // function above will called be others, before call these function, should lock the _lock of\n    // policy_context, otherwise maybe lead deadlock\n\n    // clang-format off\nmock_private :\n    //\n    // update the partition progress\n    // 
the progress for app and whole-backup-instance will also updated accordingly.\n    // if whole-backup-instance is finished, sync it to the remote storage.\n    // NOTICE: the local \"_cur_backup\" is reset after it is successfully synced to remote,\n    //   which is in another task.\n    // so user can safely visit \"_cur_backup\" after this function call,\n    //   as long as the _lock is held.\n    //\n    // Return: true if the partition is finished, or-else false\n    //\n\n    mock_virtual bool\n    update_partition_progress_unlocked(gpid pid, int32_t progress, const rpc_address &source);\n    mock_virtual void record_partition_checkpoint_size_unlock(const gpid& pid, int64_t size);\n\n    mock_virtual void start_backup_app_meta_unlocked(int32_t app_id);\n    mock_virtual void start_backup_app_partitions_unlocked(int32_t app_id);\n    mock_virtual void start_backup_partition_unlocked(gpid pid);\n    // before finish backup one app, we write a flag file to represent whether the app's backup is\n    // finished\n    mock_virtual void write_backup_app_finish_flag_unlocked(int32_t app_id,\n                                                            dsn::task_ptr write_callback);\n    mock_virtual void finish_backup_app_unlocked(int32_t app_id);\n    // after finish backup all app, we record the information of policy's backup to block filesystem\n    mock_virtual void write_backup_info_unlocked(const backup_info &b_info,\n                                                 dsn::task_ptr write_callback);\n\n    mock_virtual void sync_backup_to_remote_storage_unlocked(const backup_info &b_info,\n                                                             dsn::task_ptr sync_callback,\n                                                             bool create_new_node);\n    mock_virtual void initialize_backup_progress_unlocked();\n    mock_virtual void prepare_current_backup_on_new_unlocked();\n    mock_virtual void issue_new_backup_unlocked();\n    // returns:\n    //  - 
true, should start backup right now, otherwise don't start backup\n    mock_virtual bool should_start_backup_unlocked();\n    mock_virtual void continue_current_backup_unlocked();\n\n    mock_virtual void on_backup_reply(dsn::error_code err,\n                                      backup_response &&response,\n                                      gpid pid,\n                                      const rpc_address &primary);\n\n    mock_virtual void gc_backup_info_unlocked(const backup_info &info_to_gc);\n    mock_virtual void issue_gc_backup_info_task_unlocked();\n    mock_virtual void sync_remove_backup_info(const backup_info &info, dsn::task_ptr sync_callback);\n\nmock_private :\n    friend class backup_service;\n    backup_service *_backup_service;\n\n    // lock the data-structure below\n    dsn::zlock _lock;\n\n    // policy related\n    policy _policy;\n    dist::block_service::block_filesystem *_block_service;\n\n    // backup related\n    backup_info _cur_backup;\n    bool _is_backup_failed;\n    // backup_id --> backup_info\n    std::map<int64_t, backup_info> _backup_history;\n    backup_progress _progress;\n    std::string _backup_sig; // policy_name@backup_id, used when print backup related log\n\n    perf_counter_wrapper _counter_policy_recent_backup_duration_ms;\n//clang-format on\n    dsn::task_tracker _tracker;\n};\n\nclass backup_service\n{\npublic:\n    struct backup_opt\n    {\n        std::chrono::milliseconds meta_retry_delay_ms;\n        std::chrono::milliseconds block_retry_delay_ms;\n        std::chrono::milliseconds app_dropped_retry_delay_ms;\n        std::chrono::milliseconds reconfiguration_retry_delay_ms;\n        std::chrono::milliseconds request_backup_period_ms; // period that meta send backup command to replica\n        std::chrono::milliseconds issue_backup_interval_ms; // interval that meta try to issue a new backup\n    };\n\n    typedef std::function<std::shared_ptr<policy_context>(backup_service *)> policy_factory;\n    explicit 
backup_service(meta_service *meta_svc,\n                            const std::string &policy_meta_root,\n                            const std::string &backup_root,\n                            const policy_factory &factory);\n    meta_service *get_meta_service() const { return _meta_svc; }\n    server_state *get_state() const { return _state; }\n    backup_opt &backup_option() { return _opt; }\n    void start();\n\n    const std::string &backup_root() const { return _backup_root; }\n    const std::string &policy_root() const { return _policy_meta_root; }\n    void add_backup_policy(dsn::message_ex* msg);\n    void query_backup_policy(query_backup_policy_rpc rpc);\n    void modify_backup_policy(configuration_modify_backup_policy_rpc rpc);\n    void start_backup_app(start_backup_app_rpc rpc);\n    void query_backup_status(query_backup_status_rpc rpc);\n\n    // compose the absolute path(AP) for policy\n    // input:\n    //  -- root:        the prefix of the AP\n    // return:\n    //      the AP of this policy: <policy_meta_root>/<policy_name>\n    std::string get_policy_path(const std::string &policy_name);\n    // compose the absolute path(AP) for backup\n    // input:\n    //  -- root:        the prefix of the AP\n    // return:\n    //      the AP of this backup: <policy_meta_root>/<policy_name>/<backup_id>\n    std::string get_backup_path(const std::string &policy_name, int64_t backup_id);\n\nprivate:\n    friend class backup_service_test;\n    friend class meta_service_test_app;\n\n    FRIEND_TEST(backup_service_test, test_init_backup);\n    FRIEND_TEST(backup_service_test, test_query_backup_status);\n    FRIEND_TEST(meta_backup_service_test, test_add_backup_policy);\n\n    void start_create_policy_meta_root(dsn::task_ptr callback);\n    void start_sync_policies();\n    error_code sync_policies_from_remote_storage();\n\n    void do_add_policy(dsn::message_ex* req,\n                       std::shared_ptr<policy_context> p,\n                       const 
std::string &hint_msg);\n    void do_update_policy_to_remote_storage(configuration_modify_backup_policy_rpc rpc,\n                                            const policy &p,\n                                            std::shared_ptr<policy_context> &p_context_ptr);\n\n    bool is_valid_policy_name_unlocked(const std::string &policy_name);\n\n    policy_factory _factory;\n    meta_service *_meta_svc;\n    server_state *_state;\n\n    // lock _policy_states and _backup_states.\n    zlock _lock;\n    std::map<std::string, std::shared_ptr<policy_context>>\n        _policy_states; // policy_name -> policy_context\n\n    // _backup_states stores all states of one-time backup in the cluster, not persistence to ZK\n    std::vector<std::shared_ptr<backup_engine>> _backup_states;\n\n    // the root of policy metas, stored on remote_storage(zookeeper)\n    std::string _policy_meta_root;\n    // the root of cold backup data, stored on block service\n    std::string _backup_root;\n\n    backup_opt _opt;\n    std::atomic_bool _in_initialize;\n    dsn::task_tracker _tracker;\n};\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_bulk_load_ingestion_context.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"meta_bulk_load_ingestion_context.h\"\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/fail_point.h>\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DEFINE_uint32(\"meta_server\",\n                  bulk_load_node_max_ingesting_count,\n                  4,\n                  \"max partition_count executing ingestion for one node at the same time\");\nDSN_TAG_VARIABLE(bulk_load_node_max_ingesting_count, FT_MUTABLE);\n\nDSN_DEFINE_uint32(\"meta_server\", bulk_load_node_min_disk_count, 1, \"min disk count of one node\");\nDSN_TAG_VARIABLE(bulk_load_node_min_disk_count, FT_MUTABLE);\n\ningestion_context::ingestion_context() { reset_all(); }\n\ningestion_context::~ingestion_context() { reset_all(); }\n\nvoid ingestion_context::partition_node_info::create(const partition_configuration &config,\n                                                    const config_context &cc)\n{\n    pid = config.pid;\n    std::unordered_set<rpc_address> current_nodes;\n    current_nodes.insert(config.primary);\n    for (const auto &secondary : config.secondaries) {\n        current_nodes.insert(secondary);\n    }\n    for (const auto &node : 
current_nodes) {\n        std::string disk_tag;\n        if (cc.get_disk_tag(node, disk_tag)) {\n            node_disk[node] = disk_tag;\n        }\n    }\n}\n\nvoid ingestion_context::node_context::init_disk(const std::string &disk_tag)\n{\n    if (disk_ingesting_counts.find(disk_tag) != disk_ingesting_counts.end()) {\n        return;\n    }\n    disk_ingesting_counts[disk_tag] = 0;\n}\n\nuint32_t ingestion_context::node_context::get_max_disk_ingestion_count(\n    const uint32_t max_node_ingestion_count) const\n{\n    FAIL_POINT_INJECT_F(\"ingestion_node_context_disk_count\", [](string_view count_str) -> uint32_t {\n        uint32_t count = 0;\n        buf2uint32(count_str, count);\n        return count;\n    });\n\n    const auto node_disk_count = disk_ingesting_counts.size() > FLAGS_bulk_load_node_min_disk_count\n                                     ? disk_ingesting_counts.size()\n                                     : FLAGS_bulk_load_node_min_disk_count;\n    return (max_node_ingestion_count + node_disk_count - 1) / node_disk_count;\n}\n\nbool ingestion_context::node_context::check_if_add(const std::string &disk_tag)\n{\n    auto max_node_ingestion_count = FLAGS_bulk_load_node_max_ingesting_count;\n    if (node_ingesting_count >= max_node_ingestion_count) {\n        dwarn_f(\"node[{}] has {} partition executing ingestion, max_count = {}\",\n                address.to_string(),\n                node_ingesting_count,\n                max_node_ingestion_count);\n        return false;\n    }\n\n    auto max_disk_ingestion_count = get_max_disk_ingestion_count(max_node_ingestion_count);\n    if (disk_ingesting_counts[disk_tag] >= max_disk_ingestion_count) {\n        dwarn_f(\"node[{}] disk[{}] has {} partition executing ingestion, max_count = {}\",\n                address.to_string(),\n                disk_tag,\n                disk_ingesting_counts[disk_tag],\n                max_disk_ingestion_count);\n        return false;\n    }\n    return true;\n}\n\nvoid 
ingestion_context::node_context::add(const std::string &disk_tag)\n{\n    disk_ingesting_counts[disk_tag]++;\n    node_ingesting_count++;\n}\n\nvoid ingestion_context::node_context::decrease(const std::string &disk_tag)\n{\n    node_ingesting_count--;\n    disk_ingesting_counts[disk_tag]--;\n}\n\nbool ingestion_context::try_partition_ingestion(const partition_configuration &config,\n                                                const config_context &cc)\n{\n    FAIL_POINT_INJECT_F(\"ingestion_try_partition_ingestion\", [=](string_view) -> bool {\n        auto info = partition_node_info();\n        info.pid = config.pid;\n        _running_partitions[config.pid] = info;\n        return true;\n    });\n    partition_node_info info(config, cc);\n    for (const auto &kv : info.node_disk) {\n        if (!check_node_ingestion(kv.first, kv.second)) {\n            return false;\n        }\n    }\n    add_partition(info);\n    return true;\n}\n\nbool ingestion_context::check_node_ingestion(const rpc_address &node, const std::string &disk_tag)\n{\n    if (_nodes_context.find(node) == _nodes_context.end()) {\n        _nodes_context[node] = node_context(node, disk_tag);\n    }\n    return _nodes_context[node].check_if_add(disk_tag);\n}\n\nvoid ingestion_context::add_partition(const partition_node_info &info)\n{\n    for (const auto &kv : info.node_disk) {\n        _nodes_context[kv.first].add(kv.second);\n    }\n    _running_partitions[info.pid] = info;\n}\n\nvoid ingestion_context::remove_partition(const gpid &pid)\n{\n    FAIL_POINT_INJECT_F(\"ingestion_context_remove_partition\",\n                        [=](string_view) { _running_partitions.erase(pid); });\n\n    if (_running_partitions.find(pid) == _running_partitions.end()) {\n        return;\n    }\n    auto &info = _running_partitions[pid];\n    for (const auto &kv : info.node_disk) {\n        _nodes_context[kv.first].decrease(kv.second);\n    }\n    _running_partitions.erase(pid);\n}\n\nuint32_t 
ingestion_context::get_app_ingesting_count(const uint32_t app_id) const\n{\n    uint32_t running_count = 0;\n    for (const auto &kv : _running_partitions) {\n        if (kv.first.get_app_id() == app_id) {\n            running_count++;\n        }\n    }\n    return running_count;\n}\n\nvoid ingestion_context::reset_app(const uint32_t app_id)\n{\n    std::unordered_set<gpid> removing_partitions;\n    for (const auto &kv : _running_partitions) {\n        if (kv.first.get_app_id() == app_id) {\n            removing_partitions.insert(kv.first);\n        }\n    }\n    for (const auto &pid : removing_partitions) {\n        remove_partition(pid);\n    }\n}\n\nvoid ingestion_context::reset_all()\n{\n    _running_partitions.clear();\n    _nodes_context.clear();\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_bulk_load_ingestion_context.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"meta_service.h\"\n#include \"server_state.h\"\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DECLARE_uint32(bulk_load_node_max_ingesting_count);\nDSN_DECLARE_uint32(bulk_load_node_min_disk_count);\n\n// Meta bulk load helper class, used to manage ingesting partitions\nclass ingestion_context\n{\npublic:\n    explicit ingestion_context();\n    ~ingestion_context();\n\nprivate:\n    struct partition_node_info\n    {\n        gpid pid;\n        // node address -> disk_tag\n        std::unordered_map<rpc_address, std::string> node_disk;\n\n        partition_node_info() {}\n        partition_node_info(const partition_configuration &config, const config_context &cc)\n        {\n            create(config, cc);\n        }\n        void create(const partition_configuration &config, const config_context &cc);\n    };\n\n    struct node_context\n    {\n        rpc_address address;\n        uint32_t node_ingesting_count;\n        // disk tag -> ingesting partition count\n        std::unordered_map<std::string, int32_t> disk_ingesting_counts;\n\n        node_context() {}\n        node_context(const rpc_address &address, 
const std::string &disk_tag)\n            : address(address), node_ingesting_count(0)\n        {\n            init_disk(disk_tag);\n        }\n\n        void init_disk(const std::string &disk_tag);\n        uint32_t get_max_disk_ingestion_count(const uint32_t max_node_ingestion_count) const;\n        bool check_if_add(const std::string &disk_tag);\n        void add(const std::string &disk_tag);\n        void decrease(const std::string &disk_tag);\n    };\n\n    bool try_partition_ingestion(const partition_configuration &config, const config_context &cc);\n    bool check_node_ingestion(const rpc_address &node, const std::string &disk_tag);\n    void add_partition(const partition_node_info &info);\n    void remove_partition(const gpid &pid);\n    uint32_t get_app_ingesting_count(const uint32_t app_id) const;\n    void reset_app(const uint32_t app_id);\n    void reset_all();\n\nprivate:\n    friend class bulk_load_service;\n    friend class node_context_test;\n    friend class ingestion_context_test;\n\n    // ingesting partitions\n    std::unordered_map<gpid, partition_node_info> _running_partitions;\n    // every node and every disk ingesting partition count\n    std::unordered_map<rpc_address, node_context> _nodes_context;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_bulk_load_service.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication/replica_envs.h>\n#include <dsn/utility/fail_point.h>\n\n#include \"meta_bulk_load_service.h\"\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DEFINE_uint32(\"meta_server\",\n                  bulk_load_max_rollback_times,\n                  10,\n                  \"if bulk load rollback time \"\n                  \"exceed this value, meta won't \"\n                  \"rollback bulk load process to \"\n                  \"downloading, but turn it into \"\n                  \"failed\");\nDSN_TAG_VARIABLE(bulk_load_max_rollback_times, FT_MUTABLE);\n\nDSN_DEFINE_bool(\"meta_server\",\n                bulk_load_verify_before_ingest,\n                false,\n                \"verify files according to metadata before ingest\");\nDSN_TAG_VARIABLE(bulk_load_verify_before_ingest, FT_MUTABLE);\n\nDSN_DEFINE_bool(\"meta_server\",\n                enable_concurrent_bulk_load,\n                false,\n                \"whether to enable different apps to execute bulk load at the same time\");\nDSN_TAG_VARIABLE(enable_concurrent_bulk_load, 
FT_MUTABLE);\n\nbulk_load_service::bulk_load_service(meta_service *meta_svc, const std::string &bulk_load_dir)\n    : _meta_svc(meta_svc), _state(meta_svc->get_server_state()), _bulk_load_root(bulk_load_dir)\n{\n}\n\n// ThreadPool: THREAD_POOL_META_SERVER\nvoid bulk_load_service::initialize_bulk_load_service()\n{\n    _sync_bulk_load_storage =\n        make_unique<mss::meta_storage>(_meta_svc->get_remote_storage(), &_sync_tracker);\n    _ingestion_context = make_unique<ingestion_context>();\n\n    create_bulk_load_root_dir();\n    _sync_tracker.wait_outstanding_tasks();\n\n    try_to_continue_bulk_load();\n}\n\n// ThreadPool: THREAD_POOL_META_SERVER\nvoid bulk_load_service::on_start_bulk_load(start_bulk_load_rpc rpc)\n{\n    FAIL_POINT_INJECT_F(\"meta_on_start_bulk_load\",\n                        [=](dsn::string_view) { rpc.response().err = ERR_OK; });\n\n    const auto &request = rpc.request();\n    auto &response = rpc.response();\n    response.err = ERR_OK;\n\n    if (!FLAGS_enable_concurrent_bulk_load &&\n        !_meta_svc->try_lock_meta_op_status(meta_op_status::BULKLOAD)) {\n        response.hint_msg = \"meta server is busy now, please wait\";\n        derror_f(\"{}\", response.hint_msg);\n        response.err = ERR_BUSY;\n        return;\n    }\n\n    std::shared_ptr<app_state> app = get_app(request.app_name);\n    if (app == nullptr || app->status != app_status::AS_AVAILABLE) {\n        response.err = app == nullptr ? ERR_APP_NOT_EXIST : ERR_APP_DROPPED;\n        response.hint_msg = fmt::format(\n            \"app {} is \", response.err == ERR_APP_NOT_EXIST ? 
\"not existed\" : \"not available\");\n        derror_f(\"{}\", response.hint_msg);\n        _meta_svc->unlock_meta_op_status();\n        return;\n    }\n    if (app->is_bulk_loading) {\n        response.err = ERR_BUSY;\n        response.hint_msg = fmt::format(\"app({}) is already executing bulk load\", app->app_name);\n        derror_f(\"{}\", response.hint_msg);\n        _meta_svc->unlock_meta_op_status();\n        return;\n    }\n\n    std::string hint_msg;\n    error_code e = check_bulk_load_request_params(\n        request, app->app_id, app->partition_count, app->envs, hint_msg);\n    if (e != ERR_OK) {\n        response.err = e;\n        response.hint_msg = hint_msg;\n        _meta_svc->unlock_meta_op_status();\n        return;\n    }\n\n    ddebug_f(\"app({}) start bulk load, cluster_name = {}, provider = {}, remote root path = {}, \"\n             \"ingest_behind = {}\",\n             request.app_name,\n             request.cluster_name,\n             request.file_provider_type,\n             request.remote_root_path,\n             request.ingest_behind);\n\n    // clear old bulk load result\n    reset_local_bulk_load_states(app->app_id, app->app_name, true);\n    // avoid possible load balancing\n    _meta_svc->set_function_level(meta_function_level::fl_steady);\n\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     _meta_svc->tracker(),\n                     [this, rpc, app]() { do_start_app_bulk_load(std::move(app), std::move(rpc)); },\n                     server_state::sStateHash);\n}\n\n// ThreadPool: THREAD_POOL_META_SERVER\nerror_code\nbulk_load_service::check_bulk_load_request_params(const start_bulk_load_request &request,\n                                                  const int32_t app_id,\n                                                  const int32_t partition_count,\n                                                  const std::map<std::string, std::string> &envs,\n                                                  std::string 
&hint_msg)\n{\n    FAIL_POINT_INJECT_F(\"meta_check_bulk_load_request_params\",\n                        [](dsn::string_view) -> error_code { return ERR_OK; });\n\n    if (!validate_ingest_behind(envs, request.ingest_behind)) {\n        hint_msg = fmt::format(\"inconsistent ingestion behind option\");\n        derror_f(\"{}\", hint_msg);\n        return ERR_INCONSISTENT_STATE;\n    }\n\n    auto file_provider = request.file_provider_type;\n    // check file provider\n    dsn::dist::block_service::block_filesystem *blk_fs =\n        _meta_svc->get_block_service_manager().get_or_create_block_filesystem(file_provider);\n    if (blk_fs == nullptr) {\n        derror_f(\"invalid remote file provider type: {}\", file_provider);\n        hint_msg = \"invalid file_provider\";\n        return ERR_INVALID_PARAMETERS;\n    }\n\n    // sync get bulk_load_info file_handler\n    const std::string remote_path =\n        get_bulk_load_info_path(request.app_name, request.cluster_name, request.remote_root_path);\n    dsn::dist::block_service::create_file_request cf_req;\n    cf_req.file_name = remote_path;\n    cf_req.ignore_metadata = true;\n    error_code err = ERR_OK;\n    dsn::dist::block_service::block_file_ptr file_handler = nullptr;\n    blk_fs\n        ->create_file(\n            cf_req,\n            TASK_CODE_EXEC_INLINED,\n            [&err, &file_handler](const dsn::dist::block_service::create_file_response &resp) {\n                err = resp.err;\n                file_handler = resp.file_handle;\n            })\n        ->wait();\n    if (err != ERR_OK || file_handler == nullptr) {\n        derror_f(\n            \"failed to get file({}) handler on remote provider({})\", remote_path, file_provider);\n        hint_msg = \"file_provider error\";\n        return ERR_FILE_OPERATION_FAILED;\n    }\n\n    // sync read bulk_load_info on file provider\n    dsn::dist::block_service::read_response r_resp;\n    file_handler\n        ->read(dsn::dist::block_service::read_request{0, 
-1},\n               TASK_CODE_EXEC_INLINED,\n               [&r_resp](const dsn::dist::block_service::read_response &resp) { r_resp = resp; })\n        ->wait();\n    if (r_resp.err != ERR_OK) {\n        derror_f(\"failed to read file({}) on remote provider({}), error = {}\",\n                 remote_path,\n                 file_provider,\n                 r_resp.err.to_string());\n        hint_msg = \"read bulk_load_info failed\";\n        return r_resp.err;\n    }\n\n    bulk_load_info bl_info;\n    if (!::dsn::json::json_forwarder<bulk_load_info>::decode(r_resp.buffer, bl_info)) {\n        derror_f(\"file({}) is damaged on remote file provider({})\", remote_path, file_provider);\n        hint_msg = \"bulk_load_info damaged\";\n        return ERR_CORRUPTION;\n    }\n\n    if (bl_info.app_id != app_id || bl_info.partition_count != partition_count) {\n        derror_f(\"app({}) information is inconsistent, local app_id({}) VS remote app_id({}), \"\n                 \"local partition_count({}) VS remote partition_count({})\",\n                 request.app_name,\n                 app_id,\n                 bl_info.app_id,\n                 partition_count,\n                 bl_info.partition_count);\n        hint_msg = \"app_id or partition_count is inconsistent\";\n        return ERR_INCONSISTENT_STATE;\n    }\n\n    return ERR_OK;\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::do_start_app_bulk_load(std::shared_ptr<app_state> app,\n                                               start_bulk_load_rpc rpc)\n{\n    app_info info = *app;\n    info.__set_is_bulk_loading(true);\n\n    blob value = dsn::json::json_forwarder<app_info>::encode(info);\n    _meta_svc->get_meta_storage()->set_data(\n        _state->get_app_path(*app), std::move(value), [app, rpc, this]() {\n            {\n                zauto_write_lock l(app_lock());\n                app->is_bulk_loading = true;\n            }\n            {\n                zauto_write_lock l(_lock);\n   
             _bulk_load_app_id.insert(app->app_id);\n                _apps_in_progress_count[app->app_id] = app->partition_count;\n            }\n            create_app_bulk_load_dir(\n                app->app_name, app->app_id, app->partition_count, std::move(rpc));\n        });\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::create_app_bulk_load_dir(const std::string &app_name,\n                                                 int32_t app_id,\n                                                 int32_t partition_count,\n                                                 start_bulk_load_rpc rpc)\n{\n    const auto &req = rpc.request();\n\n    app_bulk_load_info ainfo;\n    ainfo.app_id = app_id;\n    ainfo.app_name = app_name;\n    ainfo.partition_count = partition_count;\n    ainfo.status = bulk_load_status::BLS_DOWNLOADING;\n    ainfo.cluster_name = req.cluster_name;\n    ainfo.file_provider_type = req.file_provider_type;\n    ainfo.remote_root_path = req.remote_root_path;\n    ainfo.ingest_behind = req.ingest_behind;\n    ainfo.is_ever_ingesting = false;\n    ainfo.bulk_load_err = ERR_OK;\n\n    _meta_svc->get_meta_storage()->delete_node_recursively(\n        get_app_bulk_load_path(app_id), [this, rpc, ainfo]() {\n            std::string bulk_load_path = get_app_bulk_load_path(ainfo.app_id);\n            ddebug_f(\"remove app({}) bulk load dir {} succeed\", ainfo.app_name, bulk_load_path);\n\n            blob value = dsn::json::json_forwarder<app_bulk_load_info>::encode(ainfo);\n            _meta_svc->get_meta_storage()->create_node(\n                std::move(bulk_load_path), std::move(value), [this, rpc, ainfo]() {\n                    dinfo_f(\"create app({}) bulk load dir\", ainfo.app_name);\n                    {\n                        zauto_write_lock l(_lock);\n                        _app_bulk_load_info[ainfo.app_id] = ainfo;\n                        _apps_pending_sync_flag[ainfo.app_id] = false;\n                        
_apps_rollback_count[ainfo.app_id] = 0;\n                    }\n                    for (int32_t i = 0; i < ainfo.partition_count; ++i) {\n                        create_partition_bulk_load_dir(ainfo.app_name,\n                                                       gpid(ainfo.app_id, i),\n                                                       ainfo.partition_count,\n                                                       std::move(rpc));\n                    }\n                });\n        });\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::create_partition_bulk_load_dir(const std::string &app_name,\n                                                       const gpid &pid,\n                                                       int32_t partition_count,\n                                                       start_bulk_load_rpc rpc)\n{\n    partition_bulk_load_info pinfo;\n    pinfo.status = bulk_load_status::BLS_DOWNLOADING;\n    pinfo.ever_ingest_succeed = false;\n    blob value = dsn::json::json_forwarder<partition_bulk_load_info>::encode(pinfo);\n\n    _meta_svc->get_meta_storage()->create_node(\n        get_partition_bulk_load_path(pid),\n        std::move(value),\n        [app_name, pid, partition_count, rpc, pinfo, this]() {\n            dinfo_f(\"app({}) create partition({}) bulk_load_info\", app_name, pid.to_string());\n            {\n                zauto_write_lock l(_lock);\n                _partition_bulk_load_info[pid] = pinfo;\n                _partitions_pending_sync_flag[pid] = false;\n                if (--_apps_in_progress_count[pid.get_app_id()] == 0) {\n                    ddebug_f(\"app({}) start bulk load succeed\", app_name);\n                    _apps_in_progress_count[pid.get_app_id()] = partition_count;\n                    rpc.response().err = ERR_OK;\n                }\n            }\n            // start send bulk load to replica servers\n            partition_bulk_load(app_name, pid);\n        });\n}\n\n// ThreadPool: 
THREAD_POOL_META_STATE\nbool bulk_load_service::check_partition_status(\n    const std::string &app_name,\n    const gpid &pid,\n    bool always_unhealthy_check,\n    const std::function<void(const std::string &, const gpid &)> &retry_function,\n    /*out*/ partition_configuration &pconfig)\n{\n    std::shared_ptr<app_state> app = get_app(pid.get_app_id());\n    if (app == nullptr || app->status != app_status::AS_AVAILABLE) {\n        dwarn_f(\n            \"app(name={}, id={}) is not existed, set bulk load failed\", app_name, pid.get_app_id());\n        handle_app_unavailable(pid.get_app_id(), app_name);\n        return false;\n    }\n\n    pconfig = app->partitions[pid.get_partition_index()];\n    if (pconfig.primary.is_invalid()) {\n        dwarn_f(\"app({}) partition({}) primary is invalid, try it later\", app_name, pid);\n        tasking::enqueue(LPC_META_STATE_NORMAL,\n                         _meta_svc->tracker(),\n                         [retry_function, app_name, pid]() { retry_function(app_name, pid); },\n                         0,\n                         std::chrono::seconds(1));\n        return false;\n    }\n\n    if (pconfig.secondaries.size() < pconfig.max_replica_count - 1) {\n        bulk_load_status::type p_status;\n        {\n            zauto_read_lock l(_lock);\n            p_status = get_partition_bulk_load_status_unlocked(pid);\n        }\n        // rollback to downloading, pause,cancel,failed bulk load should always send to replica\n        // server\n        if (!always_unhealthy_check && (p_status == bulk_load_status::BLS_DOWNLOADING ||\n                                        p_status == bulk_load_status::BLS_PAUSING ||\n                                        p_status == bulk_load_status::BLS_CANCELED ||\n                                        p_status == bulk_load_status::BLS_FAILED)) {\n            return true;\n        }\n        dwarn_f(\"app({}) partition({}) is unhealthy, status({}), try it later\",\n                
app_name,\n                pid,\n                dsn::enum_to_string(p_status));\n        tasking::enqueue(LPC_META_STATE_NORMAL,\n                         _meta_svc->tracker(),\n                         [retry_function, app_name, pid]() { retry_function(app_name, pid); },\n                         0,\n                         std::chrono::seconds(1));\n        return false;\n    }\n    return true;\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::partition_bulk_load(const std::string &app_name, const gpid &pid)\n{\n    FAIL_POINT_INJECT_F(\"meta_bulk_load_partition_bulk_load\", [](dsn::string_view) {});\n\n    partition_configuration pconfig;\n    if (!check_partition_status(app_name,\n                                pid,\n                                false,\n                                std::bind(&bulk_load_service::partition_bulk_load,\n                                          this,\n                                          std::placeholders::_1,\n                                          std::placeholders::_2),\n                                pconfig)) {\n        return;\n    }\n\n    rpc_address primary_addr = pconfig.primary;\n    auto req = make_unique<bulk_load_request>();\n    {\n        zauto_read_lock l(_lock);\n        const app_bulk_load_info &ainfo = _app_bulk_load_info[pid.get_app_id()];\n        req->pid = pid;\n        req->app_name = app_name;\n        req->primary_addr = primary_addr;\n        req->remote_provider_name = ainfo.file_provider_type;\n        req->cluster_name = ainfo.cluster_name;\n        req->meta_bulk_load_status = get_partition_bulk_load_status_unlocked(pid);\n        req->ballot = pconfig.ballot;\n        req->query_bulk_load_metadata = is_partition_metadata_not_updated_unlocked(pid);\n        req->remote_root_path = ainfo.remote_root_path;\n    }\n\n    ddebug_f(\"send bulk load request to node({}), app({}), partition({}), partition \"\n             \"status = {}, remote provider = {}, cluster_name 
= {}, remote_root_path = {}\",\n             primary_addr.to_string(),\n             app_name,\n             pid,\n             dsn::enum_to_string(req->meta_bulk_load_status),\n             req->remote_provider_name,\n             req->cluster_name,\n             req->remote_root_path);\n\n    bulk_load_rpc rpc(std::move(req), RPC_BULK_LOAD, 0_ms, 0, pid.thread_hash());\n    rpc.call(primary_addr, _meta_svc->tracker(), [this, rpc](error_code err) mutable {\n        on_partition_bulk_load_reply(err, rpc.request(), rpc.response());\n    });\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::on_partition_bulk_load_reply(error_code err,\n                                                     const bulk_load_request &request,\n                                                     const bulk_load_response &response)\n{\n    const std::string &app_name = request.app_name;\n    const gpid &pid = request.pid;\n    const rpc_address &primary_addr = request.primary_addr;\n\n    if (err != ERR_OK) {\n        derror_f(\n            \"app({}), partition({}) failed to receive bulk load response from node({}), error = {}\",\n            app_name,\n            pid,\n            primary_addr.to_string(),\n            err.to_string());\n        try_rollback_to_downloading(app_name, pid);\n        try_resend_bulk_load_request(app_name, pid);\n        return;\n    }\n\n    if (response.err == ERR_OBJECT_NOT_FOUND || response.err == ERR_INVALID_STATE) {\n        derror_f(\n            \"app({}), partition({}) doesn't exist or has invalid state on node({}), error = {}\",\n            app_name,\n            pid,\n            primary_addr.to_string(),\n            response.err.to_string());\n        try_rollback_to_downloading(app_name, pid);\n        try_resend_bulk_load_request(app_name, pid);\n        return;\n    }\n\n    if (response.err == ERR_BUSY) {\n        dwarn_f(\"node({}) has enough replicas downloading, wait for next round to send bulk load \"\n                
\"request for app({}), partition({})\",\n                primary_addr.to_string(),\n                app_name,\n                pid);\n        try_resend_bulk_load_request(app_name, pid);\n        return;\n    }\n\n    if (response.err != ERR_OK) {\n        derror_f(\"app({}), partition({}) from node({}) handle bulk load response failed, error = \"\n                 \"{}, primary status = {}\",\n                 app_name,\n                 pid,\n                 primary_addr.to_string(),\n                 response.err.to_string(),\n                 dsn::enum_to_string(response.primary_bulk_load_status));\n        handle_bulk_load_failed(pid.get_app_id(), response.err);\n        try_resend_bulk_load_request(app_name, pid);\n        return;\n    }\n\n    // response.err = ERR_OK\n    std::shared_ptr<app_state> app = get_app(pid.get_app_id());\n    if (app == nullptr || app->status != app_status::AS_AVAILABLE) {\n        dwarn_f(\n            \"app(name={}, id={}) is not existed, set bulk load failed\", app_name, pid.get_app_id());\n        handle_app_unavailable(pid.get_app_id(), app_name);\n        return;\n    }\n    ballot current_ballot = app->partitions[pid.get_partition_index()].ballot;\n    if (request.ballot < current_ballot) {\n        dwarn_f(\"receive out-date response from node({}), app({}), partition({}), request ballot = \"\n                \"{}, current ballot= {}\",\n                primary_addr.to_string(),\n                app_name,\n                pid,\n                request.ballot,\n                current_ballot);\n        try_rollback_to_downloading(app_name, pid);\n        try_resend_bulk_load_request(app_name, pid);\n        return;\n    }\n\n    // handle bulk load states reported from primary replica\n    bulk_load_status::type app_status = get_app_bulk_load_status(response.pid.get_app_id());\n    switch (app_status) {\n    case bulk_load_status::BLS_DOWNLOADING:\n        handle_app_downloading(response, primary_addr);\n        break;\n    
case bulk_load_status::BLS_DOWNLOADED:\n        update_partition_info_on_remote_storage(\n            response.app_name, response.pid, bulk_load_status::BLS_INGESTING);\n        // when app status is downloaded or ingesting, send request frequently\n        break;\n    case bulk_load_status::BLS_INGESTING:\n        handle_app_ingestion(response, primary_addr);\n        break;\n    case bulk_load_status::BLS_SUCCEED:\n    case bulk_load_status::BLS_FAILED:\n    case bulk_load_status::BLS_CANCELED:\n        handle_bulk_load_finish(response, primary_addr);\n        break;\n    case bulk_load_status::BLS_PAUSING:\n        handle_app_pausing(response, primary_addr);\n        break;\n    case bulk_load_status::BLS_PAUSED:\n        // paused not send request to replica servers\n        return;\n    default:\n        // do nothing in other status\n        break;\n    }\n\n    try_resend_bulk_load_request(app_name, pid);\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::try_resend_bulk_load_request(const std::string &app_name, const gpid &pid)\n{\n    FAIL_POINT_INJECT_F(\"meta_bulk_load_resend_request\", [](dsn::string_view) {});\n    zauto_read_lock l(_lock);\n    if (is_app_bulk_loading_unlocked(pid.get_app_id())) {\n        tasking::enqueue(LPC_META_STATE_NORMAL,\n                         _meta_svc->tracker(),\n                         std::bind(&bulk_load_service::partition_bulk_load, this, app_name, pid),\n                         0,\n                         std::chrono::seconds(bulk_load_constant::BULK_LOAD_REQUEST_INTERVAL));\n    }\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::handle_app_downloading(const bulk_load_response &response,\n                                               const rpc_address &primary_addr)\n{\n    const std::string &app_name = response.app_name;\n    const gpid &pid = response.pid;\n\n    if (!response.__isset.total_download_progress) {\n        dwarn_f(\n            \"receive bulk load response 
from node({}) app({}), partition({}), primary_status({}), \"\n            \"but total_download_progress is not set\",\n            primary_addr.to_string(),\n            app_name,\n            pid,\n            dsn::enum_to_string(response.primary_bulk_load_status));\n        return;\n    }\n\n    for (const auto &kv : response.group_bulk_load_state) {\n        const auto &bulk_load_states = kv.second;\n        if (!bulk_load_states.__isset.download_progress ||\n            !bulk_load_states.__isset.download_status) {\n            dwarn_f(\"receive bulk load response from node({}) app({}), partition({}), \"\n                    \"primary_status({}), but node({}) progress or status is not set\",\n                    primary_addr.to_string(),\n                    app_name,\n                    pid,\n                    dsn::enum_to_string(response.primary_bulk_load_status),\n                    kv.first.to_string());\n            return;\n        }\n        // check partition download status\n        if (bulk_load_states.download_status != ERR_OK) {\n            derror_f(\"app({}) partition({}) on node({}) meet unrecoverable error during \"\n                     \"downloading files, error = {}\",\n                     app_name,\n                     pid,\n                     kv.first.to_string(),\n                     bulk_load_states.download_status);\n\n            error_code err = ERR_UNKNOWN;\n            // ERR_FILE_OPERATION_FAILED: local file system error\n            // ERR_FS_INTERNAL: remote file system error\n            // ERR_CORRUPTION: file not exist or damaged\n            if (ERR_FILE_OPERATION_FAILED == bulk_load_states.download_status ||\n                ERR_FS_INTERNAL == bulk_load_states.download_status ||\n                ERR_CORRUPTION == bulk_load_states.download_status) {\n                err = bulk_load_states.download_status;\n            }\n            handle_bulk_load_failed(pid.get_app_id(), err);\n            return;\n        }\n    
}\n\n    // if replica report metadata, update metadata on remote storage\n    if (response.__isset.metadata && is_partition_metadata_not_updated(pid)) {\n        update_partition_metadata_on_remote_storage(app_name, pid, response.metadata);\n    }\n\n    // update download progress\n    int32_t total_progress = response.total_download_progress;\n    ddebug_f(\"receive bulk load response from node({}) app({}) partition({}), primary_status({}), \"\n             \"total_download_progress = {}\",\n             primary_addr.to_string(),\n             app_name,\n             pid,\n             dsn::enum_to_string(response.primary_bulk_load_status),\n             total_progress);\n    {\n        zauto_write_lock l(_lock);\n        _partitions_total_download_progress[pid] = total_progress;\n        _partitions_bulk_load_state[pid] = response.group_bulk_load_state;\n    }\n\n    // update partition status to `downloaded` if all replica downloaded\n    if (total_progress >= bulk_load_constant::PROGRESS_FINISHED) {\n        ddebug_f(\n            \"app({}) partirion({}) download all files from remote provider succeed\", app_name, pid);\n        update_partition_info_on_remote_storage(app_name, pid, bulk_load_status::BLS_DOWNLOADED);\n    }\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::handle_app_ingestion(const bulk_load_response &response,\n                                             const rpc_address &primary_addr)\n{\n    const std::string &app_name = response.app_name;\n    const gpid &pid = response.pid;\n\n    if (!response.__isset.is_group_ingestion_finished) {\n        dwarn_f(\"receive bulk load response from node({}) app({}) partition({}), \"\n                \"primary_status({}), but is_group_ingestion_finished is not set\",\n                primary_addr.to_string(),\n                app_name,\n                pid,\n                dsn::enum_to_string(response.primary_bulk_load_status));\n        return;\n    }\n\n    for (const auto &kv : 
response.group_bulk_load_state) {\n        const auto &bulk_load_states = kv.second;\n        if (!bulk_load_states.__isset.ingest_status) {\n            dwarn_f(\"receive bulk load response from node({}) app({}) partition({}), \"\n                    \"primary_status({}), but node({}) ingestion_status is not set\",\n                    primary_addr.to_string(),\n                    app_name,\n                    pid,\n                    dsn::enum_to_string(response.primary_bulk_load_status),\n                    kv.first.to_string());\n            return;\n        }\n\n        if (bulk_load_states.ingest_status == ingestion_status::IS_FAILED) {\n            derror_f(\"app({}) partition({}) on node({}) ingestion failed\",\n                     app_name,\n                     pid,\n                     kv.first.to_string());\n            finish_ingestion(pid);\n            handle_bulk_load_failed(pid.get_app_id(), ERR_INGESTION_FAILED);\n            return;\n        }\n    }\n\n    ddebug_f(\"receive bulk load response from node({}) app({}) partition({}), primary_status({}), \"\n             \"is_group_ingestion_finished = {}\",\n             primary_addr.to_string(),\n             app_name,\n             pid,\n             dsn::enum_to_string(response.primary_bulk_load_status),\n             response.is_group_ingestion_finished);\n    {\n        zauto_write_lock l(_lock);\n        _partitions_bulk_load_state[pid] = response.group_bulk_load_state;\n    }\n\n    if (response.is_group_ingestion_finished) {\n        ddebug_f(\"app({}) partition({}) ingestion files succeed\", app_name, pid);\n        finish_ingestion(pid);\n        update_partition_info_on_remote_storage(app_name, pid, bulk_load_status::BLS_SUCCEED);\n    }\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::handle_bulk_load_finish(const bulk_load_response &response,\n                                                const rpc_address &primary_addr)\n{\n    const std::string &app_name = 
response.app_name;\n    const gpid &pid = response.pid;\n\n    if (!response.__isset.is_group_bulk_load_context_cleaned_up) {\n        dwarn_f(\"receive bulk load response from node({}) app({}) partition({}), \"\n                \"primary_status({}), but is_group_bulk_load_context_cleaned_up is not set\",\n                primary_addr.to_string(),\n                app_name,\n                pid,\n                dsn::enum_to_string(response.primary_bulk_load_status));\n        return;\n    }\n\n    for (const auto &kv : response.group_bulk_load_state) {\n        if (!kv.second.__isset.is_cleaned_up) {\n            dwarn_f(\"receive bulk load response from node({}) app({}), partition({}), \"\n                    \"primary_status({}), but node({}) is_cleaned_up is not set\",\n                    primary_addr.to_string(),\n                    app_name,\n                    pid,\n                    dsn::enum_to_string(response.primary_bulk_load_status),\n                    kv.first.to_string());\n            return;\n        }\n    }\n\n    {\n        zauto_read_lock l(_lock);\n        if (_partitions_cleaned_up[pid]) {\n            dwarn_f(\n                \"receive bulk load response from node({}) app({}) partition({}), current partition \"\n                \"has already been cleaned up\",\n                primary_addr.to_string(),\n                app_name,\n                pid);\n            return;\n        }\n    }\n\n    // The replicas have cleaned up their bulk load states and removed temporary sst files\n    bool group_cleaned_up = response.is_group_bulk_load_context_cleaned_up;\n    ddebug_f(\"receive bulk load response from node({}) app({}) partition({}), primary status = {}, \"\n             \"is_group_bulk_load_context_cleaned_up = {}\",\n             primary_addr.to_string(),\n             app_name,\n             pid,\n             dsn::enum_to_string(response.primary_bulk_load_status),\n             group_cleaned_up);\n    {\n        zauto_write_lock 
l(_lock);\n        _partitions_cleaned_up[pid] = group_cleaned_up;\n        _partitions_bulk_load_state[pid] = response.group_bulk_load_state;\n    }\n\n    if (group_cleaned_up) {\n        int32_t count = 0;\n        {\n            zauto_write_lock l(_lock);\n            count = --_apps_in_progress_count[pid.get_app_id()];\n        }\n        if (count == 0) {\n            std::shared_ptr<app_state> app = get_app(pid.get_app_id());\n            if (app == nullptr || app->status != app_status::AS_AVAILABLE) {\n                dwarn_f(\"app(name={}, id={}) is not existed, remove bulk load dir on remote \"\n                        \"storage\",\n                        app_name,\n                        pid.get_app_id());\n                remove_bulk_load_dir_on_remote_storage(pid.get_app_id(), app_name);\n                return;\n            }\n            ddebug_f(\"app({}) update app to not bulk loading\", app_name);\n            update_app_not_bulk_loading_on_remote_storage(std::move(app));\n            reset_local_bulk_load_states(pid.get_app_id(), app_name, false);\n        }\n    }\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::handle_app_pausing(const bulk_load_response &response,\n                                           const rpc_address &primary_addr)\n{\n    const std::string &app_name = response.app_name;\n    const gpid &pid = response.pid;\n\n    if (!response.__isset.is_group_bulk_load_paused) {\n        dwarn_f(\"receive bulk load response from node({}) app({}) partition({}), \"\n                \"primary_status({}), but is_group_bulk_load_paused is not set\",\n                primary_addr.to_string(),\n                app_name,\n                pid,\n                dsn::enum_to_string(response.primary_bulk_load_status));\n        return;\n    }\n\n    for (const auto &kv : response.group_bulk_load_state) {\n        if (!kv.second.__isset.is_paused) {\n            dwarn_f(\"receive bulk load response from node({}) app({}), 
partition({}), \"\n                    \"primary_status({}), but node({}) is_paused is not set\",\n                    primary_addr.to_string(),\n                    app_name,\n                    pid,\n                    dsn::enum_to_string(response.primary_bulk_load_status),\n                    kv.first.to_string());\n            return;\n        }\n    }\n\n    bool is_group_paused = response.is_group_bulk_load_paused;\n    ddebug_f(\"receive bulk load response from node({}) app({}) partition({}), primary status = {}, \"\n             \"is_group_bulk_load_paused = {}\",\n             primary_addr.to_string(),\n             app_name,\n             pid,\n             dsn::enum_to_string(response.primary_bulk_load_status),\n             is_group_paused);\n    {\n        zauto_write_lock l(_lock);\n        _partitions_bulk_load_state[pid] = response.group_bulk_load_state;\n    }\n\n    if (is_group_paused) {\n        ddebug_f(\"app({}) partirion({}) pause bulk load succeed\", response.app_name, pid);\n        update_partition_info_on_remote_storage(\n            response.app_name, pid, bulk_load_status::BLS_PAUSED);\n    }\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::try_rollback_to_downloading(const std::string &app_name, const gpid &pid)\n{\n    zauto_write_lock l(_lock);\n\n    const auto app_status = get_app_bulk_load_status_unlocked(pid.get_app_id());\n    if (app_status != bulk_load_status::BLS_DOWNLOADING &&\n        app_status != bulk_load_status::BLS_DOWNLOADED &&\n        app_status != bulk_load_status::BLS_INGESTING) {\n        ddebug_f(\"app({}) status={}, no need to rollback to downloading, wait for next round\",\n                 app_name,\n                 dsn::enum_to_string(app_status));\n        return;\n    }\n\n    if (_apps_rolling_back[pid.get_app_id()]) {\n        dwarn_f(\"app({}) is rolling back to downloading, ignore this request\", app_name);\n        return;\n    }\n    if (_apps_rollback_count[pid.get_app_id()] 
>= FLAGS_bulk_load_max_rollback_times) {\n        dwarn_f(\n            \"app({}) has been rollback to downloading for {} times, make bulk load process failed\",\n            app_name,\n            _apps_rollback_count[pid.get_app_id()]);\n\n        update_app_status_on_remote_storage_unlocked(\n            pid.get_app_id(),\n            bulk_load_status::BLS_FAILED,\n            _app_bulk_load_info[pid.get_app_id()].is_ever_ingesting ? ERR_INGESTION_FAILED\n                                                                    : ERR_RETRY_EXHAUSTED);\n        return;\n    }\n    ddebug_f(\"app({}) will rolling back from {} to {}, current rollback_count = {}\",\n             app_name,\n             dsn::enum_to_string(app_status),\n             dsn::enum_to_string(bulk_load_status::BLS_DOWNLOADING),\n             _apps_rollback_count[pid.get_app_id()]);\n    _apps_rolling_back[pid.get_app_id()] = true;\n    _apps_rollback_count[pid.get_app_id()]++;\n    update_app_status_on_remote_storage_unlocked(pid.get_app_id(),\n                                                 bulk_load_status::type::BLS_DOWNLOADING);\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::handle_bulk_load_failed(int32_t app_id, error_code err)\n{\n    zauto_write_lock l(_lock);\n    if (!_apps_cleaning_up[app_id]) {\n        _apps_cleaning_up[app_id] = true;\n        update_app_status_on_remote_storage_unlocked(app_id, bulk_load_status::BLS_FAILED, err);\n    }\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::handle_app_unavailable(int32_t app_id, const std::string &app_name)\n{\n    zauto_write_lock l(_lock);\n    if (is_app_bulk_loading_unlocked(app_id) && !_apps_cleaning_up[app_id]) {\n        _apps_cleaning_up[app_id] = true;\n        reset_local_bulk_load_states_unlocked(app_id, app_name, false);\n    }\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::update_partition_metadata_on_remote_storage(\n    const std::string &app_name, const gpid 
&pid, const bulk_load_metadata &metadata)\n{\n    zauto_read_lock l(_lock);\n    partition_bulk_load_info pinfo = _partition_bulk_load_info[pid];\n    pinfo.metadata = metadata;\n    blob value = json::json_forwarder<partition_bulk_load_info>::encode(pinfo);\n\n    _meta_svc->get_meta_storage()->set_data(\n        get_partition_bulk_load_path(pid), std::move(value), [this, app_name, pid, pinfo]() {\n            zauto_write_lock l(_lock);\n            _partition_bulk_load_info[pid] = pinfo;\n            ddebug_f(\n                \"app({}) update partition({}) bulk load metadata, file count = {}, file size = {}\",\n                app_name,\n                pid,\n                pinfo.metadata.files.size(),\n                pinfo.metadata.file_total_size);\n        });\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::update_partition_info_on_remote_storage(const std::string &app_name,\n                                                                const gpid &pid,\n                                                                bulk_load_status::type new_status,\n                                                                bool should_send_request)\n{\n    zauto_write_lock l(_lock);\n    partition_bulk_load_info pinfo = _partition_bulk_load_info[pid];\n    if (pinfo.status == new_status && new_status != bulk_load_status::BLS_DOWNLOADING) {\n        dwarn_f(\"app({}) partition({}) old status:{} VS new status:{}, ignore it\",\n                app_name,\n                pid,\n                dsn::enum_to_string(pinfo.status),\n                dsn::enum_to_string(new_status));\n        return;\n    }\n\n    if (_partitions_pending_sync_flag[pid]) {\n        if (_apps_rolling_back[pid.get_app_id()] &&\n            new_status == bulk_load_status::BLS_DOWNLOADING) {\n            dwarn_f(\"app({}) partition({}) has already sync bulk load status, current_status = {}, \"\n                    \"wait and retry to set status as {}\",\n                    
app_name,\n                    pid,\n                    dsn::enum_to_string(pinfo.status),\n                    dsn::enum_to_string(new_status));\n            tasking::enqueue(LPC_META_STATE_NORMAL,\n                             _meta_svc->tracker(),\n                             std::bind(&bulk_load_service::update_partition_info_on_remote_storage,\n                                       this,\n                                       app_name,\n                                       pid,\n                                       new_status,\n                                       should_send_request),\n                             0,\n                             std::chrono::seconds(1));\n        } else {\n            ddebug_f(\"app({}) partition({}) has already sync bulk load status, current_status = \"\n                     \"{}, new_status = {}, wait for next round\",\n                     app_name,\n                     pid,\n                     dsn::enum_to_string(pinfo.status),\n                     dsn::enum_to_string(new_status));\n        }\n        return;\n    }\n\n    _partitions_pending_sync_flag[pid] = true;\n    update_partition_info_unlock(pid, new_status, pinfo);\n\n    blob value = json::json_forwarder<partition_bulk_load_info>::encode(pinfo);\n    _meta_svc->get_meta_storage()->set_data(\n        get_partition_bulk_load_path(pid),\n        std::move(value),\n        std::bind(&bulk_load_service::update_partition_info_on_remote_storage_reply,\n                  this,\n                  app_name,\n                  pid,\n                  pinfo,\n                  should_send_request));\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::update_partition_info_unlock(const gpid &pid,\n                                                     bulk_load_status::type new_status,\n                                                     /*out*/ partition_bulk_load_info &pinfo)\n{\n    auto old_status = pinfo.status;\n    pinfo.status = 
new_status;\n    if (old_status != bulk_load_status::BLS_INGESTING ||\n        new_status != bulk_load_status::BLS_SUCCEED ||\n        _partitions_bulk_load_state.find(pid) == _partitions_bulk_load_state.end()) {\n        // no need to update other field of partition_bulk_load_info\n        return;\n    }\n    pinfo.addresses.clear();\n    const auto &state = _partitions_bulk_load_state[pid];\n    for (const auto &kv : state) {\n        pinfo.addresses.emplace_back(kv.first);\n    }\n    pinfo.ever_ingest_succeed = true;\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::update_partition_info_on_remote_storage_reply(\n    const std::string &app_name,\n    const gpid &pid,\n    const partition_bulk_load_info &new_info,\n    bool should_send_request)\n{\n    {\n        zauto_write_lock l(_lock);\n        auto old_status = _partition_bulk_load_info[pid].status;\n        auto new_status = new_info.status;\n        _partition_bulk_load_info[pid] = new_info;\n        _partitions_pending_sync_flag[pid] = false;\n\n        ddebug_f(\"app({}) update partition({}) status from {} to {}\",\n                 app_name,\n                 pid,\n                 dsn::enum_to_string(old_status),\n                 dsn::enum_to_string(new_status));\n\n        switch (new_status) {\n        case bulk_load_status::BLS_DOWNLOADED:\n        case bulk_load_status::BLS_INGESTING:\n        case bulk_load_status::BLS_SUCCEED:\n        case bulk_load_status::BLS_PAUSED:\n            if (old_status != new_status && !_apps_rolling_back[pid.get_app_id()] &&\n                --_apps_in_progress_count[pid.get_app_id()] == 0) {\n                update_app_status_on_remote_storage_unlocked(pid.get_app_id(), new_status);\n            }\n            break;\n        case bulk_load_status::BLS_DOWNLOADING: {\n            _partitions_bulk_load_state.erase(pid);\n            _partitions_total_download_progress[pid] = 0;\n            _partitions_cleaned_up[pid] = false;\n\n            if 
(--_apps_in_progress_count[pid.get_app_id()] == 0) {\n                _apps_in_progress_count[pid.get_app_id()] =\n                    _app_bulk_load_info[pid.get_app_id()].partition_count;\n                _apps_rolling_back[pid.get_app_id()] = false;\n                ddebug_f(\"app({}) restart to bulk load\", app_name);\n            }\n        } break;\n        default:\n            // do nothing in other status\n            break;\n        }\n    }\n    if (should_send_request) {\n        partition_bulk_load(app_name, pid);\n    }\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::update_app_status_on_remote_storage_unlocked(\n    int32_t app_id, bulk_load_status::type new_status, error_code err, bool should_send_request)\n{\n    FAIL_POINT_INJECT_F(\"meta_update_app_status_on_remote_storage_unlocked\",\n                        [](dsn::string_view) {});\n\n    app_bulk_load_info ainfo = _app_bulk_load_info[app_id];\n    auto old_status = ainfo.status;\n\n    if (old_status == new_status && new_status != bulk_load_status::BLS_DOWNLOADING) {\n        dwarn_f(\"app({}) old status:{} VS new status:{}, ignore it\",\n                ainfo.app_name,\n                dsn::enum_to_string(old_status),\n                dsn::enum_to_string(new_status));\n        return;\n    }\n\n    if (_apps_pending_sync_flag[app_id]) {\n        ddebug_f(\"app({}) has already sync bulk load status, wait and retry, current status = {}, \"\n                 \"new status = {}\",\n                 ainfo.app_name,\n                 dsn::enum_to_string(old_status),\n                 dsn::enum_to_string(new_status));\n        tasking::enqueue(LPC_META_STATE_NORMAL,\n                         _meta_svc->tracker(),\n                         std::bind(&bulk_load_service::update_app_status_on_remote_storage_unlocked,\n                                   this,\n                                   app_id,\n                                   new_status,\n                                  
 err,\n                                   should_send_request),\n                         0,\n                         std::chrono::seconds(1));\n        return;\n    }\n\n    _apps_pending_sync_flag[app_id] = true;\n\n    if (bulk_load_status::BLS_INGESTING == new_status) {\n        ainfo.is_ever_ingesting = true;\n    }\n    ainfo.status = new_status;\n    ainfo.bulk_load_err = err;\n    blob value = dsn::json::json_forwarder<app_bulk_load_info>::encode(ainfo);\n\n    _meta_svc->get_meta_storage()->set_data(\n        get_app_bulk_load_path(app_id),\n        std::move(value),\n        std::bind(&bulk_load_service::update_app_status_on_remote_storage_reply,\n                  this,\n                  ainfo,\n                  old_status,\n                  new_status,\n                  should_send_request));\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::update_app_status_on_remote_storage_reply(const app_bulk_load_info &ainfo,\n                                                                  bulk_load_status::type old_status,\n                                                                  bulk_load_status::type new_status,\n                                                                  bool should_send_request)\n{\n    int32_t app_id = ainfo.app_id;\n    int32_t partition_count = ainfo.partition_count;\n    {\n        zauto_write_lock l(_lock);\n        _app_bulk_load_info[app_id] = ainfo;\n        _apps_pending_sync_flag[app_id] = false;\n        _apps_in_progress_count[app_id] = partition_count;\n        // when rollback from ingesting, ingesting_count should be reset\n        if (old_status == bulk_load_status::BLS_INGESTING &&\n            new_status == bulk_load_status::BLS_DOWNLOADING) {\n            reset_app_ingestion(app_id);\n        }\n    }\n\n    ddebug_f(\"update app({}) status from {} to {}\",\n             ainfo.app_name,\n             dsn::enum_to_string(old_status),\n             dsn::enum_to_string(new_status));\n\n 
   if (new_status == bulk_load_status::BLS_INGESTING) {\n        for (auto i = 0; i < partition_count; ++i) {\n            partition_ingestion(ainfo.app_name, gpid(app_id, i));\n        }\n    }\n\n    if (new_status == bulk_load_status::BLS_PAUSING ||\n        new_status == bulk_load_status::BLS_DOWNLOADING ||\n        new_status == bulk_load_status::BLS_CANCELED ||\n        new_status == bulk_load_status::BLS_FAILED) {\n        for (int i = 0; i < ainfo.partition_count; ++i) {\n            update_partition_info_on_remote_storage(\n                ainfo.app_name, gpid(app_id, i), new_status, should_send_request);\n        }\n    }\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nbool bulk_load_service::check_ever_ingestion_succeed(const partition_configuration &config,\n                                                     const std::string &app_name,\n                                                     const gpid &pid)\n{\n    partition_bulk_load_info pinfo;\n    {\n        zauto_read_lock l(_lock);\n        pinfo = _partition_bulk_load_info[pid];\n    }\n\n    if (!pinfo.ever_ingest_succeed) {\n        return false;\n    }\n\n    std::vector<rpc_address> current_nodes;\n    current_nodes.emplace_back(config.primary);\n    for (const auto &secondary : config.secondaries) {\n        current_nodes.emplace_back(secondary);\n    }\n\n    std::sort(pinfo.addresses.begin(), pinfo.addresses.end());\n    std::sort(current_nodes.begin(), current_nodes.end());\n    if (current_nodes == pinfo.addresses) {\n        ddebug_f(\"app({}) partition({}) has already executed ingestion succeed\", app_name, pid);\n        update_partition_info_on_remote_storage(app_name, pid, bulk_load_status::BLS_SUCCEED);\n        return true;\n    }\n\n    dwarn_f(\"app({}) partition({}) configuration changed, should executed ingestion again\",\n            app_name,\n            pid);\n    return false;\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::partition_ingestion(const 
std::string &app_name, const gpid &pid)\n{\n    FAIL_POINT_INJECT_F(\"meta_bulk_load_partition_ingestion\", [](dsn::string_view) {});\n\n    auto app_status = get_app_bulk_load_status(pid.get_app_id());\n    if (app_status != bulk_load_status::BLS_INGESTING) {\n        dwarn_f(\"app({}) current status is {}, partition({}), ignore it\",\n                app_name,\n                dsn::enum_to_string(app_status),\n                pid);\n        return;\n    }\n\n    if (is_partition_metadata_not_updated(pid)) {\n        derror_f(\"app({}) partition({}) doesn't have bulk load metadata, set bulk load failed\",\n                 app_name,\n                 pid);\n        handle_bulk_load_failed(pid.get_app_id(), ERR_CORRUPTION);\n        return;\n    }\n\n    partition_configuration pconfig;\n    if (!check_partition_status(app_name,\n                                pid,\n                                true,\n                                std::bind(&bulk_load_service::partition_ingestion,\n                                          this,\n                                          std::placeholders::_1,\n                                          std::placeholders::_2),\n                                pconfig)) {\n        return;\n    }\n\n    if (check_ever_ingestion_succeed(pconfig, app_name, pid)) {\n        return;\n    }\n\n    auto app = get_app(pid.get_app_id());\n    if (!try_partition_ingestion(pconfig, app->helpers->contexts[pid.get_partition_index()])) {\n        dwarn_f(\n            \"app({}) partition({}) couldn't execute ingestion, wait and try later\", app_name, pid);\n        tasking::enqueue(LPC_META_STATE_NORMAL,\n                         _meta_svc->tracker(),\n                         std::bind(&bulk_load_service::partition_ingestion, this, app_name, pid),\n                         pid.thread_hash(),\n                         std::chrono::seconds(5));\n        return;\n    }\n\n    rpc_address primary_addr = pconfig.primary;\n    ballot meta_ballot 
= pconfig.ballot;\n    tasking::enqueue(LPC_BULK_LOAD_INGESTION,\n                     _meta_svc->tracker(),\n                     std::bind(&bulk_load_service::send_ingestion_request,\n                               this,\n                               app_name,\n                               pid,\n                               primary_addr,\n                               meta_ballot),\n                     0,\n                     std::chrono::seconds(bulk_load_constant::BULK_LOAD_REQUEST_INTERVAL));\n}\n\n// ThreadPool: THREAD_POOL_DEFAULT\nvoid bulk_load_service::send_ingestion_request(const std::string &app_name,\n                                               const gpid &pid,\n                                               const rpc_address &primary_addr,\n                                               const ballot &meta_ballot)\n{\n    ingestion_request req;\n    req.app_name = app_name;\n    req.ballot = meta_ballot;\n    req.verify_before_ingest = FLAGS_bulk_load_verify_before_ingest;\n    {\n        zauto_read_lock l(_lock);\n        req.metadata = _partition_bulk_load_info[pid].metadata;\n        req.ingest_behind = _app_bulk_load_info[pid.get_app_id()].ingest_behind;\n    }\n    // create a client request, whose gpid field in header should be pid\n    message_ex *msg = message_ex::create_request(dsn::apps::RPC_RRDB_RRDB_BULK_LOAD,\n                                                 0,\n                                                 pid.thread_hash(),\n                                                 static_cast<uint64_t>(pid.get_partition_index()));\n    auto &hdr = *msg->header;\n    hdr.gpid = pid;\n    dsn::marshall(msg, req);\n    dsn::rpc_response_task_ptr rpc_callback = rpc::create_rpc_response_task(\n        msg,\n        _meta_svc->tracker(),\n        [this, app_name, pid, primary_addr](error_code err, ingestion_response &&resp) {\n            on_partition_ingestion_reply(err, std::move(resp), app_name, pid, primary_addr);\n        });\n    
_meta_svc->send_request(msg, primary_addr, rpc_callback);\n    ddebug_f(\"send ingest_request to node({}), app({}) partition({})\",\n             primary_addr.to_string(),\n             app_name,\n             pid);\n}\n\n// ThreadPool: THREAD_POOL_DEFAULT\nvoid bulk_load_service::on_partition_ingestion_reply(error_code err,\n                                                     const ingestion_response &&resp,\n                                                     const std::string &app_name,\n                                                     const gpid &pid,\n                                                     const rpc_address &primary_addr)\n{\n    if (err != ERR_OK || resp.err != ERR_OK || resp.rocksdb_error != ERR_OK) {\n        finish_ingestion(pid);\n    }\n\n    if (err == ERR_NO_NEED_OPERATE) {\n        dwarn_f(\"app({}) partition({}) on node({}) has already executing ingestion, ignore this \"\n                \"repeated request\",\n                app_name,\n                pid,\n                primary_addr.to_string());\n        return;\n    }\n\n    // if meet 2pc error, ingesting will rollback to downloading, no need to retry here\n    if (err != ERR_OK) {\n        derror_f(\"app({}) partition({}) on node({}) ingestion files failed, error = {}\",\n                 app_name,\n                 pid,\n                 primary_addr.to_string(),\n                 err);\n        tasking::enqueue(\n            LPC_META_STATE_NORMAL,\n            _meta_svc->tracker(),\n            std::bind(&bulk_load_service::try_rollback_to_downloading, this, app_name, pid));\n        return;\n    }\n\n    if (resp.err == ERR_TRY_AGAIN && resp.rocksdb_error != 0) {\n        derror_f(\"app({}) partition({}) on node({}) ingestion files failed while empty write, \"\n                 \"rocksdb error = \"\n                 \"{}, retry it later\",\n                 app_name,\n                 pid,\n                 primary_addr.to_string(),\n                 
resp.rocksdb_error);\n        tasking::enqueue(LPC_BULK_LOAD_INGESTION,\n                         _meta_svc->tracker(),\n                         std::bind(&bulk_load_service::partition_ingestion, this, app_name, pid),\n                         0,\n                         std::chrono::milliseconds(10));\n        return;\n    }\n\n    // some unexpected errors happened, such as write empty write failed but rocksdb_error is ok\n    // stop bulk load process with failed\n    if (resp.err != ERR_OK || resp.rocksdb_error != 0) {\n        derror_f(\"app({}) partition({}) on node({}) failed to ingestion files, error = {}, rocksdb \"\n                 \"error = {}\",\n                 app_name,\n                 pid,\n                 primary_addr.to_string(),\n                 resp.err,\n                 resp.rocksdb_error);\n\n        tasking::enqueue(LPC_META_STATE_NORMAL,\n                         _meta_svc->tracker(),\n                         std::bind(&bulk_load_service::handle_bulk_load_failed,\n                                   this,\n                                   pid.get_app_id(),\n                                   ERR_INGESTION_FAILED));\n        return;\n    }\n\n    ddebug_f(\"app({}) partition({}) receive ingestion response from node({}) succeed\",\n             app_name,\n             pid,\n             primary_addr.to_string());\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::remove_bulk_load_dir_on_remote_storage(int32_t app_id,\n                                                               const std::string &app_name)\n{\n    std::string bulk_load_path = get_app_bulk_load_path(app_id);\n    _meta_svc->get_meta_storage()->delete_node_recursively(\n        std::move(bulk_load_path), [this, app_id, app_name, bulk_load_path]() {\n            ddebug_f(\"remove app({}) bulk load dir {} succeed\", app_name, bulk_load_path);\n            reset_local_bulk_load_states(app_id, app_name, true);\n        });\n}\n\n// ThreadPool: 
THREAD_POOL_META_STATE\nvoid bulk_load_service::remove_bulk_load_dir_on_remote_storage(std::shared_ptr<app_state> app,\n                                                               bool set_app_not_bulk_loading)\n{\n    std::string bulk_load_path = get_app_bulk_load_path(app->app_id);\n    _meta_svc->get_meta_storage()->delete_node_recursively(\n        std::move(bulk_load_path), [this, app, set_app_not_bulk_loading, bulk_load_path]() {\n            ddebug_f(\"remove app({}) bulk load dir {} succeed\", app->app_name, bulk_load_path);\n            reset_local_bulk_load_states(app->app_id, app->app_name, true);\n            if (set_app_not_bulk_loading) {\n                update_app_not_bulk_loading_on_remote_storage(std::move(app));\n            }\n        });\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\ntemplate <typename T>\ninline void erase_map_elem_by_id(int32_t app_id, std::unordered_map<gpid, T> &mymap)\n{\n    for (auto iter = mymap.begin(); iter != mymap.end();) {\n        if (iter->first.get_app_id() == app_id) {\n            mymap.erase(iter++);\n        } else {\n            iter++;\n        }\n    }\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::reset_local_bulk_load_states_unlocked(int32_t app_id,\n                                                              const std::string &app_name,\n                                                              bool is_reset_result)\n{\n    _apps_in_progress_count.erase(app_id);\n    _apps_pending_sync_flag.erase(app_id);\n    erase_map_elem_by_id(app_id, _partitions_pending_sync_flag);\n    erase_map_elem_by_id(app_id, _partitions_total_download_progress);\n    _apps_rolling_back.erase(app_id);\n    _apps_rollback_count.erase(app_id);\n    reset_app_ingestion(app_id);\n    _bulk_load_app_id.erase(app_id);\n\n    if (is_reset_result) {\n        _app_bulk_load_info.erase(app_id);\n        erase_map_elem_by_id(app_id, _partitions_bulk_load_state);\n        erase_map_elem_by_id(app_id, 
_partition_bulk_load_info);\n        erase_map_elem_by_id(app_id, _partitions_cleaned_up);\n        _apps_cleaning_up.erase(app_id);\n    }\n\n    ddebug_f(\n        \"reset local app({}) bulk load context, is_reset_result({})\", app_name, is_reset_result);\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::reset_local_bulk_load_states(int32_t app_id,\n                                                     const std::string &app_name,\n                                                     bool is_reset_result)\n{\n    zauto_write_lock l(_lock);\n    reset_local_bulk_load_states_unlocked(app_id, app_name, is_reset_result);\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::update_app_not_bulk_loading_on_remote_storage(\n    std::shared_ptr<app_state> app)\n{\n    app_info info = *app;\n    info.__set_is_bulk_loading(false);\n\n    blob value = dsn::json::json_forwarder<app_info>::encode(info);\n    _meta_svc->get_meta_storage()->set_data(\n        _state->get_app_path(*app), std::move(value), [app, this]() {\n            zauto_write_lock l(app_lock());\n            app->is_bulk_loading = false;\n            ddebug_f(\"app({}) update app is_bulk_loading to false\", app->app_name);\n            _meta_svc->unlock_meta_op_status();\n        });\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::on_control_bulk_load(control_bulk_load_rpc rpc)\n{\n    const std::string &app_name = rpc.request().app_name;\n    const auto &control_type = rpc.request().type;\n    auto &response = rpc.response();\n    response.err = ERR_OK;\n\n    std::shared_ptr<app_state> app = get_app(app_name);\n    if (app == nullptr || app->status != app_status::AS_AVAILABLE) {\n        derror_f(\"app({}) is not existed or not available\", app_name);\n        response.err = app == nullptr ? 
ERR_APP_NOT_EXIST : ERR_APP_DROPPED;\n        response.__set_hint_msg(fmt::format(\"app({}) is not existed or not available\", app_name));\n        return;\n    }\n\n    if (!app->is_bulk_loading) {\n        derror_f(\"app({}) is not executing bulk load\", app_name);\n        response.err = ERR_INACTIVE_STATE;\n        response.__set_hint_msg(fmt::format(\"app({}) is not executing bulk load\", app_name));\n        return;\n    }\n    int32_t app_id = app->app_id;\n\n    zauto_write_lock l(_lock);\n    const auto &app_status = get_app_bulk_load_status_unlocked(app_id);\n    switch (control_type) {\n    case bulk_load_control_type::BLC_PAUSE: {\n        if (app_status != bulk_load_status::BLS_DOWNLOADING) {\n            auto hint_msg = fmt::format(\"can not pause bulk load for app({}) with status({})\",\n                                        app_name,\n                                        dsn::enum_to_string(app_status));\n            derror_f(\"{}\", hint_msg);\n            response.err = ERR_INVALID_STATE;\n            response.__set_hint_msg(hint_msg);\n            return;\n        }\n        ddebug_f(\"app({}) start to pause bulk load\", app_name);\n        update_app_status_on_remote_storage_unlocked(app_id, bulk_load_status::BLS_PAUSING);\n    } break;\n    case bulk_load_control_type::BLC_RESTART: {\n        if (app_status != bulk_load_status::BLS_PAUSED) {\n            auto hint_msg = fmt::format(\"can not restart bulk load for app({}) with status({})\",\n                                        app_name,\n                                        dsn::enum_to_string(app_status));\n            derror_f(\"{}\", hint_msg);\n            response.err = ERR_INVALID_STATE;\n            response.__set_hint_msg(hint_msg);\n            return;\n        }\n        ddebug_f(\"app({}) restart bulk load\", app_name);\n        update_app_status_on_remote_storage_unlocked(\n            app_id, bulk_load_status::BLS_DOWNLOADING, ERR_OK, true);\n    } break;\n    case 
bulk_load_control_type::BLC_CANCEL:\n        if (app_status != bulk_load_status::BLS_DOWNLOADING &&\n            app_status != bulk_load_status::BLS_PAUSED) {\n            auto hint_msg = fmt::format(\"can not cancel bulk load for app({}) with status({})\",\n                                        app_name,\n                                        dsn::enum_to_string(app_status));\n            derror_f(\"{}\", hint_msg);\n            response.err = ERR_INVALID_STATE;\n            response.__set_hint_msg(hint_msg);\n            return;\n        }\n    case bulk_load_control_type::BLC_FORCE_CANCEL: {\n        ddebug_f(\"app({}) start to {} cancel bulk load, original status = {}\",\n                 app_name,\n                 control_type == bulk_load_control_type::BLC_FORCE_CANCEL ? \"force\" : \"\",\n                 dsn::enum_to_string(app_status));\n        update_app_status_on_remote_storage_unlocked(app_id,\n                                                     bulk_load_status::BLS_CANCELED,\n                                                     ERR_OK,\n                                                     app_status == bulk_load_status::BLS_PAUSED);\n    } break;\n    default:\n        break;\n    }\n}\n\n// ThreadPool: THREAD_POOL_META_SERVER\nvoid bulk_load_service::on_query_bulk_load_status(query_bulk_load_rpc rpc)\n{\n    const auto &request = rpc.request();\n    const std::string &app_name = request.app_name;\n\n    query_bulk_load_response &response = rpc.response();\n    response.err = ERR_OK;\n    response.app_name = app_name;\n\n    std::shared_ptr<app_state> app = get_app(app_name);\n    if (app == nullptr || app->status != app_status::AS_AVAILABLE) {\n        auto hint_msg = fmt::format(\"app({}) is not existed or not available\", app_name);\n        derror_f(\"{}\", hint_msg);\n        response.err = (app == nullptr) ? 
ERR_APP_NOT_EXIST : ERR_APP_DROPPED;\n        response.__set_hint_msg(hint_msg);\n        return;\n    }\n\n    if (!app->is_bulk_loading) {\n        auto hint_msg =\n            fmt::format(\"app({}) is not during bulk load, return last time result\", app_name);\n        dwarn_f(\"{}\", hint_msg);\n        response.__set_hint_msg(hint_msg);\n    }\n\n    int32_t app_id = app->app_id;\n    int32_t partition_count = app->partition_count;\n\n    zauto_read_lock l(_lock);\n    response.max_replica_count = app->max_replica_count;\n    response.app_status = get_app_bulk_load_status_unlocked(app_id);\n\n    response.partitions_status.resize(partition_count);\n    for (const auto &kv : _partition_bulk_load_info) {\n        if (kv.first.get_app_id() == app_id) {\n            response.partitions_status[kv.first.get_partition_index()] = kv.second.status;\n        }\n    }\n\n    response.bulk_load_states.resize(partition_count);\n    for (const auto &kv : _partitions_bulk_load_state) {\n        if (kv.first.get_app_id() == app_id) {\n            response.bulk_load_states[kv.first.get_partition_index()] = kv.second;\n        }\n    }\n\n    response.__set_is_bulk_loading(app->is_bulk_loading);\n\n    if (!app->is_bulk_loading && bulk_load_status::BLS_FAILED == response.app_status) {\n        response.err = get_app_bulk_load_err_unlocked(app_id);\n    }\n\n    ddebug_f(\"query app({}) bulk_load_status({}) succeed\",\n             app_name,\n             dsn::enum_to_string(response.app_status));\n}\n\nvoid bulk_load_service::on_clear_bulk_load(clear_bulk_load_rpc rpc)\n{\n    const auto &request = rpc.request();\n    const std::string &app_name = request.app_name;\n    clear_bulk_load_state_response &response = rpc.response();\n\n    std::shared_ptr<app_state> app = get_app(app_name);\n    if (app == nullptr || app->status != app_status::AS_AVAILABLE) {\n        response.err = (app == nullptr) ? 
ERR_APP_NOT_EXIST : ERR_APP_DROPPED;\n        response.hint_msg = fmt::format(\"app({}) is not existed or not available\", app_name);\n        derror_f(\"{}\", response.hint_msg);\n        return;\n    }\n\n    if (app->is_bulk_loading) {\n        response.err = ERR_INVALID_STATE;\n        response.hint_msg = fmt::format(\"app({}) is executing bulk load\", app_name);\n        derror_f(\"{}\", response.hint_msg);\n        return;\n    }\n\n    do_clear_app_bulk_load_result(app->app_id, rpc);\n}\n\nvoid bulk_load_service::do_clear_app_bulk_load_result(int32_t app_id, clear_bulk_load_rpc rpc)\n{\n    FAIL_POINT_INJECT_F(\"meta_do_clear_app_bulk_load_result\",\n                        [rpc](dsn::string_view) { rpc.response().err = ERR_OK; });\n    std::string bulk_load_path = get_app_bulk_load_path(app_id);\n    _meta_svc->get_meta_storage()->delete_node_recursively(\n        std::move(bulk_load_path), [this, app_id, bulk_load_path, rpc]() {\n            clear_bulk_load_state_response &response = rpc.response();\n            response.err = ERR_OK;\n            response.hint_msg =\n                fmt::format(\"clear app({}) bulk load result succeed, remove bulk load dir succeed\",\n                            rpc.request().app_name);\n            reset_local_bulk_load_states(app_id, rpc.request().app_name, true);\n            ddebug_f(\"{}\", response.hint_msg);\n        });\n}\n\n// ThreadPool: THREAD_POOL_META_SERVER\nvoid bulk_load_service::create_bulk_load_root_dir()\n{\n    blob value = blob();\n    std::string path = _bulk_load_root;\n    _sync_bulk_load_storage->create_node(std::move(path), std::move(value), [this]() {\n        ddebug_f(\"create bulk load root({}) succeed\", _bulk_load_root);\n        sync_apps_from_remote_storage();\n    });\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::sync_apps_from_remote_storage()\n{\n    std::string path = _bulk_load_root;\n    _sync_bulk_load_storage->get_children(\n        std::move(path), 
[this](bool flag, const std::vector<std::string> &children) {\n            if (flag && children.size() > 0) {\n                ddebug_f(\"There are {} apps need to sync bulk load status\", children.size());\n                for (const auto &elem : children) {\n                    int32_t app_id = boost::lexical_cast<int32_t>(elem);\n                    ddebug_f(\"start to sync app({}) bulk load status\", app_id);\n                    do_sync_app(app_id);\n                }\n            }\n        });\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::do_sync_app(int32_t app_id)\n{\n    std::string app_path = get_app_bulk_load_path(app_id);\n    _sync_bulk_load_storage->get_data(std::move(app_path), [this, app_id](const blob &value) {\n        app_bulk_load_info ainfo;\n        dsn::json::json_forwarder<app_bulk_load_info>::decode(value, ainfo);\n        {\n            zauto_write_lock l(_lock);\n            _bulk_load_app_id.insert(app_id);\n            _app_bulk_load_info[app_id] = ainfo;\n        }\n        sync_partitions_from_remote_storage(ainfo.app_id, ainfo.app_name);\n    });\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::sync_partitions_from_remote_storage(int32_t app_id,\n                                                            const std::string &app_name)\n{\n    std::string app_path = get_app_bulk_load_path(app_id);\n    _sync_bulk_load_storage->get_children(\n        std::move(app_path),\n        [this, app_path, app_id, app_name](bool flag, const std::vector<std::string> &children) {\n            ddebug_f(\"app(name={},app_id={}) has {} partition bulk load info to be synced\",\n                     app_name,\n                     app_id,\n                     children.size());\n            for (const auto &child_pidx : children) {\n                int32_t pidx = boost::lexical_cast<int32_t>(child_pidx);\n                std::string partition_path = get_partition_bulk_load_path(app_path, pidx);\n               
 do_sync_partition(gpid(app_id, pidx), partition_path);\n            }\n        });\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::do_sync_partition(const gpid &pid, std::string &partition_path)\n{\n    _sync_bulk_load_storage->get_data(std::move(partition_path), [this, pid](const blob &value) {\n        partition_bulk_load_info pinfo;\n        dsn::json::json_forwarder<partition_bulk_load_info>::decode(value, pinfo);\n        {\n            zauto_write_lock l(_lock);\n            _partition_bulk_load_info[pid] = pinfo;\n        }\n    });\n}\n\n// ThreadPool: THREAD_POOL_META_SERVER\nvoid bulk_load_service::try_to_continue_bulk_load()\n{\n    FAIL_POINT_INJECT_F(\"meta_try_to_continue_bulk_load\", [](dsn::string_view) {});\n    for (const auto app_id : _bulk_load_app_id) {\n        app_bulk_load_info ainfo = _app_bulk_load_info[app_id];\n        // <partition_index, partition_bulk_load_info>\n        std::unordered_map<int32_t, partition_bulk_load_info> pinfo_map;\n        for (const auto &kv : _partition_bulk_load_info) {\n            if (kv.first.get_app_id() == app_id) {\n                pinfo_map[kv.first.get_partition_index()] = kv.second;\n            }\n        }\n        try_to_continue_app_bulk_load(ainfo, pinfo_map);\n    }\n}\n\n// ThreadPool: THREAD_POOL_META_SERVER\nvoid bulk_load_service::try_to_continue_app_bulk_load(\n    const app_bulk_load_info &ainfo,\n    const std::unordered_map<int32_t, partition_bulk_load_info> &pinfo_map)\n{\n    std::shared_ptr<app_state> app = get_app(ainfo.app_name);\n    // if app is not available, remove bulk load dir\n    if (app == nullptr || app->status != app_status::AS_AVAILABLE) {\n        derror_f(\n            \"app(name={},app_id={}) is not existed or not available\", ainfo.app_name, ainfo.app_id);\n        if (app == nullptr) {\n            remove_bulk_load_dir_on_remote_storage(ainfo.app_id, ainfo.app_name);\n        } else {\n            
remove_bulk_load_dir_on_remote_storage(std::move(app), true);\n        }\n        return;\n    }\n\n    // check app bulk load info\n    if (!validate_app(app->app_id, app->partition_count, app->envs, ainfo, pinfo_map.size())) {\n        remove_bulk_load_dir_on_remote_storage(std::move(app), true);\n        return;\n    }\n\n    // index of the partition whose bulk load status is different from app's bulk load status\n    std::unordered_set<int32_t> different_status_pidx_set;\n    for (const auto &kv : pinfo_map) {\n        if (kv.second.status != ainfo.status) {\n            different_status_pidx_set.insert(kv.first);\n        }\n    }\n\n    // check partition bulk load info\n    if (!validate_partition(ainfo, pinfo_map, different_status_pidx_set.size())) {\n        remove_bulk_load_dir_on_remote_storage(std::move(app), true);\n        return;\n    }\n\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     _meta_svc->tracker(),\n                     std::bind(&bulk_load_service::do_continue_app_bulk_load,\n                               this,\n                               ainfo,\n                               pinfo_map,\n                               different_status_pidx_set));\n}\n\n// ThreadPool: THREAD_POOL_META_SERVER\n/*static*/ bool\nbulk_load_service::validate_ingest_behind(const std::map<std::string, std::string> &envs,\n                                          bool ingest_behind)\n{\n    bool app_allow_ingest_behind = false;\n    const auto &iter = envs.find(replica_envs::ROCKSDB_ALLOW_INGEST_BEHIND);\n    if (iter != envs.end()) {\n        if (!buf2bool(iter->second, app_allow_ingest_behind)) {\n            dwarn_f(\"can not convert {} to bool\", iter->second);\n            app_allow_ingest_behind = false;\n        }\n    }\n    if (ingest_behind && !app_allow_ingest_behind) {\n        return false;\n    }\n    return true;\n}\n\n// ThreadPool: THREAD_POOL_META_SERVER\n/*static*/ bool bulk_load_service::validate_app(int32_t app_id,\n   
                                             int32_t partition_count,\n                                                const std::map<std::string, std::string> &envs,\n                                                const app_bulk_load_info &ainfo,\n                                                int32_t pinfo_count)\n{\n    // app id and partition from `app_bulk_load_info` is inconsistent with current app_info\n    if (app_id != ainfo.app_id || partition_count != ainfo.partition_count) {\n        derror_f(\"app({}) has different app_id or partition_count, bulk load app_id = {}, \"\n                 \"partition_count = {}, current app_id = {}, partition_count = {}\",\n                 ainfo.app_name,\n                 ainfo.app_id,\n                 ainfo.partition_count,\n                 app_id,\n                 partition_count);\n        return false;\n    }\n\n    // partition_bulk_load_info count should not be greater than partition_count\n    if (partition_count < pinfo_count) {\n        derror_f(\"app({}) has invalid count, app partition_count = {}, remote \"\n                 \"partition_bulk_load_info count = {}\",\n                 ainfo.app_name,\n                 partition_count,\n                 pinfo_count);\n        return false;\n    }\n\n    // partition_bulk_load_info count is not equal to partition_count can only be happended when app\n    // status is downloading, consider the following condition:\n    // when starting bulk load, meta server will create app_bulk_load_dir and\n    // partition_bulk_load_dir on remote storage\n    // however, meta server crash when create app directory and part of partition directory\n    // when meta server recover, partition directory count is less than partition_count\n    if (pinfo_count != partition_count && ainfo.status != bulk_load_status::BLS_DOWNLOADING) {\n        derror_f(\"app({}) bulk_load_status = {}, but there are {} partitions lack \"\n                 \"partition_bulk_load dir\",\n               
  ainfo.app_name,\n                 dsn::enum_to_string(ainfo.status),\n                 partition_count - pinfo_count);\n        return false;\n    }\n\n    if (!validate_ingest_behind(envs, ainfo.ingest_behind)) {\n        derror_f(\"app({}) has inconsistent ingest_behind option\", ainfo.app_name);\n        return false;\n    }\n\n    return true;\n}\n\n// ThreadPool: THREAD_POOL_META_SERVER\n/*static*/ bool bulk_load_service::validate_partition(\n    const app_bulk_load_info &ainfo,\n    const std::unordered_map<int32_t, partition_bulk_load_info> &pinfo_map,\n    const int32_t different_status_count)\n{\n    const auto app_status = ainfo.status;\n    bool is_valid = true;\n\n    switch (app_status) {\n    case bulk_load_status::BLS_DOWNLOADING:\n        // if app status is downloading, partition status has no limit, because when bulk load meet\n        // recoverable errors, will rollback to downloading\n        // partition directory count is allowed less than partition_count, but it is impossible with\n        // some partition bulk load status is not downloading and some partition directroy is\n        // missing on remote storage\n        if (ainfo.partition_count - pinfo_map.size() > 0 && different_status_count > 0) {\n            derror_f(\"app({}) bulk_load_status = {}, there are {} partitions status is different \"\n                     \"from app, and {} partitions not existed, this is invalid\",\n                     ainfo.app_name,\n                     dsn::enum_to_string(app_status),\n                     different_status_count,\n                     ainfo.partition_count - pinfo_map.size());\n            is_valid = false;\n        }\n        break;\n    case bulk_load_status::BLS_DOWNLOADED:\n    case bulk_load_status::BLS_INGESTING: {\n        // if app status is downloaded, valid partition status is downloaded or ingesting\n        // if app status is ingesting, valid partition status is ingesting or succeed\n        const auto other_valid_status 
= (app_status == bulk_load_status::BLS_DOWNLOADED)\n                                            ? bulk_load_status::BLS_INGESTING\n                                            : bulk_load_status::BLS_SUCCEED;\n        for (const auto &kv : pinfo_map) {\n            if (kv.second.status != app_status && kv.second.status != other_valid_status) {\n                derror_f(\"app({}) bulk_load_status = {}, but partition[{}] bulk_load_status = {}, \"\n                         \"only {} and {} is valid\",\n                         ainfo.app_name,\n                         app_status,\n                         kv.first,\n                         dsn::enum_to_string(kv.second.status),\n                         dsn::enum_to_string(app_status),\n                         dsn::enum_to_string(other_valid_status));\n                is_valid = false;\n                break;\n            }\n        }\n    } break;\n    case bulk_load_status::BLS_SUCCEED:\n    case bulk_load_status::BLS_PAUSED:\n        // if app status is succeed or paused, all partitions' status should not be different from\n        // app's\n        if (different_status_count > 0) {\n            derror_f(\"app({}) bulk_load_status = {}, {} partitions status is different from app, \"\n                     \"this is invalid\",\n                     ainfo.app_name,\n                     dsn::enum_to_string(app_status),\n                     different_status_count);\n            is_valid = false;\n        }\n        break;\n    default:\n        // for other status, partition status has no limit\n        break;\n    }\n\n    return is_valid;\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::do_continue_app_bulk_load(\n    const app_bulk_load_info &ainfo,\n    const std::unordered_map<int32_t, partition_bulk_load_info> &pinfo_map,\n    const std::unordered_set<int32_t> &different_status_pidx_set)\n{\n    const int32_t app_id = ainfo.app_id;\n    const int32_t partition_count = 
ainfo.partition_count;\n    const auto app_status = ainfo.status;\n    const int32_t different_count = different_status_pidx_set.size();\n    const int32_t same_count = pinfo_map.size() - different_count;\n    const int32_t invalid_count = partition_count - pinfo_map.size();\n\n    if (!FLAGS_enable_concurrent_bulk_load &&\n        !_meta_svc->try_lock_meta_op_status(meta_op_status::BULKLOAD)) {\n        derror_f(\"fatal, the op status of meta server must be meta_op_status::FREE\");\n        return;\n    }\n    ddebug_f(\n        \"app({}) continue bulk load, app_id = {}, partition_count = {}, status = {}, there are {} \"\n        \"partitions have bulk_load_info, {} partitions have same status with app, {} \"\n        \"partitions different\",\n        ainfo.app_name,\n        app_id,\n        partition_count,\n        dsn::enum_to_string(app_status),\n        pinfo_map.size(),\n        same_count,\n        different_count);\n\n    // _apps_in_progress_count is used for updating app bulk load, when _apps_in_progress_count = 0\n    // means app bulk load status can transfer to next stage, for example, when app status is\n    // downloaded, and _apps_in_progress_count = 0, app status can turn to ingesting\n    // see more in function `update_partition_info_on_remote_storage_reply`\n    int32_t in_progress_partition_count = partition_count;\n    if (app_status == bulk_load_status::BLS_DOWNLOADING) {\n        if (invalid_count > 0) {\n            // create missing partition, so the in_progress_count should be invalid_count\n            in_progress_partition_count = invalid_count;\n        } else if (different_count > 0) {\n            // it is hard to distinguish that bulk load is normal downloading or rollback to\n            // downloading before meta server crash, when app status is downloading, we consider\n            // bulk load as rolling back to downloading for convenience, for partitions whose status\n            // is not downloading, update them to 
downloading, so the in_progress_count should be\n            // different_count\n            in_progress_partition_count = different_count;\n        }\n    } else if (app_status == bulk_load_status::BLS_DOWNLOADED ||\n               app_status == bulk_load_status::BLS_INGESTING ||\n               app_status == bulk_load_status::BLS_SUCCEED) {\n        // for app status is downloaded, when all partitions turn to ingesting, app partition will\n        // turn to ingesting, so the in_progress_count should be same_count, ingesting and succeed\n        // are same\n        in_progress_partition_count = same_count;\n    } // for other cases, in_progress_count should be partition_count\n    {\n        zauto_write_lock l(_lock);\n        _apps_in_progress_count[app_id] = in_progress_partition_count;\n        _apps_rollback_count[app_id] = 0;\n    }\n\n    // if app is paused, no need to send bulk_load_request, just return\n    if (app_status == bulk_load_status::BLS_PAUSED) {\n        return;\n    }\n\n    // create all missing partitions then send request to all partitions\n    if (app_status == bulk_load_status::BLS_DOWNLOADING && invalid_count > 0) {\n        for (auto i = 0; i < partition_count; ++i) {\n            if (pinfo_map.find(i) == pinfo_map.end()) {\n                create_missing_partition_dir(ainfo.app_name, gpid(app_id, i), partition_count);\n            }\n        }\n        return;\n    }\n\n    // update all partition status to app_status\n    if ((app_status == bulk_load_status::BLS_FAILED ||\n         app_status == bulk_load_status::BLS_CANCELED ||\n         app_status == bulk_load_status::BLS_PAUSING ||\n         app_status == bulk_load_status::BLS_DOWNLOADING) &&\n        different_count > 0) {\n        for (auto pidx : different_status_pidx_set) {\n            update_partition_info_on_remote_storage(ainfo.app_name, gpid(app_id, pidx), app_status);\n        }\n    }\n\n    // send bulk_load_request to all partitions\n    for (auto i = 0; i < 
partition_count; ++i) {\n        gpid pid = gpid(app_id, i);\n        partition_bulk_load(ainfo.app_name, pid);\n        if (app_status == bulk_load_status::BLS_INGESTING) {\n            partition_ingestion(ainfo.app_name, pid);\n        }\n    }\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid bulk_load_service::create_missing_partition_dir(const std::string &app_name,\n                                                     const gpid &pid,\n                                                     int32_t partition_count)\n{\n    partition_bulk_load_info pinfo;\n    pinfo.status = bulk_load_status::BLS_DOWNLOADING;\n    blob value = dsn::json::json_forwarder<partition_bulk_load_info>::encode(pinfo);\n\n    _meta_svc->get_meta_storage()->create_node(\n        get_partition_bulk_load_path(pid),\n        std::move(value),\n        [app_name, pid, partition_count, pinfo, this]() {\n            const int32_t app_id = pid.get_app_id();\n            bool send_request = false;\n            ddebug_f(\"app({}) create partition({}) bulk_load_info\", app_name, pid);\n            {\n                zauto_write_lock l(_lock);\n                _partition_bulk_load_info[pid] = pinfo;\n\n                if (--_apps_in_progress_count[app_id] == 0) {\n                    _apps_in_progress_count[app_id] = partition_count;\n                    send_request = true;\n                }\n            }\n            if (send_request) {\n                ddebug_f(\"app({}) start to bulk load\", app_name);\n                for (auto i = 0; i < partition_count; ++i) {\n                    partition_bulk_load(app_name, gpid(app_id, i));\n                }\n            }\n        });\n}\n\n// ThreadPool: THREAD_POOL_META_SERVER\nvoid bulk_load_service::check_app_bulk_load_states(std::shared_ptr<app_state> app,\n                                                   bool is_app_bulk_loading)\n{\n    std::string app_path = get_app_bulk_load_path(app->app_id);\n    
_meta_svc->get_remote_storage()->node_exist(\n        app_path, LPC_META_CALLBACK, [this, app_path, app, is_app_bulk_loading](error_code err) {\n            if (err != ERR_OK && err != ERR_OBJECT_NOT_FOUND) {\n                dwarn_f(\"check app({}) bulk load dir({}) failed, error = {}, try later\",\n                        app->app_name,\n                        app_path,\n                        err);\n                tasking::enqueue(LPC_META_CALLBACK,\n                                 nullptr,\n                                 std::bind(&bulk_load_service::check_app_bulk_load_states,\n                                           this,\n                                           app,\n                                           is_app_bulk_loading),\n                                 0,\n                                 std::chrono::seconds(1));\n                return;\n            }\n\n            if (err == ERR_OBJECT_NOT_FOUND && is_app_bulk_loading) {\n                derror_f(\"app({}): bulk load dir({}) not exist, but is_bulk_loading = {}, reset \"\n                         \"app is_bulk_loading flag\",\n                         app->app_name,\n                         app_path,\n                         is_app_bulk_loading);\n                update_app_not_bulk_loading_on_remote_storage(std::move(app));\n                return;\n            }\n\n            // Normal cases:\n            // err = ERR_OBJECT_NOT_FOUND, is_app_bulk_load = false: app is not executing bulk load\n            // err = ERR_OK, is_app_bulk_load = true: app used to be executing bulk load\n        });\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_bulk_load_service.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"meta_bulk_load_ingestion_context.h\"\n#include \"meta_service.h\"\n#include \"server_state.h\"\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DECLARE_uint32(bulk_load_max_rollback_times);\nDSN_DECLARE_bool(enable_concurrent_bulk_load);\n\n///\n/// bulk load path on remote storage:\n/// <cluster_root>/bulk_load/<app_id> -> app_bulk_load_info\n/// <cluster_root>/bulk_load/<app_id>/<pidx> -> partition_bulk_load_info\n///\nstruct app_bulk_load_info\n{\n    int32_t app_id;\n    int32_t partition_count;\n    std::string app_name;\n    std::string cluster_name;\n    std::string file_provider_type;\n    bulk_load_status::type status;\n    std::string remote_root_path;\n    bool ingest_behind;\n    bool is_ever_ingesting;\n    error_code bulk_load_err;\n    DEFINE_JSON_SERIALIZATION(app_id,\n                              partition_count,\n                              app_name,\n                              cluster_name,\n                              file_provider_type,\n                              status,\n                              remote_root_path,\n                              ingest_behind,\n                 
             is_ever_ingesting,\n                              bulk_load_err)\n};\n\nstruct partition_bulk_load_info\n{\n    bulk_load_status::type status;\n    bulk_load_metadata metadata;\n    bool ever_ingest_succeed;\n    std::vector<rpc_address> addresses;\n    DEFINE_JSON_SERIALIZATION(status, metadata, ever_ingest_succeed, addresses)\n};\n\n// Used for remote file provider\nstruct bulk_load_info\n{\n    int32_t app_id;\n    std::string app_name;\n    int32_t partition_count;\n    DEFINE_JSON_SERIALIZATION(app_id, app_name, partition_count)\n};\n\n///\n/// Bulk load process:\n/// when client sent `start_bulk_load_rpc` to meta server to start bulk load,\n/// meta server create bulk load structures on remote storage, and send `RPC_BULK_LOAD` rpc to\n/// each primary replica periodically until bulk load succeed or failed. whole process below:\n///\n///           start bulk load\n///                  |\n///                  v\n/// remove previous bulk load info on remote storage\n///                  |\n///                  v\n///          is_bulk_loading = true\n///                  |\n///                  v\n///     create bulk load info on remote storage\n///                  |\n///         Err      v\n///     ---------Downloading <---------|\n///     |  Too many  |                 |\n///     |  rollback  |                 |\n///     |            v         Err     |\n///     |        Downloaded  --------->|\n///     |            |                 |\n///     | IngestErr  v         Err     |\n///     |<------- Ingesting  --------->|\n///     |            |\n///     v            v\n///   Failed       Succeed\n///     |            |\n///     |            v\n///     |---> is_bulk_loading = false\n///                  |\n///                  v\n///            bulk load end\n\nclass bulk_load_service\n{\npublic:\n    explicit bulk_load_service(meta_service *meta_svc, const std::string &bulk_load_dir);\n\n    void initialize_bulk_load_service();\n\n    // client -> 
meta server to start bulk load\n    void on_start_bulk_load(start_bulk_load_rpc rpc);\n    // client -> meta server to pause/restart/cancel/force_cancel bulk load\n    void on_control_bulk_load(control_bulk_load_rpc rpc);\n    // client -> meta server to query bulk load status\n    void on_query_bulk_load_status(query_bulk_load_rpc rpc);\n    // client -> meta server to clear bulk load state\n    void on_clear_bulk_load(clear_bulk_load_rpc rpc);\n\n    // Called by `sync_apps_from_remote_storage`, check bulk load state consistency\n    // Handle inconsistent conditions below:\n    // - app is_bulk_loading = true, app_bulk_load_info not existed, set is_bulk_loading=false\n    // - app is_bulk_loading = false, app_bulk_load_info existed, remove useless app bulk load on\n    // remote storage\n    void check_app_bulk_load_states(std::shared_ptr<app_state> app, bool is_app_bulk_loading);\n\nprivate:\n    // Called by `on_start_bulk_load`, check request params\n    // - ERR_OK: pass params check\n    // - ERR_INVALID_PARAMETERS: wrong file_provider type\n    // - ERR_FILE_OPERATION_FAILED: file_provider error\n    // - ERR_OBJECT_NOT_FOUND: bulk_load_info not exist, may wrong cluster_name or app_name\n    // - ERR_CORRUPTION: bulk_load_info is damaged on file_provider\n    // - ERR_INCONSISTENT_STATE: app_id or partition_count inconsistent\n    error_code check_bulk_load_request_params(const start_bulk_load_request &request,\n                                              const int32_t app_id,\n                                              const int32_t partition_count,\n                                              const std::map<std::string, std::string> &envs,\n                                              std::string &hint_msg);\n\n    void do_start_app_bulk_load(std::shared_ptr<app_state> app, start_bulk_load_rpc rpc);\n\n    void do_clear_app_bulk_load_result(int32_t app_id, clear_bulk_load_rpc rpc);\n\n    // Called by `partition_bulk_load` and 
`partition_ingestion`\n    // check partition status before sending partition_bulk_load_request and\n    // partition_ingestion_request\n    bool check_partition_status(\n        const std::string &app_name,\n        const gpid &pid,\n        bool always_unhealthy_check,\n        const std::function<void(const std::string &, const gpid &)> &retry_function,\n        /*out*/ partition_configuration &pconfig);\n\n    void partition_bulk_load(const std::string &app_name, const gpid &pid);\n\n    void on_partition_bulk_load_reply(error_code err,\n                                      const bulk_load_request &request,\n                                      const bulk_load_response &response);\n\n    // if app is still in bulk load, resend bulk_load_request to primary after interval seconds\n    void try_resend_bulk_load_request(const std::string &app_name, const gpid &pid);\n\n    void handle_app_downloading(const bulk_load_response &response,\n                                const rpc_address &primary_addr);\n\n    void handle_app_ingestion(const bulk_load_response &response, const rpc_address &primary_addr);\n\n    // when app status is `succeed`, `failed`, `canceled`, meta and replica should cleanup bulk load\n    // states\n    void handle_bulk_load_finish(const bulk_load_response &response,\n                                 const rpc_address &primary_addr);\n\n    void handle_app_pausing(const bulk_load_response &response, const rpc_address &primary_addr);\n\n    // app not existed or not available during bulk load\n    void handle_app_unavailable(int32_t app_id, const std::string &app_name);\n\n    void try_rollback_to_downloading(const std::string &app_name, const gpid &pid);\n\n    void handle_bulk_load_failed(int32_t app_id, error_code err);\n\n    // Called when app bulk load status update to ingesting\n    // create ingestion_request and send it to primary\n    void partition_ingestion(const std::string &app_name, const gpid &pid);\n\n    void 
send_ingestion_request(const std::string &app_name,\n                                const gpid &pid,\n                                const rpc_address &primary_addr,\n                                const ballot &meta_ballot);\n\n    void on_partition_ingestion_reply(error_code err,\n                                      const ingestion_response &&resp,\n                                      const std::string &app_name,\n                                      const gpid &pid,\n                                      const rpc_address &primary_addr);\n\n    // Called by `partition_ingestion`\n    // - true : this partition has ever executed ingestion succeed, no need to send ingestion\n    // request\n    // - false: this partition has not executed ingestion or executed ingestion failed\n    bool check_ever_ingestion_succeed(const partition_configuration &config,\n                                      const std::string &app_name,\n                                      const gpid &pid);\n\n    // is_reset_all\n    // - true  : reset all states in memory\n    // - false : keep the bulk load results in memory, reset others\n    void reset_local_bulk_load_states_unlocked(int32_t app_id,\n                                               const std::string &app_name,\n                                               bool is_reset_all);\n    void\n    reset_local_bulk_load_states(int32_t app_id, const std::string &app_name, bool is_reset_all);\n\n    ///\n    /// ingestion_context functions\n    ///\n    bool try_partition_ingestion(const partition_configuration &config, const config_context &cc)\n    {\n        return _ingestion_context->try_partition_ingestion(config, cc);\n    }\n\n    void finish_ingestion(const gpid &pid) { _ingestion_context->remove_partition(pid); }\n\n    const int32_t get_app_ingesting_count(const int32_t app_id) const\n    {\n        return _ingestion_context->get_app_ingesting_count(app_id);\n    }\n\n    void reset_app_ingestion(const int32_t app_id) 
{ _ingestion_context->reset_app(app_id); }\n\n    ///\n    /// update bulk load states to remote storage functions\n    ///\n\n    void create_app_bulk_load_dir(const std::string &app_name,\n                                  int32_t app_id,\n                                  int32_t partition_count,\n                                  start_bulk_load_rpc rpc);\n\n    void create_partition_bulk_load_dir(const std::string &app_name,\n                                        const gpid &pid,\n                                        int32_t partition_count,\n                                        start_bulk_load_rpc rpc);\n\n    // Called by `handle_app_downloading`\n    // update partition bulk load metadata reported by replica server on remote storage\n    void update_partition_metadata_on_remote_storage(const std::string &app_name,\n                                                     const gpid &pid,\n                                                     const bulk_load_metadata &metadata);\n\n    // update partition bulk load info on remote storage\n    // if should_send_request = true, will send bulk load request after update local partition\n    // status, this parameter will be true when restarting bulk load, status will turn from paused\n    // to downloading\n    void update_partition_info_on_remote_storage(const std::string &app_name,\n                                                 const gpid &pid,\n                                                 bulk_load_status::type new_status,\n                                                 bool should_send_request = false);\n\n    void update_partition_info_unlock(const gpid &pid,\n                                      bulk_load_status::type new_status,\n                                      /*out*/ partition_bulk_load_info &pinfo);\n\n    void update_partition_info_on_remote_storage_reply(const std::string &app_name,\n                                                       const gpid &pid,\n                           
                            const partition_bulk_load_info &new_info,\n                                                       bool should_send_request);\n\n    // update app bulk load status on remote storage\n    void update_app_status_on_remote_storage_unlocked(int32_t app_id,\n                                                      bulk_load_status::type new_status,\n                                                      error_code err = ERR_OK,\n                                                      bool should_send_request = false);\n\n    void update_app_status_on_remote_storage_reply(const app_bulk_load_info &ainfo,\n                                                   bulk_load_status::type old_status,\n                                                   bulk_load_status::type new_status,\n                                                   bool should_send_request);\n\n    // called when app is not available or dropped during bulk load, remove bulk load directory on\n    // remote storage\n    void remove_bulk_load_dir_on_remote_storage(int32_t app_id, const std::string &app_name);\n\n    // called when app is available, remove bulk load directory on remote storage\n    // if `set_app_not_bulk_loading` = true: call function\n    // `update_app_not_bulk_loading_on_remote_storage` to set app not bulk_loading after removing\n    void remove_bulk_load_dir_on_remote_storage(std::shared_ptr<app_state> app,\n                                                bool set_app_not_bulk_loading);\n\n    // update app's is_bulk_loading to false on remote_storage\n    void update_app_not_bulk_loading_on_remote_storage(std::shared_ptr<app_state> app);\n\n    ///\n    /// sync bulk load states from remote storage\n    /// called when service initialized or meta server leader switch\n    ///\n    void create_bulk_load_root_dir();\n\n    void sync_apps_from_remote_storage();\n\n    void do_sync_app(int32_t app_id);\n\n    void sync_partitions_from_remote_storage(int32_t app_id, const 
std::string &app_name);\n\n    void do_sync_partition(const gpid &pid, std::string &partition_path);\n\n    ///\n    /// try to continue bulk load according to states from remote storage\n    /// called when service initialized or meta server leader switch\n    ///\n    void try_to_continue_bulk_load();\n\n    void try_to_continue_app_bulk_load(\n        const app_bulk_load_info &ainfo,\n        const std::unordered_map<int32_t, partition_bulk_load_info> &partition_map);\n\n    static bool validate_ingest_behind(const std::map<std::string, std::string> &envs,\n                                       bool ingest_behind);\n\n    static bool validate_app(int32_t app_id,\n                             int32_t partition_count,\n                             const std::map<std::string, std::string> &envs,\n                             const app_bulk_load_info &ainfo,\n                             int32_t pinfo_count);\n\n    static bool\n    validate_partition(const app_bulk_load_info &ainfo,\n                       const std::unordered_map<int32_t, partition_bulk_load_info> &pinfo_map,\n                       const int32_t different_status_count);\n\n    void do_continue_app_bulk_load(\n        const app_bulk_load_info &ainfo,\n        const std::unordered_map<int32_t, partition_bulk_load_info> &pinfo_map,\n        const std::unordered_set<int32_t> &different_status_pidx_set);\n\n    // called by `do_continue_app_bulk_load`\n    // only used when app status is downloading and some partition bulk load info not existed on\n    // remote storage\n    void create_missing_partition_dir(const std::string &app_name,\n                                      const gpid &pid,\n                                      int32_t partition_count);\n\n    ///\n    /// helper functions\n    ///\n    inline std::shared_ptr<app_state> get_app(const std::string &name)\n    {\n        zauto_read_lock l(app_lock());\n        return _state->get_app(name);\n    }\n\n    inline 
std::shared_ptr<app_state> get_app(int32_t app_id)\n    {\n        zauto_read_lock l(app_lock());\n        return _state->get_app(app_id);\n    }\n\n    // get bulk_load_info path on file provider\n    // <remote_root_path>/<cluster_name>/<app_name>/bulk_load_info\n    inline std::string get_bulk_load_info_path(const std::string &app_name,\n                                               const std::string &cluster_name,\n                                               const std::string &remote_root_path) const\n    {\n        std::ostringstream oss;\n        oss << remote_root_path << \"/\" << cluster_name << \"/\" << app_name << \"/\"\n            << bulk_load_constant::BULK_LOAD_INFO;\n        return oss.str();\n    }\n\n    // get app_bulk_load_info path on remote storage\n    // <_bulk_load_root>/<app_id>\n    inline std::string get_app_bulk_load_path(int32_t app_id) const\n    {\n        std::stringstream oss;\n        oss << _bulk_load_root << \"/\" << app_id;\n        return oss.str();\n    }\n\n    // get partition_bulk_load_info path on remote storage\n    // <_bulk_load_root>/<app_id>/<partition_id>\n    inline std::string get_partition_bulk_load_path(const std::string &app_bulk_load_path,\n                                                    int partition_id) const\n    {\n        std::stringstream oss;\n        oss << app_bulk_load_path << \"/\" << partition_id;\n        return oss.str();\n    }\n\n    inline std::string get_partition_bulk_load_path(const gpid &pid) const\n    {\n        std::stringstream oss;\n        oss << get_app_bulk_load_path(pid.get_app_id()) << \"/\" << pid.get_partition_index();\n        return oss.str();\n    }\n\n    inline bool is_partition_metadata_not_updated(gpid pid)\n    {\n        zauto_read_lock l(_lock);\n        return is_partition_metadata_not_updated_unlocked(pid);\n    }\n\n    inline bool is_partition_metadata_not_updated_unlocked(gpid pid) const\n    {\n        const auto &iter = 
_partition_bulk_load_info.find(pid);\n        if (iter == _partition_bulk_load_info.end()) {\n            return false;\n        }\n        const auto &metadata = iter->second.metadata;\n        return (metadata.files.size() == 0 && metadata.file_total_size == 0);\n    }\n\n    inline bulk_load_status::type get_partition_bulk_load_status_unlocked(gpid pid) const\n    {\n        const auto &iter = _partition_bulk_load_info.find(pid);\n        if (iter != _partition_bulk_load_info.end()) {\n            return iter->second.status;\n        } else {\n            return bulk_load_status::BLS_INVALID;\n        }\n    }\n\n    inline bulk_load_status::type get_app_bulk_load_status(int32_t app_id)\n    {\n        zauto_read_lock l(_lock);\n        return get_app_bulk_load_status_unlocked(app_id);\n    }\n\n    inline bulk_load_status::type get_app_bulk_load_status_unlocked(int32_t app_id) const\n    {\n        const auto &iter = _app_bulk_load_info.find(app_id);\n        if (iter != _app_bulk_load_info.end()) {\n            return iter->second.status;\n        } else {\n            return bulk_load_status::BLS_INVALID;\n        }\n    }\n\n    inline error_code get_app_bulk_load_err_unlocked(int32_t app_id) const\n    {\n        const auto &iter = _app_bulk_load_info.find(app_id);\n        if (iter != _app_bulk_load_info.end()) {\n            return iter->second.bulk_load_err;\n        } else {\n            return ERR_OK;\n        }\n    }\n\n    inline bool is_app_bulk_loading_unlocked(int32_t app_id) const\n    {\n        return (_bulk_load_app_id.find(app_id) != _bulk_load_app_id.end());\n    }\n\nprivate:\n    friend class bulk_load_service_test;\n    friend class meta_bulk_load_http_test;\n\n    meta_service *_meta_svc;\n    server_state *_state;\n\n    std::unique_ptr<mss::meta_storage> _sync_bulk_load_storage;\n    std::unique_ptr<ingestion_context> _ingestion_context;\n    task_tracker _sync_tracker;\n\n    zrwlock_nr &app_lock() const { return _state->_lock; }\n   
 zrwlock_nr _lock; // bulk load states lock\n\n    const std::string _bulk_load_root; // <cluster_root>/bulk_load\n\n    /// bulk load states\n    std::unordered_set<int32_t> _bulk_load_app_id;\n    std::unordered_map<app_id, app_bulk_load_info> _app_bulk_load_info;\n\n    std::unordered_map<app_id, int32_t> _apps_in_progress_count;\n    std::unordered_map<app_id, bool> _apps_pending_sync_flag;\n\n    std::unordered_map<gpid, partition_bulk_load_info> _partition_bulk_load_info;\n    std::unordered_map<gpid, bool> _partitions_pending_sync_flag;\n\n    // partition_index -> group total download progress\n    std::unordered_map<gpid, int32_t> _partitions_total_download_progress;\n    // partition_index -> group bulk load states(node address -> state)\n    std::unordered_map<gpid, std::map<rpc_address, partition_bulk_load_state>>\n        _partitions_bulk_load_state;\n\n    std::unordered_map<gpid, bool> _partitions_cleaned_up;\n    // Used for bulk load failed and app unavailable to avoid duplicated clean up\n    std::unordered_map<app_id, bool> _apps_cleaning_up;\n    // Used for bulk load rolling back to downloading\n    std::unordered_map<app_id, bool> _apps_rolling_back;\n    // Used for restrict bulk load rollback count\n    std::unordered_map<app_id, int32_t> _apps_rollback_count;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_data.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     the meta server's date structure, impl file\n *\n * Revision history:\n *     2016-04-25, Weijie Sun(sunweijie at xiaomi.com), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n#include <boost/lexical_cast.hpp>\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/service_api_cpp.h>\n#include <dsn/utility/flags.h>\n\n#include \"meta_data.h\"\n\nnamespace dsn {\nnamespace replication {\n\n// There is an option `max_replicas_in_group` which restricts the max replica count of the whole\n// cluster. It's a cluster-level option. 
However, now that it's allowed to update the replication\n// factor of each table, this cluster-level option should be replaced.\n//\n// Conceptually `max_replicas_in_group` is the total number of alive and dropped replicas. Its\n// default value is 4. For a table that has replication factor 3, that `max_replicas_in_group`\n// is set to 4 means 3 alive replicas plus a dropped replica.\n//\n// `max_replicas_in_group` can also be loaded from configuration file, which means its default\n// value will be overridden. The value of `max_replicas_in_group` will be assigned to another\n// static variable `MAX_REPLICA_COUNT_IN_GRROUP`, whose default value is also 4.\n//\n// For unit tests, `MAX_REPLICA_COUNT_IN_GRROUP` is set to the default value 4; for production\n// environments, `MAX_REPLICA_COUNT_IN_GRROUP` is set to 3 since `max_replicas_in_group` is\n// configured as 3 in `.ini` file.\n//\n// Since the cluster-level option `max_replicas_in_group` contains the alive and dropped replicas,\n// we can use the replication factor of each table as the number of alive replicas, and introduce\n// another option `max_reserved_dropped_replicas` representing the max reserved number allowed for\n// dropped replicas.\n//\n// If `max_reserved_dropped_replicas` is set to 1, there is at most one dropped replica reserved,\n// which means, once the number of alive replicas reaches max_replica_count, at most one dropped\n// replica can be reserved and others will be eliminated; if `max_reserved_dropped_replicas` is\n// set to 0, however, none of dropped replicas can be reserved.\n//\n// To be consistent with `max_replicas_in_group`, default value of `max_reserved_dropped_replicas`\n// is set to 1 so that the unit tests can be passed. 
For production environments, it should be set\n// to 0.\nDSN_DEFINE_uint32(\"meta_server\",\n                  max_reserved_dropped_replicas,\n                  1,\n                  \"max reserved number allowed for dropped replicas\");\nDSN_TAG_VARIABLE(max_reserved_dropped_replicas, FT_MUTABLE);\n\nvoid when_update_replicas(config_type::type t, const std::function<void(bool)> &func)\n{\n    switch (t) {\n    case config_type::CT_ASSIGN_PRIMARY:\n    case config_type::CT_UPGRADE_TO_PRIMARY:\n    case config_type::CT_UPGRADE_TO_SECONDARY:\n        func(true);\n        break;\n    case config_type::CT_DOWNGRADE_TO_INACTIVE:\n    case config_type::CT_REMOVE:\n    case config_type::CT_DROP_PARTITION:\n        func(false);\n        break;\n    default:\n        break;\n    }\n}\n\nvoid maintain_drops(std::vector<rpc_address> &drops, const rpc_address &node, config_type::type t)\n{\n    auto action = [&drops, &node](bool is_adding) {\n        auto it = std::find(drops.begin(), drops.end(), node);\n        if (is_adding) {\n            if (it != drops.end()) {\n                drops.erase(it);\n            }\n        } else {\n            dassert(it == drops.end(),\n                    \"the node(%s) cannot be in drops set before this update\",\n                    node.to_string());\n            drops.push_back(node);\n            if (drops.size() > 3) {\n                drops.erase(drops.begin());\n            }\n        }\n    };\n    when_update_replicas(t, action);\n}\n\nbool construct_replica(meta_view view, const gpid &pid, int max_replica_count)\n{\n    partition_configuration &pc = *get_config(*view.apps, pid);\n    config_context &cc = *get_config_context(*view.apps, pid);\n\n    dassert(replica_count(pc) == 0,\n            \"replica count of gpid(%d.%d) must be 0\",\n            pid.get_app_id(),\n            pid.get_partition_index());\n    dassert(\n        max_replica_count > 0, \"max replica count is %d, should be at lease 1\", max_replica_count);\n\n    
std::vector<dropped_replica> &drop_list = cc.dropped;\n    if (drop_list.empty()) {\n        dwarn(\"construct for (%d.%d) failed, coz no replicas collected\",\n              pid.get_app_id(),\n              pid.get_partition_index());\n        return false;\n    }\n\n    // treat last server in drop_list as the primary\n    const dropped_replica &server = drop_list.back();\n    dassert(server.ballot != invalid_ballot,\n            \"the ballot of server must not be invalid_ballot, node = %s\",\n            server.node.to_string());\n    pc.primary = server.node;\n    pc.ballot = server.ballot;\n    pc.partition_flags = 0;\n    pc.max_replica_count = max_replica_count;\n\n    ddebug(\"construct for (%d.%d), select %s as primary, ballot(%\" PRId64\n           \"), committed_decree(%\" PRId64 \"), prepare_decree(%\" PRId64 \")\",\n           pid.get_app_id(),\n           pid.get_partition_index(),\n           server.node.to_string(),\n           server.ballot,\n           server.last_committed_decree,\n           server.last_prepared_decree);\n\n    drop_list.pop_back();\n\n    // we put max_replica_count-1 recent replicas to last_drops, in case of the DDD-state when the\n    // only primary dead\n    // when add node to pc.last_drops, we don't remove it from our cc.drop_list\n    dassert(pc.last_drops.empty(),\n            \"last_drops of partition(%d.%d) must be empty\",\n            pid.get_app_id(),\n            pid.get_partition_index());\n    for (auto iter = drop_list.rbegin(); iter != drop_list.rend(); ++iter) {\n        if (pc.last_drops.size() + 1 >= max_replica_count)\n            break;\n        // similar to cc.drop_list, pc.last_drop is also a stack structure\n        pc.last_drops.insert(pc.last_drops.begin(), iter->node);\n        ddebug(\"construct for (%d.%d), select %s into last_drops, ballot(%\" PRId64\n               \"), committed_decree(%\" PRId64 \"), prepare_decree(%\" PRId64 \")\",\n               pid.get_app_id(),\n               
pid.get_partition_index(),\n               iter->node.to_string(),\n               iter->ballot,\n               iter->last_committed_decree,\n               iter->last_prepared_decree);\n    }\n\n    cc.prefered_dropped = (int)drop_list.size() - 1;\n    return true;\n}\n\nbool collect_replica(meta_view view, const rpc_address &node, const replica_info &info)\n{\n    partition_configuration &pc = *get_config(*view.apps, info.pid);\n    // current partition is during partition split\n    if (pc.ballot == invalid_ballot)\n        return false;\n    config_context &cc = *get_config_context(*view.apps, info.pid);\n    if (is_member(pc, node)) {\n        cc.collect_serving_replica(node, info);\n        return true;\n    }\n\n    // compare current node's replica information with current proposal,\n    // and try to find abnormal situations in send proposal\n    cc.adjust_proposal(node, info);\n\n    // adjust the drop list\n    int ans = cc.collect_drop_replica(node, info);\n    dassert(cc.check_order(), \"\");\n\n    return info.status == partition_status::PS_POTENTIAL_SECONDARY || ans != -1;\n}\n\nproposal_actions::proposal_actions() : from_balancer(false) { reset_tracked_current_learner(); }\n\nvoid proposal_actions::reset_tracked_current_learner()\n{\n    learning_progress_abnormal_detected = false;\n    current_learner.ballot = invalid_ballot;\n    current_learner.last_durable_decree = invalid_decree;\n    current_learner.last_committed_decree = invalid_decree;\n    current_learner.last_prepared_decree = invalid_decree;\n}\n\nvoid proposal_actions::track_current_learner(const dsn::rpc_address &node, const replica_info &info)\n{\n    if (empty())\n        return;\n    configuration_proposal_action &act = acts.front();\n    if (act.node != node)\n        return;\n\n    // currently we only handle add secondary\n    // TODO: adjust other proposals according to replica info collected\n    if (act.type == config_type::CT_ADD_SECONDARY ||\n        act.type == 
config_type::CT_ADD_SECONDARY_FOR_LB) {\n\n        if (info.status == partition_status::PS_ERROR ||\n            info.status == partition_status::PS_INACTIVE) {\n            // if we've collected inforamtions for the learner, then it claims it's down\n            // we will treat the learning process failed\n            if (current_learner.ballot != invalid_ballot) {\n                ddebug(\"%d.%d: a learner's is down to status(%s), perhaps learn failed\",\n                       info.pid.get_app_id(),\n                       info.pid.get_partition_index(),\n                       dsn::enum_to_string(info.status));\n                learning_progress_abnormal_detected = true;\n            } else {\n                dinfo(\"%d.%d: ignore abnormal status of %s, perhaps learn not start\",\n                      info.pid.get_app_id(),\n                      info.pid.get_partition_index(),\n                      node.to_string());\n            }\n        } else if (info.status == partition_status::PS_POTENTIAL_SECONDARY) {\n            if (current_learner.ballot > info.ballot ||\n                current_learner.last_committed_decree > info.last_committed_decree ||\n                current_learner.last_prepared_decree > info.last_prepared_decree) {\n\n                // TODO: need to add a perf counter here\n                dwarn(\"%d.%d: learner(%s)'s progress step back, please trace this carefully\",\n                      info.pid.get_app_id(),\n                      info.pid.get_partition_index(),\n                      node.to_string());\n            }\n\n            // NOTICE: the flag may be abormal currently. 
it's balancer's duty to make use of the\n            // abnormal flag and decide whether to cancel the proposal.\n            // if the balancer try to give the proposal another chance, or another learning round\n            // starts before the balancer notice it, let's just treat it normal again.\n            learning_progress_abnormal_detected = false;\n            current_learner = info;\n        }\n    }\n}\n\nbool proposal_actions::is_abnormal_learning_proposal() const\n{\n    if (empty())\n        return false;\n    if (front()->type != config_type::CT_ADD_SECONDARY &&\n        front()->type != config_type::CT_ADD_SECONDARY_FOR_LB)\n        return false;\n    return learning_progress_abnormal_detected;\n}\n\nvoid proposal_actions::clear()\n{\n    from_balancer = false;\n    acts.clear();\n    reset_tracked_current_learner();\n}\n\nvoid proposal_actions::pop_front()\n{\n    if (!acts.empty()) {\n        acts.erase(acts.begin());\n        reset_tracked_current_learner();\n    }\n}\n\nconst configuration_proposal_action *proposal_actions::front() const\n{\n    if (acts.empty())\n        return nullptr;\n    return &acts.front();\n}\n\nvoid proposal_actions::assign_cure_proposal(const configuration_proposal_action &act)\n{\n    from_balancer = false;\n    acts = {act};\n    reset_tracked_current_learner();\n}\n\nvoid proposal_actions::assign_balancer_proposals(\n    const std::vector<configuration_proposal_action> &cpa_list)\n{\n    from_balancer = true;\n    acts = cpa_list;\n    reset_tracked_current_learner();\n}\n\nbool proposal_actions::empty() const { return acts.empty(); }\n\nint config_context::MAX_REPLICA_COUNT_IN_GRROUP = 4;\nvoid config_context::cancel_sync()\n{\n    if (config_status::pending_remote_sync == stage) {\n        pending_sync_task->cancel(false);\n        pending_sync_task = nullptr;\n        pending_sync_request.reset();\n    }\n    if (msg) {\n        msg->release_ref();\n    }\n    msg = nullptr;\n    stage = 
config_status::not_pending;\n}\n\nvoid config_context::check_size()\n{\n    // when add learner, it is possible that replica_count > max_replica_count, so we\n    // need to remove things from dropped only when it's not empty.\n    while (replica_count(*config_owner) + dropped.size() >\n               config_owner->max_replica_count + FLAGS_max_reserved_dropped_replicas &&\n           !dropped.empty()) {\n        dropped.erase(dropped.begin());\n        prefered_dropped = (int)dropped.size() - 1;\n    }\n}\n\nstd::vector<dropped_replica>::iterator config_context::find_from_dropped(const rpc_address &node)\n{\n    return std::find_if(dropped.begin(), dropped.end(), [&node](const dropped_replica &r) {\n        return r.node == node;\n    });\n}\n\nstd::vector<dropped_replica>::const_iterator\nconfig_context::find_from_dropped(const rpc_address &node) const\n{\n    return std::find_if(dropped.begin(), dropped.end(), [&node](const dropped_replica &r) {\n        return r.node == node;\n    });\n}\n\nbool config_context::remove_from_dropped(const rpc_address &node)\n{\n    auto iter = find_from_dropped(node);\n    if (iter != dropped.end()) {\n        dropped.erase(iter);\n        prefered_dropped = (int)dropped.size() - 1;\n        return true;\n    }\n    return false;\n}\n\nbool config_context::record_drop_history(const rpc_address &node)\n{\n    auto iter = find_from_dropped(node);\n    if (iter != dropped.end())\n        return false;\n    dropped.emplace_back(\n        dropped_replica{node, dsn_now_ms(), invalid_ballot, invalid_decree, invalid_decree});\n    prefered_dropped = (int)dropped.size() - 1;\n    check_size();\n    return true;\n}\n\nint config_context::collect_drop_replica(const rpc_address &node, const replica_info &info)\n{\n    bool in_dropped = false;\n    auto iter = find_from_dropped(node);\n    uint64_t last_drop_time = dropped_replica::INVALID_TIMESTAMP;\n    if (iter != dropped.end()) {\n        in_dropped = true;\n        last_drop_time = 
iter->time;\n        dropped.erase(iter);\n        prefered_dropped = (int)dropped.size() - 1;\n    }\n\n    dropped_replica current = {\n        node, last_drop_time, info.ballot, info.last_committed_decree, info.last_prepared_decree};\n    auto cmp = [](const dropped_replica &d1, const dropped_replica &d2) {\n        return dropped_cmp(d1, d2) < 0;\n    };\n    iter = std::lower_bound(dropped.begin(), dropped.end(), current, cmp);\n\n    dropped.emplace(iter, current);\n    prefered_dropped = (int)dropped.size() - 1;\n    check_size();\n\n    iter = find_from_dropped(node);\n    if (iter == dropped.end()) {\n        dassert(!in_dropped,\n                \"adjust position of existing node(%s) failed, this is a bug, partition(%d.%d)\",\n                node.to_string(),\n                config_owner->pid.get_app_id(),\n                config_owner->pid.get_partition_index());\n        return -1;\n    }\n    return in_dropped ? 1 : 0;\n}\n\nbool config_context::check_order()\n{\n    if (dropped.empty())\n        return true;\n    for (unsigned int i = 0; i < dropped.size() - 1; ++i) {\n        if (dropped_cmp(dropped[i], dropped[i + 1]) > 0) {\n            derror(\"check dropped order for gpid(%d.%d) failed, [%s,%llu,%lld,%lld,%lld@%d] vs \"\n                   \"[%s,%llu,%lld,%lld,%lld@%d]\",\n                   config_owner->pid.get_app_id(),\n                   config_owner->pid.get_partition_index(),\n                   dropped[i].node.to_string(),\n                   dropped[i].time,\n                   dropped[i].ballot,\n                   dropped[i].last_committed_decree,\n                   dropped[i].last_prepared_decree,\n                   i,\n                   dropped[i].node.to_string(),\n                   dropped[i].time,\n                   dropped[i].ballot,\n                   dropped[i].last_committed_decree,\n                   dropped[i].last_prepared_decree,\n                   i + 1);\n            return false;\n        }\n    }\n    return 
true;\n}\n\nstd::vector<serving_replica>::iterator config_context::find_from_serving(const rpc_address &node)\n{\n    return std::find_if(serving.begin(), serving.end(), [&node](const serving_replica &r) {\n        return r.node == node;\n    });\n}\n\nstd::vector<serving_replica>::const_iterator\nconfig_context::find_from_serving(const rpc_address &node) const\n{\n    return std::find_if(serving.begin(), serving.end(), [&node](const serving_replica &r) {\n        return r.node == node;\n    });\n}\n\nbool config_context::remove_from_serving(const rpc_address &node)\n{\n    auto iter = find_from_serving(node);\n    if (iter != serving.end()) {\n        serving.erase(iter);\n        return true;\n    }\n    return false;\n}\n\nvoid config_context::collect_serving_replica(const rpc_address &node, const replica_info &info)\n{\n    auto iter = find_from_serving(node);\n    auto compact_status = info.__isset.manual_compact_status ? info.manual_compact_status\n                                                             : manual_compaction_status::IDLE;\n    if (iter != serving.end()) {\n        iter->disk_tag = info.disk_tag;\n        iter->storage_mb = 0;\n        iter->compact_status = compact_status;\n    } else {\n        serving.emplace_back(serving_replica{node, 0, info.disk_tag, compact_status});\n    }\n}\n\nvoid config_context::adjust_proposal(const rpc_address &node, const replica_info &info)\n{\n    lb_actions.track_current_learner(node, info);\n}\n\nbool config_context::get_disk_tag(const rpc_address &node, /*out*/ std::string &disk_tag) const\n{\n    auto iter = find_from_serving(node);\n    if (iter == serving.end()) {\n        return false;\n    }\n    disk_tag = iter->disk_tag;\n    return true;\n}\n\nvoid app_state_helper::on_init_partitions()\n{\n    config_context context;\n    context.stage = config_status::not_pending;\n    context.pending_sync_task = nullptr;\n    context.msg = nullptr;\n\n    context.prefered_dropped = -1;\n    
contexts.assign(owner->partition_count, context);\n\n    std::vector<partition_configuration> &partitions = owner->partitions;\n    for (unsigned int i = 0; i != owner->partition_count; ++i) {\n        contexts[i].config_owner = &(partitions[i]);\n    }\n\n    partitions_in_progress.store(owner->partition_count);\n    restore_states.resize(owner->partition_count);\n}\n\nvoid app_state_helper::reset_manual_compact_status()\n{\n    for (auto &cc : contexts) {\n        for (auto &r : cc.serving) {\n            r.compact_status = manual_compaction_status::IDLE;\n        }\n    }\n}\n\nbool app_state_helper::get_manual_compact_progress(/*out*/ int32_t &progress) const\n{\n    int32_t total_replica_count = owner->partition_count * owner->max_replica_count;\n    dassert_f(total_replica_count > 0,\n              \"invalid app metadata, app({}), partition_count({}), max_replica_count({})\",\n              owner->app_name,\n              owner->partition_count,\n              owner->max_replica_count);\n    int32_t finish_count = 0, idle_count = 0;\n    for (const auto &cc : contexts) {\n        for (const auto &r : cc.serving) {\n            if (r.compact_status == manual_compaction_status::IDLE) {\n                idle_count++;\n            } else if (r.compact_status == manual_compaction_status::FINISHED) {\n                finish_count++;\n            }\n        }\n    }\n    // all replicas of all partitions are idle\n    if (idle_count == total_replica_count) {\n        progress = 0;\n        return false;\n    }\n    progress = finish_count * 100 / total_replica_count;\n    return true;\n}\n\napp_state::app_state(const app_info &info) : app_info(info), helpers(new app_state_helper())\n{\n    log_name = info.app_name + \"(\" + boost::lexical_cast<std::string>(info.app_id) + \")\";\n    helpers->owner = this;\n\n    partition_configuration config;\n    config.ballot = 0;\n    config.pid.set_app_id(app_id);\n    config.last_committed_decree = 0;\n    
config.last_drops.clear();\n    config.max_replica_count = app_info::max_replica_count;\n    config.primary.set_invalid();\n    config.secondaries.clear();\n    partitions.assign(app_info::partition_count, config);\n    for (int i = 0; i != app_info::partition_count; ++i)\n        partitions[i].pid.set_partition_index(i);\n\n    helpers->on_init_partitions();\n}\n\nstd::shared_ptr<app_state> app_state::create(const app_info &info)\n{\n    return std::make_shared<app_state>(info);\n}\n\nnode_state::node_state()\n    : total_primaries(0), total_partitions(0), is_alive(false), has_collected_replicas(false)\n{\n}\n\nconst partition_set *node_state::get_partitions(int app_id, bool only_primary) const\n{\n    const std::map<int32_t, partition_set> *all_partitions;\n    if (only_primary)\n        all_partitions = &app_primaries;\n    else\n        all_partitions = &app_partitions;\n\n    auto iter = all_partitions->find(app_id);\n    if (iter == all_partitions->end())\n        return nullptr;\n    else\n        return &(iter->second);\n}\n\npartition_set *node_state::get_partitions(app_id id, bool only_primary, bool create_new)\n{\n    std::map<int32_t, partition_set> *all_partitions;\n    if (only_primary)\n        all_partitions = &app_primaries;\n    else\n        all_partitions = &app_partitions;\n\n    if (create_new) {\n        return &((*all_partitions)[id]);\n    } else {\n        auto iter = all_partitions->find(id);\n        if (iter == all_partitions->end())\n            return nullptr;\n        else\n            return &(iter->second);\n    }\n}\n\npartition_set *node_state::partitions(app_id id, bool only_primary)\n{\n    return const_cast<partition_set *>(get_partitions(id, only_primary));\n}\n\nconst partition_set *node_state::partitions(app_id id, bool only_primary) const\n{\n    return get_partitions(id, only_primary);\n}\n\nvoid node_state::put_partition(const gpid &pid, bool is_primary)\n{\n    partition_set *all = get_partitions(pid.get_app_id(), 
false, true);\n    if ((all->insert(pid)).second)\n        total_partitions++;\n    if (is_primary) {\n        partition_set *pri = get_partitions(pid.get_app_id(), true, true);\n        if ((pri->insert(pid)).second)\n            total_primaries++;\n    }\n}\n\nvoid node_state::remove_partition(const gpid &pid, bool only_primary)\n{\n    partition_set *pri = get_partitions(pid.get_app_id(), true, true);\n    total_primaries -= pri->erase(pid);\n    if (!only_primary) {\n        partition_set *all = get_partitions(pid.get_app_id(), false, true);\n        total_partitions -= all->erase(pid);\n    }\n}\n\nbool node_state::for_each_primary(app_id id, const std::function<bool(const gpid &)> &f) const\n{\n    const partition_set *pri = partitions(id, true);\n    if (pri == nullptr) {\n        return true;\n    }\n    for (const gpid &pid : *pri) {\n        dassert(id == pid.get_app_id(),\n                \"invalid gpid(%d.%d), app_id must be %d\",\n                pid.get_app_id(),\n                pid.get_partition_index(),\n                id);\n        if (!f(pid))\n            return false;\n    }\n    return true;\n}\n\nbool node_state::for_each_partition(app_id id, const std::function<bool(const gpid &)> &f) const\n{\n    const partition_set *par = partitions(id, false);\n    if (par == nullptr) {\n        return true;\n    }\n    for (const gpid &pid : *par) {\n        dassert(id == pid.get_app_id(),\n                \"invalid gpid(%d.%d), app_id must be %d\",\n                pid.get_app_id(),\n                pid.get_partition_index(),\n                id);\n        if (!f(pid))\n            return false;\n    }\n    return true;\n}\n\nbool node_state::for_each_partition(const std::function<bool(const gpid &)> &f) const\n{\n    for (const auto &pair : app_partitions) {\n        const partition_set &ps = pair.second;\n        for (const auto &gpid : ps) {\n            if (!f(gpid))\n                return false;\n        }\n    }\n    return true;\n}\n\nunsigned 
node_state::primary_count(app_id id) const\n{\n    const partition_set *pri = partitions(id, true);\n    if (pri == nullptr)\n        return 0;\n    return pri->size();\n}\n\nunsigned node_state::partition_count(app_id id) const\n{\n    const partition_set *pri = partitions(id, false);\n    if (pri == nullptr)\n        return 0;\n    return pri->size();\n}\n\npartition_status::type node_state::served_as(const gpid &pid) const\n{\n    const partition_set *ps1 = partitions(pid.get_app_id(), true);\n    if (ps1 != nullptr && ps1->find(pid) != ps1->end())\n        return partition_status::PS_PRIMARY;\n    const partition_set *ps2 = partitions(pid.get_app_id(), false);\n    if (ps2 != nullptr && ps2->find(pid) != ps2->end())\n        return partition_status::PS_SECONDARY;\n    return partition_status::PS_INACTIVE;\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_data.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     the meta server's date structure\n *\n * Revision history:\n *     2016-04-25, Weijie Sun(sunweijie at xiaomi.com), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n#pragma once\n\n#include <memory>\n#include <set>\n#include <deque>\n#include <vector>\n#include <map>\n#include <unordered_map>\n#include <functional>\n#include <dsn/utility/utils.h>\n#include <dsn/utility/extensible_object.h>\n#include <dsn/dist/replication/replication_types.h>\n#include <dsn/dist/replication/replication_other_types.h>\n#include <dsn/cpp/json_helper.h>\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/dist/block_service.h>\n\n#include \"meta/duplication/duplication_info.h\"\n\nnamespace dsn {\nnamespace 
replication {\n\nenum class config_status\n{\n    not_pending,\n    pending_proposal, // deprecated since pegasus v1.8 or older version\n    pending_remote_sync,\n    invalid_status\n};\n\nENUM_BEGIN(config_status, config_status::invalid_status)\nENUM_REG(config_status::not_pending)\nENUM_REG(config_status::pending_proposal)\nENUM_REG(config_status::pending_remote_sync)\nENUM_END(config_status)\n\nenum class pc_status\n{\n    healthy,\n    ill,\n    dead,\n    invalid\n};\n\nENUM_BEGIN(pc_status, pc_status::invalid)\nENUM_REG(pc_status::healthy)\nENUM_REG(pc_status::ill)\nENUM_REG(pc_status::dead)\nENUM_END(pc_status)\n\nclass pc_flags\n{\npublic:\n    static const int dropped = 1;\n};\n\nclass proposal_actions\n{\nprivate:\n    bool from_balancer;\n\n    // used for track the learing process and check if abnormal situation happens\n    bool learning_progress_abnormal_detected;\n    replica_info current_learner;\n\n    // NOTICE:\n    // meta servic use configuration_proposal_action::period_ts\n    // to store a expire timestamp, but a rpc_sender use this field\n    // to suggest a ttl period\n    std::vector<configuration_proposal_action> acts;\n\npublic:\n    proposal_actions();\n    void reset_tracked_current_learner();\n    void track_current_learner(const rpc_address &node, const replica_info &info);\n    void clear();\n\n    // return the action in acts & whether the action is from balancer\n    bool is_from_balancer() const { return from_balancer; }\n    bool is_abnormal_learning_proposal() const;\n\n    void pop_front();\n    void assign_cure_proposal(const configuration_proposal_action &act);\n    void assign_balancer_proposals(const std::vector<configuration_proposal_action> &cpa_list);\n\n    const configuration_proposal_action *front() const;\n    bool empty() const;\n};\n\n//\n// structure \"dropped_replica\" represents a replica which was downgraded to inactive.\n// there are 2 sources to get the dropped replica:\n//   1. 
by record the meta's update-cfg action\n//   2. by collect the inactive replicas reported from the replica servers\n// generally, we give a partitial order for the dropped_replica, in which a higher order\n// roughly means that the replica has MORE data.\n//\n// a load balancer may record a list of dropped_replica to track the drop history and use\n// it to do the cure decision.\n//\n\n// currently dropped_cmp depend on the dropped_replica::INVALID_TIMESTAMP is 0,\n// if you modify the dropped_replica::INVALID_TIMESTAMP, please modify the dropped_cmp accordingly.\nstruct dropped_replica\n{\n    dsn::rpc_address node;\n\n    // if a drop-replica is generated by the update-cfg-req, then we can\n    // record the drop time (milliseconds)\n    uint64_t time;\n    // if a drop-replica is got from the replica server's report, then we can\n    // record (ballot, commit_decree, prepare_decree)\n    //[\n    int64_t ballot;\n    int64_t last_committed_decree;\n    int64_t last_prepared_decree;\n    //]\n    static const uint64_t INVALID_TIMESTAMP = 0;\n};\n\n// the order of dropped_replica\n// ret:\n//   0 => equal\n//   negtive => d1 smaller than d2\n//   positive => d1 larger than d2\ninline int dropped_cmp(const dropped_replica &d1, const dropped_replica &d2)\n{\n    if (d1.time != d2.time) {\n        return (d1.time < d2.time) ? -1 : 1;\n    }\n    if (d1.ballot != d2.ballot) {\n        return d1.ballot < d2.ballot ? -1 : 1;\n    }\n    if (d1.last_committed_decree != d2.last_committed_decree) {\n        return d1.last_committed_decree < d2.last_committed_decree ? -1 : 1;\n    }\n    if (d1.last_prepared_decree != d2.last_prepared_decree) {\n        return d1.last_prepared_decree < d2.last_prepared_decree ? -1 : 1;\n    }\n    return 0;\n}\n\n// Represent a replica that is serving. 
Info in this structure can only from config-sync of RS.\n// Load balancer may use this to do balance decisions.\nstruct serving_replica\n{\n    dsn::rpc_address node;\n    // TODO: report the storage size of replica\n    int64_t storage_mb;\n    std::string disk_tag;\n    manual_compaction_status::type compact_status;\n};\n\nclass config_context\n{\npublic:\n    partition_configuration *config_owner;\n    config_status stage;\n    // for server state's update config management\n    //[\n    task_ptr pending_sync_task;\n    std::shared_ptr<configuration_update_request> pending_sync_request;\n    dsn::message_ex *msg;\n    //]\n\n    // for load balancer's decision\n    //[\n    proposal_actions lb_actions;\n    std::vector<serving_replica> serving;\n    std::vector<dropped_replica> dropped;\n    // An index value to the vector \"dropped\".\n    // Used in load-balancer's cure to avoid select the same learner as\n    // previous unsuccessful proposal.\n    // Please refer to partition_guardian::on_missing_secondary.\n    //\n    // This should always be less than the dropped.size()\n    //\n    // TODO: a more clear implementation\n    int32_t prefered_dropped;\n    //]\npublic:\n    void check_size();\n    void cancel_sync();\n\n    std::vector<dropped_replica>::iterator find_from_dropped(const dsn::rpc_address &node);\n    std::vector<dropped_replica>::const_iterator find_from_dropped(const rpc_address &node) const;\n\n    // return true if remove ok, false if node doesn't in dropped\n    bool remove_from_dropped(const dsn::rpc_address &node);\n\n    // put recently downgraded node to dropped\n    // return true if put ok, false if the node has been in dropped\n    bool record_drop_history(const dsn::rpc_address &node);\n\n    // Notice: please make sure whether node is actually an inactive or a serving replica\n    // ret:\n    //   1 => node has been in the dropped\n    //   0 => insert the info to the dropped\n    //  -1 => info is too staled to insert\n    int 
collect_drop_replica(const dsn::rpc_address &node, const replica_info &info);\n\n    // check if dropped vector satisfied the order\n    bool check_order();\n\n    std::vector<serving_replica>::iterator find_from_serving(const dsn::rpc_address &node);\n    std::vector<serving_replica>::const_iterator find_from_serving(const rpc_address &node) const;\n\n    // return true if remove ok, false if node doesn't in serving\n    bool remove_from_serving(const dsn::rpc_address &node);\n\n    void collect_serving_replica(const dsn::rpc_address &node, const replica_info &info);\n\n    void adjust_proposal(const dsn::rpc_address &node, const replica_info &info);\n\n    bool get_disk_tag(const rpc_address &node, /*out*/ std::string &disk_tag) const;\n\npublic:\n    // intialize to 4 statically.\n    // and will be set by load-balancer module\n    static int MAX_REPLICA_COUNT_IN_GRROUP;\n};\n\nstruct partition_configuration_stateless\n{\n    partition_configuration &config;\n    partition_configuration_stateless(partition_configuration &pc) : config(pc) {}\n    std::vector<dsn::rpc_address> &workers() { return config.last_drops; }\n    std::vector<dsn::rpc_address> &hosts() { return config.secondaries; }\n    bool is_host(const rpc_address &node) const\n    {\n        return std::find(config.secondaries.begin(), config.secondaries.end(), node) !=\n               config.secondaries.end();\n    }\n    bool is_worker(const rpc_address &node) const\n    {\n        return std::find(config.last_drops.begin(), config.last_drops.end(), node) !=\n               config.last_drops.end();\n    }\n    bool is_member(const rpc_address &node) const { return is_host(node) || is_worker(node); }\n};\n\nstruct restore_state\n{\n    // restore_status:\n    //      ERR_OK: restore haven't encounter some error\n    //      ERR_CORRUPTION : data on backup media is damaged and we can not skip the damage data,\n    //                       so should restore rollback\n    //      ERR_IGNORE_DAMAGED_DATA 
: data on backup media is damaged but we can skip the damage\n    //                                data, so skip the damaged partition\n    dsn::error_code restore_status;\n    int32_t progress;\n    std::string reason;\n    restore_state() : restore_status(dsn::ERR_OK), progress(0), reason() {}\n};\n\n// app partition_split states\n// when starting partition split, `splitting_count` will be equal to old_partition_count,\n// <parent_partition_index, SPLITTING> will be inserted into `status`.\n// if partition[0] finish split, `splitting_count` will decrease and <0, SPLITTING> will be removed\n// in `status`.\nstruct split_state\n{\n    int32_t splitting_count;\n    // partition_index -> split_status\n    std::map<int32_t, split_status::type> status;\n    split_state() : splitting_count(0) {}\n};\n\nclass app_state;\nclass app_state_helper\n{\npublic:\n    app_state *owner;\n    std::atomic_int partitions_in_progress;\n    std::vector<config_context> contexts;\n    dsn::message_ex *pending_response;\n    std::vector<restore_state> restore_states;\n    split_state split_states;\n\npublic:\n    app_state_helper() : owner(nullptr), partitions_in_progress(0)\n    {\n        contexts.clear();\n        pending_response = nullptr;\n    }\n    void on_init_partitions();\n    void clear_proposals()\n    {\n        for (config_context &cc : contexts) {\n            cc.lb_actions.clear();\n        }\n    }\n\n    void reset_manual_compact_status();\n    // get replica group manual compact progress\n    // return false if partition is not executing manual compaction\n    bool get_manual_compact_progress(/*out*/ int32_t &progress) const;\n};\n\n/*\n * NOTICE: several keys in envs are reserved for recover from cold_backup:\n * envs[\"block_service_provider\"] = <block_service_provider>\n * envs[\"cluster_name\"] = <cluster_name>\n * envs[\"policy_name\"] = <policy_name>\n * envs[\"app_name\"] = <app_name>\n * envs[\"app_id\"] = <app_id>\n * envs[\"backup_id\"] = <backup_id>\n * 
envs[\"skip_bad_partition\"] = <\"true\" or \"false\">\n *\n * after a newly assigned primary get these envs from app_info, it will try to\n * init a replica with data stored on the block_device\n */\nclass app_state : public app_info\n{\nprotected:\n    std::string log_name;\n\npublic:\n    app_state(const app_info &info);\n\npublic:\n    const char *get_logname() const { return log_name.c_str(); }\n    std::shared_ptr<app_state_helper> helpers;\n    std::vector<partition_configuration> partitions;\n    std::map<dupid_t, duplication_info_s_ptr> duplications;\n\n    static std::shared_ptr<app_state> create(const app_info &info);\n    dsn::blob to_json(app_status::type temp_status)\n    {\n        app_info another = *this;\n        another.status = temp_status;\n        // persistent envs to zookeeper\n        dsn::blob result = dsn::json::json_forwarder<app_info>::encode(another);\n        return result;\n    }\n    bool splitting() const { return helpers->split_states.splitting_count > 0; }\n};\n\ntypedef std::set<dsn::gpid> partition_set;\ntypedef std::map<app_id, std::shared_ptr<app_state>> app_mapper;\n\nclass node_state : public extensible_object<node_state, 4>\n{\nprivate:\n    // partitions\n    std::map<int32_t, partition_set> app_primaries;\n    std::map<int32_t, partition_set> app_partitions;\n    unsigned total_primaries;\n    unsigned total_partitions;\n\n    // status\n    bool is_alive;\n    bool has_collected_replicas;\n    dsn::rpc_address address;\n\n    const partition_set *get_partitions(app_id id, bool only_primary) const;\n    partition_set *get_partitions(app_id id, bool only_primary, bool create_new);\n\npublic:\n    node_state();\n    const partition_set *partitions(app_id id, bool only_primary) const;\n    partition_set *partitions(app_id id, bool only_primary);\n\n    unsigned primary_count(app_id id) const;\n    unsigned secondary_count(app_id id) const { return partition_count(id) - primary_count(id); }\n    unsigned 
partition_count(app_id id) const;\n\n    unsigned primary_count() const { return total_primaries; }\n    unsigned secondary_count() const { return total_partitions - total_primaries; }\n    unsigned partition_count() const { return total_partitions; }\n\n    partition_status::type served_as(const gpid &pid) const;\n\n    bool alive() const { return is_alive; }\n    void set_alive(bool alive) { is_alive = alive; }\n    bool has_collected() { return has_collected_replicas; }\n    void set_replicas_collect_flag(bool has_collected) { has_collected_replicas = has_collected; }\n    dsn::rpc_address addr() const { return address; }\n    void set_addr(const dsn::rpc_address &addr) { address = addr; }\n\n    void put_partition(const dsn::gpid &pid, bool is_primary);\n    void remove_partition(const dsn::gpid &pid, bool only_primary);\n\n    bool for_each_partition(const std::function<bool(const dsn::gpid &pid)> &f) const;\n    bool for_each_partition(app_id id, const std::function<bool(const dsn::gpid &)> &f) const;\n    bool for_each_primary(app_id id, const std::function<bool(const dsn::gpid &pid)> &f) const;\n};\n\ntypedef std::unordered_map<rpc_address, node_state> node_mapper;\ntypedef std::map<dsn::gpid, std::shared_ptr<configuration_balancer_request>> migration_list;\n\nstruct meta_view\n{\n    app_mapper *apps;\n    node_mapper *nodes;\n};\n\ninline node_state *get_node_state(node_mapper &nodes, rpc_address addr, bool create_new)\n{\n    node_state *ns;\n    if (nodes.find(addr) == nodes.end()) {\n        if (!create_new)\n            return nullptr;\n        ns = &nodes[addr];\n        ns->set_addr(addr);\n    }\n    ns = &nodes[addr];\n    return ns;\n}\n\ninline bool is_node_alive(const node_mapper &nodes, rpc_address addr)\n{\n    auto iter = nodes.find(addr);\n    if (iter == nodes.end())\n        return false;\n    return iter->second.alive();\n}\n\ninline const partition_configuration *get_config(const app_mapper &apps, const dsn::gpid &gpid)\n{\n    auto 
iter = apps.find(gpid.get_app_id());\n    if (iter == apps.end() || iter->second->status == app_status::AS_DROPPED)\n        return nullptr;\n    return &(iter->second->partitions[gpid.get_partition_index()]);\n}\n\ninline partition_configuration *get_config(app_mapper &apps, const dsn::gpid &gpid)\n{\n    auto iter = apps.find(gpid.get_app_id());\n    if (iter == apps.end() || iter->second->status == app_status::AS_DROPPED)\n        return nullptr;\n    return &(iter->second->partitions[gpid.get_partition_index()]);\n}\n\ninline const config_context *get_config_context(const app_mapper &apps, const dsn::gpid &gpid)\n{\n    auto iter = apps.find(gpid.get_app_id());\n    if (iter == apps.end() || iter->second->status == app_status::AS_DROPPED)\n        return nullptr;\n    return &(iter->second->helpers->contexts[gpid.get_partition_index()]);\n}\n\ninline config_context *get_config_context(app_mapper &apps, const dsn::gpid &gpid)\n{\n    auto iter = apps.find(gpid.get_app_id());\n    if (iter == apps.end() || iter->second->status == app_status::AS_DROPPED)\n        return nullptr;\n    return &(iter->second->helpers->contexts[gpid.get_partition_index()]);\n}\n\ninline int replica_count(const partition_configuration &pc)\n{\n    int ans = (pc.primary.is_invalid()) ? 
0 : 1;\n    return ans + pc.secondaries.size();\n}\n\nenum health_status\n{\n    HS_DEAD = 0,     // (primary = 0 && secondary = 0)\n    HS_UNREADABLE,   // (primary = 0 && secondary > 0)\n    HS_UNWRITABLE,   // (primary = 1 && primary + secondary < mutation_2pc_min_replica_count)\n    HS_WRITABLE_ILL, // (primary = 1 && primary + secondary >= mutation_2pc_min_replica_count\n                     //              && primary + secondary < max_replica_count)\n    HS_HEALTHY,      // (primary = 1 && primary + secondary >= max_replica_count)\n    HS_MAX_VALUE\n};\n\ninline health_status partition_health_status(const partition_configuration &pc,\n                                             int mutation_2pc_min_replica_count)\n{\n    if (pc.primary.is_invalid()) {\n        if (pc.secondaries.empty())\n            return HS_DEAD;\n        else\n            return HS_UNREADABLE;\n    } else { // !pc.primary.is_invalid()\n        int n = pc.secondaries.size() + 1;\n        if (n < mutation_2pc_min_replica_count)\n            return HS_UNWRITABLE;\n        else if (n < pc.max_replica_count)\n            return HS_WRITABLE_ILL;\n        else\n            return HS_HEALTHY;\n    }\n}\n\ninline void\nfor_each_available_app(const app_mapper &apps,\n                       const std::function<bool(const std::shared_ptr<app_state> &)> &action)\n{\n    for (const auto &p : apps) {\n        if (p.second->status == app_status::AS_AVAILABLE) {\n            if (!action(p.second))\n                break;\n        }\n    }\n}\n\ninline int count_partitions(const app_mapper &apps)\n{\n    int result = 0;\n    for (auto iter : apps)\n        if (iter.second->status == app_status::AS_AVAILABLE)\n            result += iter.second->partition_count;\n    return result;\n}\n\nvoid when_update_replicas(config_type::type t, const std::function<void(bool)> &func);\nvoid maintain_drops(/*inout*/ std::vector<dsn::rpc_address> &drops,\n                    const dsn::rpc_address &node,\n                
    config_type::type t);\n\n// Try to construct a replica-group by current replica-infos of a gpid\n// ret:\n//   if construct the replica successfully, return true.\n//   Notice: as long as we can construct something from current infos, we treat it as a\n//   success\nbool construct_replica(meta_view view, const gpid &pid, int max_replica_count);\n\n// When replica infos are collected from replica servers, meta-server\n// will use this to check if a replica on a server is useful\n// params:\n//   node: the owner of the replica info\n//   info: the replica info on node\n// ret:\n//   return true if the replica is accepted as an useful replica. Or-else false.\n//   WARNING: if false is returned, the replica on node may be garbage-collected\nbool collect_replica(meta_view view, const rpc_address &node, const replica_info &info);\n\ninline bool has_seconds_expired(uint64_t second_ts) { return second_ts * 1000 < dsn_now_ms(); }\n\ninline bool has_milliseconds_expired(uint64_t milliseconds_ts)\n{\n    return milliseconds_ts < dsn_now_ms();\n}\n} // namespace replication\n} // namespace dsn\n\nnamespace dsn {\nnamespace json {\n\ninline void json_encode(dsn::json::JsonWriter &out, const replication::app_state &state)\n{\n    json_forwarder<dsn::app_info>::encode(out, (const dsn::app_info &)state);\n}\n\ninline bool json_decode(const dsn::json::JsonObject &in, replication::app_state &state)\n{\n    return json_forwarder<dsn::app_info>::decode(in, (dsn::app_info &)state);\n}\n} // namespace json\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_http_service.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <string>\n\n#include <dsn/c/api_layer1.h>\n#include <dsn/cpp/serialization_helper/dsn.layer2_types.h>\n#include <dsn/dist/replication/replica_envs.h>\n#include <dsn/dist/replication/replication_types.h>\n#include <dsn/dist/replication/duplication_common.h>\n#include <dsn/utility/config_api.h>\n#include <dsn/utility/output_utils.h>\n#include <dsn/utils/time_utils.h>\n\n#include \"meta_http_service.h\"\n#include \"meta_server_failure_detector.h\"\n#include \"server_load_balancer.h\"\n#include \"server_state.h\"\n#include \"meta/duplication/meta_duplication_service.h\"\n#include \"meta/meta_bulk_load_service.h\"\n\nnamespace dsn {\nnamespace replication {\n\nstruct list_nodes_helper\n{\n    std::string node_address;\n    std::string node_status;\n    int primary_count;\n    int secondary_count;\n    list_nodes_helper(const std::string &a, const std::string &s)\n        : node_address(a), node_status(s), primary_count(0), secondary_count(0)\n    {\n    }\n};\n\nvoid meta_http_service::get_app_handler(const http_request &req, http_response &resp)\n{\n    std::string app_name;\n    bool detailed = false;\n    for (const auto &p : 
req.query_args) {\n        if (p.first == \"name\") {\n            app_name = p.second;\n        } else if (p.first == \"detail\") {\n            detailed = true;\n        } else {\n            resp.status_code = http_status_code::bad_request;\n            return;\n        }\n    }\n    if (!redirect_if_not_primary(req, resp))\n        return;\n\n    configuration_query_by_index_request request;\n    configuration_query_by_index_response response;\n\n    request.app_name = app_name;\n    _service->_state->query_configuration_by_index(request, response);\n    if (response.err == ERR_OBJECT_NOT_FOUND) {\n        resp.status_code = http_status_code::not_found;\n        resp.body = fmt::format(\"table not found: \\\"{}\\\"\", app_name);\n        return;\n    }\n    if (response.err != dsn::ERR_OK) {\n        resp.body = response.err.to_string();\n        resp.status_code = http_status_code::internal_server_error;\n        return;\n    }\n\n    // output as json format\n    dsn::utils::multi_table_printer mtp;\n    std::ostringstream out;\n    dsn::utils::table_printer tp_general(\"general\");\n    tp_general.add_row_name_and_data(\"app_name\", app_name);\n    tp_general.add_row_name_and_data(\"app_id\", response.app_id);\n    tp_general.add_row_name_and_data(\"partition_count\", response.partition_count);\n    if (!response.partitions.empty()) {\n        tp_general.add_row_name_and_data(\"max_replica_count\",\n                                         response.partitions[0].max_replica_count);\n    } else {\n        tp_general.add_row_name_and_data(\"max_replica_count\", 0);\n    }\n    mtp.add(std::move(tp_general));\n\n    if (detailed) {\n        dsn::utils::table_printer tp_details(\"replicas\");\n        tp_details.add_title(\"pidx\");\n        tp_details.add_column(\"ballot\");\n        tp_details.add_column(\"replica_count\");\n        tp_details.add_column(\"primary\");\n        tp_details.add_column(\"secondaries\");\n        std::map<rpc_address, 
std::pair<int, int>> node_stat;\n\n        int total_prim_count = 0;\n        int total_sec_count = 0;\n        int fully_healthy = 0;\n        int write_unhealthy = 0;\n        int read_unhealthy = 0;\n        for (const auto &p : response.partitions) {\n            int replica_count = 0;\n            if (!p.primary.is_invalid()) {\n                replica_count++;\n                node_stat[p.primary].first++;\n                total_prim_count++;\n            }\n            replica_count += p.secondaries.size();\n            total_sec_count += p.secondaries.size();\n            if (!p.primary.is_invalid()) {\n                if (replica_count >= p.max_replica_count)\n                    fully_healthy++;\n                else if (replica_count < 2)\n                    write_unhealthy++;\n            } else {\n                write_unhealthy++;\n                read_unhealthy++;\n            }\n            tp_details.add_row(p.pid.get_partition_index());\n            tp_details.append_data(p.ballot);\n            std::stringstream oss;\n            oss << replica_count << \"/\" << p.max_replica_count;\n            tp_details.append_data(oss.str());\n            tp_details.append_data((p.primary.is_invalid() ? 
\"-\" : p.primary.to_std_string()));\n            oss.str(\"\");\n            oss << \"[\";\n            for (int j = 0; j < p.secondaries.size(); j++) {\n                if (j != 0)\n                    oss << \",\";\n                oss << p.secondaries[j].to_std_string();\n                node_stat[p.secondaries[j]].second++;\n            }\n            oss << \"]\";\n            tp_details.append_data(oss.str());\n        }\n        mtp.add(std::move(tp_details));\n\n        // 'node' section.\n        dsn::utils::table_printer tp_nodes(\"nodes\");\n        tp_nodes.add_title(\"node\");\n        tp_nodes.add_column(\"primary\");\n        tp_nodes.add_column(\"secondary\");\n        tp_nodes.add_column(\"total\");\n        for (auto &kv : node_stat) {\n            tp_nodes.add_row(kv.first.to_std_string());\n            tp_nodes.append_data(kv.second.first);\n            tp_nodes.append_data(kv.second.second);\n            tp_nodes.append_data(kv.second.first + kv.second.second);\n        }\n        tp_nodes.add_row(\"total\");\n        tp_nodes.append_data(total_prim_count);\n        tp_nodes.append_data(total_sec_count);\n        tp_nodes.append_data(total_prim_count + total_sec_count);\n        mtp.add(std::move(tp_nodes));\n\n        // healthy partition count section.\n        dsn::utils::table_printer tp_hpc(\"healthy\");\n        tp_hpc.add_row_name_and_data(\"fully_healthy_partition_count\", fully_healthy);\n        tp_hpc.add_row_name_and_data(\"unhealthy_partition_count\",\n                                     response.partition_count - fully_healthy);\n        tp_hpc.add_row_name_and_data(\"write_unhealthy_partition_count\", write_unhealthy);\n        tp_hpc.add_row_name_and_data(\"read_unhealthy_partition_count\", read_unhealthy);\n        mtp.add(std::move(tp_hpc));\n    }\n\n    mtp.output(out, dsn::utils::table_printer::output_format::kJsonCompact);\n    resp.body = out.str();\n    resp.status_code = http_status_code::ok;\n}\n\nvoid 
meta_http_service::list_app_handler(const http_request &req, http_response &resp)\n{\n    bool detailed = false;\n    for (const auto &p : req.query_args) {\n        if (p.first == \"detail\") {\n            detailed = true;\n        } else {\n            resp.status_code = http_status_code::bad_request;\n            return;\n        }\n    }\n    if (!redirect_if_not_primary(req, resp))\n        return;\n    configuration_list_apps_response response;\n    configuration_list_apps_request request;\n    request.status = dsn::app_status::AS_INVALID;\n\n    _service->_state->list_apps(request, response);\n\n    if (response.err != dsn::ERR_OK) {\n        resp.body = response.err.to_string();\n        resp.status_code = http_status_code::internal_server_error;\n        return;\n    }\n    std::vector<::dsn::app_info> &apps = response.infos;\n\n    // output as json format\n    std::ostringstream out;\n    dsn::utils::multi_table_printer mtp;\n    int available_app_count = 0;\n    dsn::utils::table_printer tp_general(\"general_info\");\n    tp_general.add_title(\"app_id\");\n    tp_general.add_column(\"status\");\n    tp_general.add_column(\"app_name\");\n    tp_general.add_column(\"app_type\");\n    tp_general.add_column(\"partition_count\");\n    tp_general.add_column(\"replica_count\");\n    tp_general.add_column(\"is_stateful\");\n    tp_general.add_column(\"create_time\");\n    tp_general.add_column(\"drop_time\");\n    tp_general.add_column(\"drop_expire\");\n    tp_general.add_column(\"envs_count\");\n    for (const auto &app : apps) {\n        if (app.status != dsn::app_status::AS_AVAILABLE) {\n            continue;\n        }\n        std::string status_str = enum_to_string(app.status);\n        status_str = status_str.substr(status_str.find(\"AS_\") + 3);\n        std::string create_time = \"-\";\n        if (app.create_second > 0) {\n            char buf[24];\n            dsn::utils::time_ms_to_string((uint64_t)app.create_second * 1000, buf);\n            
create_time = buf;\n        }\n        std::string drop_time = \"-\";\n        std::string drop_expire_time = \"-\";\n        if (app.status == app_status::AS_AVAILABLE) {\n            available_app_count++;\n        } else if (app.status == app_status::AS_DROPPED && app.expire_second > 0) {\n            if (app.drop_second > 0) {\n                char buf[24];\n                dsn::utils::time_ms_to_string((uint64_t)app.drop_second * 1000, buf);\n                drop_time = buf;\n            }\n            if (app.expire_second > 0) {\n                char buf[24];\n                dsn::utils::time_ms_to_string((uint64_t)app.expire_second * 1000, buf);\n                drop_expire_time = buf;\n            }\n        }\n\n        tp_general.add_row(app.app_id);\n        tp_general.append_data(status_str);\n        tp_general.append_data(app.app_name);\n        tp_general.append_data(app.app_type);\n        tp_general.append_data(app.partition_count);\n        tp_general.append_data(app.max_replica_count);\n        tp_general.append_data(app.is_stateful);\n        tp_general.append_data(create_time);\n        tp_general.append_data(drop_time);\n        tp_general.append_data(drop_expire_time);\n        tp_general.append_data(app.envs.size());\n    }\n    mtp.add(std::move(tp_general));\n\n    int total_fully_healthy_app_count = 0;\n    int total_unhealthy_app_count = 0;\n    int total_write_unhealthy_app_count = 0;\n    int total_read_unhealthy_app_count = 0;\n    if (detailed && available_app_count > 0) {\n        dsn::utils::table_printer tp_health(\"healthy_info\");\n        tp_health.add_title(\"app_id\");\n        tp_health.add_column(\"app_name\");\n        tp_health.add_column(\"partition_count\");\n        tp_health.add_column(\"fully_healthy\");\n        tp_health.add_column(\"unhealthy\");\n        tp_health.add_column(\"write_unhealthy\");\n        tp_health.add_column(\"read_unhealthy\");\n        for (auto &info : apps) {\n            if (info.status != 
app_status::AS_AVAILABLE) {\n                continue;\n            }\n            configuration_query_by_index_request request;\n            configuration_query_by_index_response response;\n            request.app_name = info.app_name;\n            _service->_state->query_configuration_by_index(request, response);\n            dassert(info.app_id == response.app_id,\n                    \"invalid app_id, %d VS %d\",\n                    info.app_id,\n                    response.app_id);\n            dassert(info.partition_count == response.partition_count,\n                    \"invalid partition_count, %d VS %d\",\n                    info.partition_count,\n                    response.partition_count);\n            int fully_healthy = 0;\n            int write_unhealthy = 0;\n            int read_unhealthy = 0;\n            for (int i = 0; i < response.partitions.size(); i++) {\n                const dsn::partition_configuration &p = response.partitions[i];\n                int replica_count = 0;\n                if (!p.primary.is_invalid()) {\n                    replica_count++;\n                }\n                replica_count += p.secondaries.size();\n                if (!p.primary.is_invalid()) {\n                    if (replica_count >= p.max_replica_count)\n                        fully_healthy++;\n                    else if (replica_count < 2)\n                        write_unhealthy++;\n                } else {\n                    write_unhealthy++;\n                    read_unhealthy++;\n                }\n            }\n            tp_health.add_row(info.app_id);\n            tp_health.append_data(info.app_name);\n            tp_health.append_data(info.partition_count);\n            tp_health.append_data(fully_healthy);\n            tp_health.append_data(info.partition_count - fully_healthy);\n            tp_health.append_data(write_unhealthy);\n            tp_health.append_data(read_unhealthy);\n\n            if (fully_healthy == 
info.partition_count)\n                total_fully_healthy_app_count++;\n            else\n                total_unhealthy_app_count++;\n            if (write_unhealthy > 0)\n                total_write_unhealthy_app_count++;\n            if (read_unhealthy > 0)\n                total_read_unhealthy_app_count++;\n        }\n        mtp.add(std::move(tp_health));\n    }\n\n    dsn::utils::table_printer tp_count(\"summary\");\n    tp_count.add_row_name_and_data(\"total_app_count\", available_app_count);\n    if (detailed && available_app_count > 0) {\n        tp_count.add_row_name_and_data(\"fully_healthy_app_count\", total_fully_healthy_app_count);\n        tp_count.add_row_name_and_data(\"unhealthy_app_count\", total_unhealthy_app_count);\n        tp_count.add_row_name_and_data(\"write_unhealthy_app_count\",\n                                       total_write_unhealthy_app_count);\n        tp_count.add_row_name_and_data(\"read_unhealthy_app_count\", total_read_unhealthy_app_count);\n    }\n    mtp.add(std::move(tp_count));\n\n    mtp.output(out, dsn::utils::table_printer::output_format::kJsonCompact);\n\n    resp.body = out.str();\n    resp.status_code = http_status_code::ok;\n}\n\nvoid meta_http_service::list_node_handler(const http_request &req, http_response &resp)\n{\n    bool detailed = false;\n    for (const auto &p : req.query_args) {\n        if (p.first == \"detail\") {\n            detailed = true;\n        } else {\n            resp.status_code = http_status_code::bad_request;\n            return;\n        }\n    }\n    if (!redirect_if_not_primary(req, resp))\n        return;\n\n    std::map<dsn::rpc_address, list_nodes_helper> tmp_map;\n    for (const auto &node : _service->_alive_set) {\n        tmp_map.emplace(node, list_nodes_helper(node.to_std_string(), \"ALIVE\"));\n    }\n    for (const auto &node : _service->_dead_set) {\n        tmp_map.emplace(node, list_nodes_helper(node.to_std_string(), \"UNALIVE\"));\n    }\n    int alive_node_count = 
(_service->_alive_set).size();\n    int unalive_node_count = (_service->_dead_set).size();\n\n    if (detailed) {\n        configuration_list_apps_response response;\n        configuration_list_apps_request request;\n        request.status = dsn::app_status::AS_AVAILABLE;\n        _service->_state->list_apps(request, response);\n        for (const auto &app : response.infos) {\n            configuration_query_by_index_request request_app;\n            configuration_query_by_index_response response_app;\n            request_app.app_name = app.app_name;\n            _service->_state->query_configuration_by_index(request_app, response_app);\n            dassert(app.app_id == response_app.app_id,\n                    \"invalid app_id, %d VS %d\",\n                    app.app_id,\n                    response_app.app_id);\n            dassert(app.partition_count == response_app.partition_count,\n                    \"invalid partition_count, %d VS %d\",\n                    app.partition_count,\n                    response_app.partition_count);\n\n            for (int i = 0; i < response_app.partitions.size(); i++) {\n                const dsn::partition_configuration &p = response_app.partitions[i];\n                if (!p.primary.is_invalid()) {\n                    auto find = tmp_map.find(p.primary);\n                    if (find != tmp_map.end()) {\n                        find->second.primary_count++;\n                    }\n                }\n                for (int j = 0; j < p.secondaries.size(); j++) {\n                    auto find = tmp_map.find(p.secondaries[j]);\n                    if (find != tmp_map.end()) {\n                        find->second.secondary_count++;\n                    }\n                }\n            }\n        }\n    }\n\n    // output as json format\n    std::ostringstream out;\n    dsn::utils::multi_table_printer mtp;\n    dsn::utils::table_printer tp_details(\"details\");\n    tp_details.add_title(\"address\");\n    
tp_details.add_column(\"status\");\n    if (detailed) {\n        tp_details.add_column(\"replica_count\");\n        tp_details.add_column(\"primary_count\");\n        tp_details.add_column(\"secondary_count\");\n    }\n    for (const auto &kv : tmp_map) {\n        tp_details.add_row(kv.second.node_address);\n        tp_details.append_data(kv.second.node_status);\n        if (detailed) {\n            tp_details.append_data(kv.second.primary_count + kv.second.secondary_count);\n            tp_details.append_data(kv.second.primary_count);\n            tp_details.append_data(kv.second.secondary_count);\n        }\n    }\n    mtp.add(std::move(tp_details));\n\n    dsn::utils::table_printer tp_count(\"summary\");\n    tp_count.add_row_name_and_data(\"total_node_count\", alive_node_count + unalive_node_count);\n    tp_count.add_row_name_and_data(\"alive_node_count\", alive_node_count);\n    tp_count.add_row_name_and_data(\"unalive_node_count\", unalive_node_count);\n    mtp.add(std::move(tp_count));\n    mtp.output(out, dsn::utils::table_printer::output_format::kJsonCompact);\n\n    resp.body = out.str();\n    resp.status_code = http_status_code::ok;\n}\n\nvoid meta_http_service::get_cluster_info_handler(const http_request &req, http_response &resp)\n{\n    if (!redirect_if_not_primary(req, resp))\n        return;\n\n    dsn::utils::table_printer tp;\n    std::ostringstream out;\n    std::string meta_servers_str;\n    int ms_size = _service->_opts.meta_servers.size();\n    for (int i = 0; i < ms_size; i++) {\n        meta_servers_str += _service->_opts.meta_servers[i].to_std_string();\n        if (i != ms_size - 1) {\n            meta_servers_str += \",\";\n        }\n    }\n    tp.add_row_name_and_data(\"meta_servers\", meta_servers_str);\n    tp.add_row_name_and_data(\"primary_meta_server\", dsn_primary_address().to_std_string());\n    std::string zk_hosts =\n        dsn_config_get_value_string(\"zookeeper\", \"hosts_list\", \"\", \"zookeeper_hosts\");\n    
zk_hosts.erase(std::remove_if(zk_hosts.begin(), zk_hosts.end(), ::isspace), zk_hosts.end());\n    tp.add_row_name_and_data(\"zookeeper_hosts\", zk_hosts);\n    tp.add_row_name_and_data(\"zookeeper_root\", _service->_cluster_root);\n    tp.add_row_name_and_data(\n        \"meta_function_level\",\n        _meta_function_level_VALUES_TO_NAMES.find(_service->get_function_level())->second + 3);\n    std::vector<std::string> balance_operation_type;\n    balance_operation_type.emplace_back(\"detail\");\n    tp.add_row_name_and_data(\n        \"balance_operation_count\",\n        _service->_balancer->get_balance_operation_count(balance_operation_type));\n    double primary_stddev, total_stddev;\n    _service->_state->get_cluster_balance_score(primary_stddev, total_stddev);\n    tp.add_row_name_and_data(\"primary_replica_count_stddev\", primary_stddev);\n    tp.add_row_name_and_data(\"total_replica_count_stddev\", total_stddev);\n    tp.output(out, dsn::utils::table_printer::output_format::kJsonCompact);\n\n    resp.body = out.str();\n    resp.status_code = http_status_code::ok;\n}\n\nvoid meta_http_service::get_app_envs_handler(const http_request &req, http_response &resp)\n{\n    // only primary process the request\n    if (!redirect_if_not_primary(req, resp))\n        return;\n\n    std::string app_name;\n    for (const auto &p : req.query_args) {\n        if (\"name\" == p.first) {\n            app_name = p.second;\n            break;\n        }\n    }\n    if (app_name.empty()) {\n        resp.status_code = http_status_code::bad_request;\n        resp.body = \"app name shouldn't be empty\";\n        return;\n    }\n\n    // get all of the apps\n    configuration_list_apps_response response;\n    configuration_list_apps_request request;\n    request.status = dsn::app_status::AS_AVAILABLE;\n    _service->_state->list_apps(request, response);\n    if (response.err != dsn::ERR_OK) {\n        resp.body = response.err.to_string();\n        resp.status_code = 
http_status_code::internal_server_error;\n        return;\n    }\n\n    // using app envs to generate a table_printer\n    dsn::utils::table_printer tp;\n    for (auto &app : response.infos) {\n        if (app.app_name == app_name) {\n            for (auto env : app.envs) {\n                tp.add_row_name_and_data(env.first, env.second);\n            }\n            break;\n        }\n    }\n\n    // output as json format\n    std::ostringstream out;\n    tp.output(out, dsn::utils::table_printer::output_format::kJsonCompact);\n    resp.body = out.str();\n    resp.status_code = http_status_code::ok;\n}\n\nstd::string set_to_string(const std::set<int32_t> &s)\n{\n    std::stringstream out;\n    rapidjson::OStreamWrapper wrapper(out);\n    dsn::json::JsonWriter writer(wrapper);\n    dsn::json::json_encode(writer, s);\n    return out.str();\n}\n\nvoid meta_http_service::query_backup_policy_handler(const http_request &req, http_response &resp)\n{\n    if (!redirect_if_not_primary(req, resp))\n        return;\n\n    if (_service->_backup_handler == nullptr) {\n        resp.body = \"cold_backup_disabled\";\n        resp.status_code = http_status_code::not_found;\n        return;\n    }\n    auto request = dsn::make_unique<configuration_query_backup_policy_request>();\n    std::vector<std::string> policy_names;\n    for (const auto &p : req.query_args) {\n        if (p.first == \"name\") {\n            policy_names.push_back(p.second);\n        } else {\n            resp.body = \"Invalid parameter\";\n            resp.status_code = http_status_code::bad_request;\n            return;\n        }\n    }\n    request->policy_names = std::move(policy_names);\n    query_backup_policy_rpc http_to_rpc(std::move(request), LPC_DEFAULT_CALLBACK);\n    _service->_backup_handler->query_backup_policy(http_to_rpc);\n    auto rpc_return = http_to_rpc.response();\n\n    dsn::utils::table_printer tp_query_backup_policy;\n    tp_query_backup_policy.add_title(\"name\");\n    
tp_query_backup_policy.add_column(\"backup_provider_type\");\n    tp_query_backup_policy.add_column(\"backup_interval\");\n    tp_query_backup_policy.add_column(\"app_ids\");\n    tp_query_backup_policy.add_column(\"start_time\");\n    tp_query_backup_policy.add_column(\"status\");\n    tp_query_backup_policy.add_column(\"backup_history_count\");\n    for (const auto &cur_policy : rpc_return.policys) {\n        tp_query_backup_policy.add_row(cur_policy.policy_name);\n        tp_query_backup_policy.append_data(cur_policy.backup_provider_type);\n        tp_query_backup_policy.append_data(cur_policy.backup_interval_seconds);\n        tp_query_backup_policy.append_data(set_to_string(cur_policy.app_ids));\n        tp_query_backup_policy.append_data(cur_policy.start_time);\n        tp_query_backup_policy.append_data(cur_policy.is_disable ? \"disabled\" : \"enabled\");\n        tp_query_backup_policy.append_data(cur_policy.backup_history_count_to_keep);\n    }\n    std::ostringstream out;\n    tp_query_backup_policy.output(out, dsn::utils::table_printer::output_format::kJsonCompact);\n    resp.body = out.str();\n    resp.status_code = http_status_code::ok;\n}\n\nvoid meta_http_service::query_duplication_handler(const http_request &req, http_response &resp)\n{\n    if (!redirect_if_not_primary(req, resp)) {\n        return;\n    }\n    if (_service->_dup_svc == nullptr) {\n        resp.body = \"duplication is not enabled [duplication_enabled=false]\";\n        resp.status_code = http_status_code::not_found;\n        return;\n    }\n    duplication_query_request rpc_req;\n    auto it = req.query_args.find(\"name\");\n    if (it == req.query_args.end()) {\n        resp.body = \"name should not be empty\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n    rpc_req.app_name = it->second;\n    duplication_query_response rpc_resp;\n    _service->_dup_svc->query_duplication_info(rpc_req, rpc_resp);\n    if (rpc_resp.err != ERR_OK) {\n        
resp.body = rpc_resp.err.to_string();\n        if (rpc_resp.err == ERR_APP_NOT_EXIST) {\n            resp.status_code = http_status_code::not_found;\n        } else {\n            resp.status_code = http_status_code::internal_server_error;\n        }\n        return;\n    }\n    resp.status_code = http_status_code::ok;\n    resp.body = duplication_query_response_to_string(rpc_resp);\n}\n\nvoid meta_http_service::start_bulk_load_handler(const http_request &req, http_response &resp)\n{\n    if (!redirect_if_not_primary(req, resp)) {\n        return;\n    }\n\n    if (_service->_bulk_load_svc == nullptr) {\n        resp.body = \"bulk load is not enabled\";\n        resp.status_code = http_status_code::not_found;\n        return;\n    }\n\n    start_bulk_load_request request;\n    bool ret = json::json_forwarder<start_bulk_load_request>::decode(req.body, request);\n    if (!ret) {\n        resp.body = \"invalid request structure\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n    if (request.app_name.empty()) {\n        resp.body = \"app_name should not be empty\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n    if (request.cluster_name.empty()) {\n        resp.body = \"cluster_name should not be empty\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n    if (request.file_provider_type.empty()) {\n        resp.body = \"file_provider_type should not be empty\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n    if (request.remote_root_path.empty()) {\n        resp.body = \"remote_root_path should not be empty\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n\n    auto rpc_req = dsn::make_unique<start_bulk_load_request>(request);\n    start_bulk_load_rpc rpc(std::move(rpc_req), LPC_META_CALLBACK);\n    _service->_bulk_load_svc->on_start_bulk_load(rpc);\n\n    auto rpc_resp = 
rpc.response();\n    // output as json format\n    dsn::utils::table_printer tp;\n    tp.add_row_name_and_data(\"error\", rpc_resp.err.to_string());\n    tp.add_row_name_and_data(\"hint_msg\", rpc_resp.hint_msg);\n    std::ostringstream out;\n    tp.output(out, dsn::utils::table_printer::output_format::kJsonCompact);\n    resp.body = out.str();\n    resp.status_code = http_status_code::ok;\n}\n\nvoid meta_http_service::query_bulk_load_handler(const http_request &req, http_response &resp)\n{\n    if (!redirect_if_not_primary(req, resp)) {\n        return;\n    }\n\n    if (_service->_bulk_load_svc == nullptr) {\n        resp.body = \"bulk load is not enabled\";\n        resp.status_code = http_status_code::not_found;\n        return;\n    }\n\n    auto it = req.query_args.find(\"name\");\n    if (it == req.query_args.end()) {\n        resp.body = \"name should not be empty\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n\n    auto rpc_req = dsn::make_unique<query_bulk_load_request>();\n    rpc_req->app_name = it->second;\n    query_bulk_load_rpc rpc(std::move(rpc_req), LPC_META_CALLBACK);\n    _service->_bulk_load_svc->on_query_bulk_load_status(rpc);\n    auto rpc_resp = rpc.response();\n    // output as json format\n    dsn::utils::table_printer tp;\n    tp.add_row_name_and_data(\"error\", rpc_resp.err.to_string());\n    tp.add_row_name_and_data(\"app_status\", dsn::enum_to_string(rpc_resp.app_status));\n    std::ostringstream out;\n    tp.output(out, dsn::utils::table_printer::output_format::kJsonCompact);\n    resp.body = out.str();\n    resp.status_code = http_status_code::ok;\n}\n\nvoid meta_http_service::start_compaction_handler(const http_request &req, http_response &resp)\n{\n    if (!redirect_if_not_primary(req, resp)) {\n        return;\n    }\n\n    // validate parameters\n    manual_compaction_info info;\n    bool ret = json::json_forwarder<manual_compaction_info>::decode(req.body, info);\n\n    if (!ret) {\n        
resp.body = \"invalid request structure\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n    if (info.app_name.empty()) {\n        resp.body = \"app_name should not be empty\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n    if (info.type.empty() || (info.type != \"once\" && info.type != \"periodic\")) {\n        resp.body = \"type should ony be 'once' or 'periodic'\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n    if (info.target_level < -1) {\n        resp.body = \"target_level should be >= -1\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n    if (info.bottommost_level_compaction.empty() || (info.bottommost_level_compaction != \"skip\" &&\n                                                     info.bottommost_level_compaction != \"force\")) {\n        resp.body = \"bottommost_level_compaction should ony be 'skip' or 'force'\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n    if (info.max_concurrent_running_count < 0) {\n        resp.body = \"max_running_count should be >= 0\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n    if (info.type == \"periodic\" && info.trigger_time.empty()) {\n        resp.body = \"trigger_time should not be empty when type is periodic\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n\n    // create configuration_update_app_env_request\n    std::vector<std::string> keys;\n    std::vector<std::string> values;\n    if (info.type == \"once\") {\n        keys.emplace_back(replica_envs::MANUAL_COMPACT_ONCE_TARGET_LEVEL);\n        keys.emplace_back(replica_envs::MANUAL_COMPACT_ONCE_BOTTOMMOST_LEVEL_COMPACTION);\n        keys.emplace_back(replica_envs::MANUAL_COMPACT_ONCE_TRIGGER_TIME);\n    } else {\n        
keys.emplace_back(replica_envs::MANUAL_COMPACT_PERIODIC_TARGET_LEVEL);\n        keys.emplace_back(replica_envs::MANUAL_COMPACT_PERIODIC_BOTTOMMOST_LEVEL_COMPACTION);\n        keys.emplace_back(replica_envs::MANUAL_COMPACT_PERIODIC_TRIGGER_TIME);\n    }\n    values.emplace_back(std::to_string(info.target_level));\n    values.emplace_back(info.bottommost_level_compaction);\n    values.emplace_back(info.type == \"once\" ? std::to_string(dsn_now_s()) : info.trigger_time);\n    if (info.max_concurrent_running_count > 0) {\n        keys.emplace_back(replica_envs::MANUAL_COMPACT_MAX_CONCURRENT_RUNNING_COUNT);\n        values.emplace_back(std::to_string(info.max_concurrent_running_count));\n    }\n    update_app_env(info.app_name, keys, values, resp);\n}\n\nvoid meta_http_service::update_scenario_handler(const http_request &req, http_response &resp)\n{\n    if (!redirect_if_not_primary(req, resp)) {\n        return;\n    }\n\n    // validate paramters\n    usage_scenario_info info;\n    bool ret = json::json_forwarder<usage_scenario_info>::decode(req.body, info);\n    if (!ret) {\n        resp.body = \"invalid request structure\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n    if (info.app_name.empty()) {\n        resp.body = \"app_name should not be empty\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n    if (info.scenario.empty() || (info.scenario != \"bulk_load\" && info.scenario != \"normal\")) {\n        resp.body = \"scenario should ony be 'normal' or 'bulk_load'\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n\n    // create configuration_update_app_env_request\n    std::vector<std::string> keys;\n    std::vector<std::string> values;\n    keys.emplace_back(replica_envs::ROCKSDB_USAGE_SCENARIO);\n    values.emplace_back(info.scenario);\n    update_app_env(info.app_name, keys, values, resp);\n}\n\nbool meta_http_service::redirect_if_not_primary(const 
http_request &req, http_response &resp)\n{\n#ifdef DSN_MOCK_TEST\n    return true;\n#endif\n    rpc_address leader;\n    if (_service->_failure_detector->get_leader(&leader))\n        return true;\n    // set redirect response\n    resp.location = \"http://\" + leader.to_std_string() + '/' + req.path;\n    if (!req.query_args.empty()) {\n        resp.location += '?';\n        for (const auto &i : req.query_args) {\n            resp.location += i.first + '=' + i.second + '&';\n        }\n        resp.location.pop_back(); // remove final '&'\n    }\n    resp.location.erase(std::remove(resp.location.begin(), resp.location.end(), '\\0'),\n                        resp.location.end()); // remove final '\\0'\n    resp.status_code = http_status_code::temporary_redirect;\n    return false;\n}\n\nvoid meta_http_service::update_app_env(const std::string &app_name,\n                                       const std::vector<std::string> &keys,\n                                       const std::vector<std::string> &values,\n                                       http_response &resp)\n{\n    configuration_update_app_env_request request;\n    request.app_name = app_name;\n    request.op = app_env_operation::APP_ENV_OP_SET;\n    request.__set_keys(keys);\n    request.__set_values(values);\n\n    auto rpc_req = dsn::make_unique<configuration_update_app_env_request>(request);\n    update_app_env_rpc rpc(std::move(rpc_req), LPC_META_STATE_NORMAL);\n    _service->_state->set_app_envs(rpc);\n\n    auto rpc_resp = rpc.response();\n    // output as json format\n    dsn::utils::table_printer tp;\n    tp.add_row_name_and_data(\"error\", rpc_resp.err.to_string());\n    tp.add_row_name_and_data(\"hint_message\", rpc_resp.hint_message);\n    std::ostringstream out;\n    tp.output(out, dsn::utils::table_printer::output_format::kJsonCompact);\n    resp.body = out.str();\n    resp.status_code = http_status_code::ok;\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_http_service.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <algorithm>\n\n#include <dsn/cpp/json_helper.h>\n#include <dsn/http/http_server.h>\n\nnamespace dsn {\nnamespace replication {\n\nNON_MEMBER_JSON_SERIALIZATION(\n    start_bulk_load_request, app_name, cluster_name, file_provider_type, remote_root_path)\n\nstruct manual_compaction_info\n{\n    std::string app_name;\n    std::string type;                        // periodic or once\n    int32_t target_level;                    // [-1,num_levels]\n    std::string bottommost_level_compaction; // skip or force\n    int32_t max_concurrent_running_count;    // 0 means no limit\n    std::string trigger_time;                // only used when the type is periodic\n    DEFINE_JSON_SERIALIZATION(app_name,\n                              type,\n                              target_level,\n                              bottommost_level_compaction,\n                              max_concurrent_running_count,\n                              trigger_time)\n};\n\nstruct usage_scenario_info\n{\n    std::string app_name;\n    std::string scenario; // normal or bulk_load\n    DEFINE_JSON_SERIALIZATION(app_name, scenario)\n};\n\nclass 
meta_service;\nclass meta_http_service : public http_service\n{\npublic:\n    explicit meta_http_service(meta_service *s) : _service(s)\n    {\n        register_handler(\"app\",\n                         std::bind(&meta_http_service::get_app_handler,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/meta/app?app_name=temp\");\n        register_handler(\"app/duplication\",\n                         std::bind(&meta_http_service::query_duplication_handler,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/meta/app/duplication?name=<app_name>\");\n        register_handler(\"apps\",\n                         std::bind(&meta_http_service::list_app_handler,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/meta/apps\");\n        register_handler(\"nodes\",\n                         std::bind(&meta_http_service::list_node_handler,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/meta/nodes\");\n        register_handler(\"cluster\",\n                         std::bind(&meta_http_service::get_cluster_info_handler,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/meta/cluster\");\n        register_handler(\"app_envs\",\n                         std::bind(&meta_http_service::get_app_envs_handler,\n                                   this,\n                
                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/meta/app_envs?name=temp\");\n        register_handler(\"backup_policy\",\n                         std::bind(&meta_http_service::query_backup_policy_handler,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/meta/backup_policy\");\n        // request body should be start_bulk_load_request\n        register_handler(\"app/start_bulk_load\",\n                         std::bind(&meta_http_service::start_bulk_load_handler,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/meta/start_bulk_load\");\n        register_handler(\"app/query_bulk_load\",\n                         std::bind(&meta_http_service::query_bulk_load_handler,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/meta/query_bulk_load?name=temp\");\n        // request body should be manual_compaction_info\n        register_handler(\"app/start_compaction\",\n                         std::bind(&meta_http_service::start_compaction_handler,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/meta/start_compaction\");\n        // request body should be usage_scenario_info\n        register_handler(\"app/usage_scenario\",\n                         std::bind(&meta_http_service::update_scenario_handler,\n                                   this,\n                                   std::placeholders::_1,\n    
                               std::placeholders::_2),\n                         \"ip:port/meta/app/usage_scenario\");\n    }\n\n    std::string path() const override { return \"meta\"; }\n\n    void get_app_handler(const http_request &req, http_response &resp);\n    void list_app_handler(const http_request &req, http_response &resp);\n    void list_node_handler(const http_request &req, http_response &resp);\n    void get_cluster_info_handler(const http_request &req, http_response &resp);\n    void get_app_envs_handler(const http_request &req, http_response &resp);\n    void query_backup_policy_handler(const http_request &req, http_response &resp);\n    void query_duplication_handler(const http_request &req, http_response &resp);\n    void start_bulk_load_handler(const http_request &req, http_response &resp);\n    void query_bulk_load_handler(const http_request &req, http_response &resp);\n    void start_compaction_handler(const http_request &req, http_response &resp);\n    void update_scenario_handler(const http_request &req, http_response &resp);\n\nprivate:\n    // set redirect location if current server is not primary\n    bool redirect_if_not_primary(const http_request &req, http_response &resp);\n\n    void update_app_env(const std::string &app_name,\n                        const std::vector<std::string> &keys,\n                        const std::vector<std::string> &values,\n                        http_response &resp);\n\n    meta_service *_service;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_options.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     the meta server's options, impl file\n *\n * Revision history:\n *     2016-04-25, Weijie Sun(sunweijie at xiaomi.com), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n#include \"meta_options.h\"\n\n#include <dsn/utility/flags.h>\n\nnamespace dsn {\nnamespace replication {\n\nstd::string meta_options::concat_path_unix_style(const std::string &prefix,\n                                                 const std::string &postfix)\n{\n    size_t pos1 = prefix.size(); // last_valid_pos + 1\n    while (pos1 > 0 && prefix[pos1 - 1] == '/')\n        pos1--;\n    size_t pos2 = 0; // first non '/' position\n    while (pos2 < postfix.size() && postfix[pos2] == '/')\n        pos2++;\n    return 
prefix.substr(0, pos1) + \"/\" + postfix.substr(pos2);\n}\n\nvoid meta_options::initialize()\n{\n    cluster_root = dsn_config_get_value_string(\n        \"meta_server\", \"cluster_root\", \"/\", \"cluster root of meta state service on remote\");\n\n    meta_state_service_type = dsn_config_get_value_string(\"meta_server\",\n                                                          \"meta_state_service_type\",\n                                                          \"meta_state_service_simple\",\n                                                          \"meta_state_service provider type\");\n    const char *meta_state_service_parameters =\n        dsn_config_get_value_string(\"meta_server\",\n                                    \"meta_state_service_parameters\",\n                                    \"\",\n                                    \"meta_state_service provider parameters\");\n    utils::split_args(meta_state_service_parameters, meta_state_service_args);\n\n    node_live_percentage_threshold_for_update = dsn_config_get_value_uint64(\n        \"meta_server\",\n        \"node_live_percentage_threshold_for_update\",\n        65,\n        \"if live_node_count * 100 < total_node_count * node_live_percentage_threshold_for_update, \"\n        \"then freeze the cluster; default is 65\");\n\n    meta_function_level_on_start = meta_function_level::fl_invalid;\n    const char *level_str = dsn_config_get_value_string(\n        \"meta_server\", \"meta_function_level_on_start\", \"steady\", \"meta function level on start\");\n    std::string level = std::string(\"fl_\") + level_str;\n    for (auto &kv : _meta_function_level_VALUES_TO_NAMES) {\n        if (level == kv.second) {\n            meta_function_level_on_start = (meta_function_level::type)kv.first;\n            break;\n        }\n    }\n    dassert(meta_function_level_on_start != meta_function_level::fl_invalid,\n            \"invalid function level: %s\",\n            level_str);\n\n    
recover_from_replica_server = dsn_config_get_value_bool(\n        \"meta_server\",\n        \"recover_from_replica_server\",\n        false,\n        \"whether to recover from replica server when no apps in remote storage\");\n\n    hold_seconds_for_dropped_app =\n        dsn_config_get_value_uint64(\"meta_server\",\n                                    \"hold_seconds_for_dropped_app\",\n                                    604800,\n                                    \"how long to hold data for dropped apps\");\n\n    add_secondary_enable_flow_control =\n        dsn_config_get_value_bool(\"meta_server\",\n                                  \"add_secondary_enable_flow_control\",\n                                  false,\n                                  \"enable flow control for add secondary proposal\");\n    add_secondary_max_count_for_one_node = dsn_config_get_value_uint64(\n        \"meta_server\",\n        \"add_secondary_max_count_for_one_node\",\n        10,\n        \"add secondary max count for one node when flow control enabled\");\n\n    /// failure detector options\n    _fd_opts.distributed_lock_service_type =\n        dsn_config_get_value_string(\"meta_server\",\n                                    \"distributed_lock_service_type\",\n                                    \"distributed_lock_service_simple\",\n                                    \"dist lock provider\");\n    const char *distributed_lock_service_parameters =\n        dsn_config_get_value_string(\"meta_server\",\n                                    \"distributed_lock_service_parameters\",\n                                    \"\",\n                                    \"distributed_lock_service provider parameters\");\n    utils::split_args(distributed_lock_service_parameters, _fd_opts.distributed_lock_service_args);\n    _fd_opts.stable_rs_min_running_seconds =\n        dsn_config_get_value_uint64(\"meta_server\",\n                                    \"stable_rs_min_running_seconds\",\n        
                            600,\n                                    \"min running seconds for a stable replica server\");\n\n    _fd_opts.max_succssive_unstable_restart = dsn_config_get_value_uint64(\n        \"meta_server\",\n        \"max_succssive_unstable_restart\",\n        5,\n        \"meta server will treat an rs unstable so as to reject it's beacons \"\n        \"if its succssively restarting count exceeds this value\");\n\n    /// load balancer options\n    _lb_opts.server_load_balancer_type =\n        dsn_config_get_value_string(\"meta_server\",\n                                    \"server_load_balancer_type\",\n                                    \"greedy_load_balancer\",\n                                    \"server load balancer provider\");\n    _lb_opts.replica_assign_delay_ms_for_dropouts =\n        dsn_config_get_value_uint64(\"meta_server\",\n                                    \"replica_assign_delay_ms_for_dropouts\",\n                                    300000,\n                                    \"replica_assign_delay_ms_for_dropouts, default is 300000\");\n    _lb_opts.max_replicas_in_group = dsn_config_get_value_uint64(\n        \"meta_server\", \"max_replicas_in_group\", 4, \"max replicas(alive & dead) in a group\");\n\n    _lb_opts.balancer_in_turn = dsn_config_get_value_bool(\n        \"meta_server\", \"balancer_in_turn\", false, \"balance the apps one-by-one/concurrently\");\n    _lb_opts.only_primary_balancer = dsn_config_get_value_bool(\n        \"meta_server\", \"only_primary_balancer\", false, \"only try to make the primary balanced\");\n    _lb_opts.only_move_primary = dsn_config_get_value_bool(\n        \"meta_server\", \"only_move_primary\", false, \"only try to make the primary balanced by move\");\n\n    partition_guardian_type = dsn_config_get_value_string(\"meta_server\",\n                                                          \"partition_guardian_type\",\n                                                          
\"partition_guardian\",\n                                                          \"partition guardian provider\");\n\n    cold_backup_disabled = dsn_config_get_value_bool(\n        \"meta_server\", \"cold_backup_disabled\", true, \"whether to disable cold backup\");\n\n    enable_white_list =\n        dsn_config_get_value_bool(\"meta_server\",\n                                  \"enable_white_list\",\n                                  false,\n                                  \"whether to enable white list of replica servers\");\n\n    const char *replica_white_list_raw = dsn_config_get_value_string(\n        \"meta_server\", \"replica_white_list\", \"\", \"white list of replica-servers in meta-server\");\n    utils::split_args(replica_white_list_raw, replica_white_list, ',');\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_options.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     the meta server's options\n *\n * Revision history:\n *     2016-04-25, Weijie Sun(sunweijie at xiaomi.com), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n#pragma once\n\n#include <string>\n#include <dsn/dist/replication.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass fd_suboptions\n{\npublic:\n    std::string distributed_lock_service_type;\n    std::vector<std::string> distributed_lock_service_args;\n\n    uint64_t stable_rs_min_running_seconds;\n    int32_t max_succssive_unstable_restart;\n};\n\nclass lb_suboptions\n{\npublic:\n    std::string server_load_balancer_type;\n    uint64_t replica_assign_delay_ms_for_dropouts;\n    int32_t max_replicas_in_group;\n\n    bool 
balancer_in_turn;\n    bool only_primary_balancer;\n    bool only_move_primary;\n};\n\nclass meta_options\n{\npublic:\n    std::string cluster_root;\n    std::string meta_state_service_type;\n    std::vector<std::string> meta_state_service_args;\n\n    uint64_t node_live_percentage_threshold_for_update;\n    meta_function_level::type meta_function_level_on_start;\n    bool recover_from_replica_server;\n    int32_t hold_seconds_for_dropped_app;\n\n    bool add_secondary_enable_flow_control;\n    int32_t add_secondary_max_count_for_one_node;\n\n    fd_suboptions _fd_opts;\n    lb_suboptions _lb_opts;\n    std::string partition_guardian_type;\n\n    bool cold_backup_disabled;\n\n    bool enable_white_list;\n    std::vector<std::string> replica_white_list;\n\npublic:\n    void initialize();\n\npublic:\n    static std::string concat_path_unix_style(const std::string &prefix,\n                                              const std::string &postfix);\n};\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_rpc_types.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/dist/replication/replication_types.h>\n#include <dsn/cpp/rpc_holder.h>\n\nnamespace dsn {\nnamespace replication {\n\ntypedef rpc_holder<configuration_update_app_env_request, configuration_update_app_env_response>\n    app_env_rpc;\ntypedef rpc_holder<ddd_diagnose_request, ddd_diagnose_response> ddd_diagnose_rpc;\ntypedef rpc_holder<configuration_query_by_node_request, configuration_query_by_node_response>\n    configuration_query_by_node_rpc;\ntypedef rpc_holder<configuration_query_by_index_request, configuration_query_by_index_response>\n    configuration_query_by_index_rpc;\ntypedef rpc_holder<configuration_list_apps_request, configuration_list_apps_response>\n    configuration_list_apps_rpc;\ntypedef rpc_holder<configuration_list_nodes_request, configuration_list_nodes_response>\n    configuration_list_nodes_rpc;\ntypedef rpc_holder<configuration_cluster_info_request, configuration_cluster_info_response>\n    configuration_cluster_info_rpc;\ntypedef rpc_holder<configuration_balancer_request, configuration_balancer_response>\n    configuration_balancer_rpc;\ntypedef 
rpc_holder<configuration_meta_control_request, configuration_meta_control_response>\n    configuration_meta_control_rpc;\ntypedef rpc_holder<configuration_recovery_request, configuration_recovery_response>\n    configuration_recovery_rpc;\ntypedef rpc_holder<configuration_report_restore_status_request,\n                   configuration_report_restore_status_response>\n    configuration_report_restore_status_rpc;\ntypedef rpc_holder<configuration_query_restore_request, configuration_query_restore_response>\n    configuration_query_restore_rpc;\ntypedef rpc_holder<configuration_query_backup_policy_request,\n                   configuration_query_backup_policy_response>\n    query_backup_policy_rpc;\ntypedef rpc_holder<configuration_modify_backup_policy_request,\n                   configuration_modify_backup_policy_response>\n    configuration_modify_backup_policy_rpc;\ntypedef rpc_holder<start_backup_app_request, start_backup_app_response> start_backup_app_rpc;\ntypedef rpc_holder<query_backup_status_request, query_backup_status_response>\n    query_backup_status_rpc;\ntypedef rpc_holder<configuration_get_max_replica_count_request,\n                   configuration_get_max_replica_count_response>\n    configuration_get_max_replica_count_rpc;\ntypedef rpc_holder<configuration_set_max_replica_count_request,\n                   configuration_set_max_replica_count_response>\n    configuration_set_max_replica_count_rpc;\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_server_failure_detector.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/utility/factory_store.h>\n#include <dsn/utility/fail_point.h>\n#include <dsn/dist/fmt_logging.h>\n#include \"meta_server_failure_detector.h\"\n#include \"server_state.h\"\n#include \"meta_service.h\"\n#include \"meta_options.h\"\n\n#include <chrono>\n#include <thread>\n\nnamespace dsn {\nnamespace replication {\n\nmeta_server_failure_detector::meta_server_failure_detector(meta_service *svc)\n    : _svc(svc),\n      _lock_svc(nullptr),\n      _primary_lock_id(\"dsn.meta.server.leader\"),\n      _is_leader(false),\n      _election_moment(0)\n{\n    
_fd_opts = &(svc->get_meta_options()._fd_opts);\n    _lock_svc = dsn::utils::factory_store<dist::distributed_lock_service>::create(\n        _fd_opts->distributed_lock_service_type.c_str(), PROVIDER_TYPE_MAIN);\n    error_code err = _lock_svc->initialize(_fd_opts->distributed_lock_service_args);\n    dassert(err == ERR_OK, \"init distributed_lock_service failed, err = %s\", err.to_string());\n}\n\nmeta_server_failure_detector::~meta_server_failure_detector()\n{\n    if (_lock_grant_task)\n        _lock_grant_task->cancel(true);\n    if (_lock_expire_task)\n        _lock_expire_task->cancel(true);\n    if (_lock_svc) {\n        _lock_svc->finalize();\n        delete _lock_svc;\n    }\n}\n\nvoid meta_server_failure_detector::on_worker_disconnected(const std::vector<rpc_address> &nodes)\n{\n    _svc->set_node_state(nodes, false);\n}\n\nvoid meta_server_failure_detector::on_worker_connected(rpc_address node)\n{\n    _svc->set_node_state(std::vector<rpc_address>{node}, true);\n}\n\nbool meta_server_failure_detector::get_leader(rpc_address *leader)\n{\n    FAIL_POINT_INJECT_F(\"meta_server_failure_detector_get_leader\", [leader](dsn::string_view str) {\n        /// the format of str is : true#{ip}:{port} or false#{ip}:{port}\n        auto pos = str.find(\"#\");\n        // get leader addr\n        auto addr_part = str.substr(pos + 1, str.length() - pos - 1);\n        if (!leader->from_string_ipv4(addr_part.data())) {\n            dassert_f(false, \"parse {} to rpc_address failed\", addr_part);\n        }\n\n        // get the return value which implies whether the current node is primary or not\n        bool is_leader = true;\n        auto is_leader_part = str.substr(0, pos);\n        if (!dsn::buf2bool(is_leader_part, is_leader)) {\n            dassert_f(false, \"parse {} to bool failed\", is_leader_part);\n        }\n        return is_leader;\n    });\n\n    dsn::rpc_address holder;\n    if (leader == nullptr) {\n        leader = &holder;\n    }\n\n    if 
(_is_leader.load()) {\n        *leader = dsn_primary_address();\n        return true;\n    } else if (_lock_svc == nullptr) {\n        leader->set_invalid();\n        return false;\n    } else {\n        std::string lock_owner;\n        uint64_t version;\n        error_code err = _lock_svc->query_cache(_primary_lock_id, lock_owner, version);\n        if (err == dsn::ERR_OK && leader->from_string_ipv4(lock_owner.c_str())) {\n            return (*leader) == dsn_primary_address();\n        } else {\n            dwarn(\"query leader from cache got error(%s)\", err.to_string());\n            leader->set_invalid();\n            return false;\n        }\n    }\n}\n\nDEFINE_TASK_CODE(LPC_META_SERVER_LEADER_LOCK_CALLBACK, TASK_PRIORITY_COMMON, fd::THREAD_POOL_FD)\nvoid meta_server_failure_detector::acquire_leader_lock()\n{\n    //\n    // try to get the leader lock until it is done\n    //\n    dsn::dist::distributed_lock_service::lock_options opt = {true, true};\n    while (true) {\n        error_code err;\n        auto tasks = _lock_svc->lock(\n            _primary_lock_id,\n            dsn_primary_address().to_std_string(),\n            // lock granted\n            LPC_META_SERVER_LEADER_LOCK_CALLBACK,\n            [this, &err](error_code ec, const std::string &owner, uint64_t version) {\n                ddebug(\"leader lock granted callback: err(%s), owner(%s), version(%llu)\",\n                       ec.to_string(),\n                       owner.c_str(),\n                       version);\n                err = ec;\n                if (err == dsn::ERR_OK) {\n                    leader_initialize(owner);\n                }\n            },\n\n            // lease expire\n            LPC_META_SERVER_LEADER_LOCK_CALLBACK,\n            [](error_code ec, const std::string &owner, uint64_t version) {\n                derror(\"leader lock expired callback: err(%s), owner(%s), version(%llu)\",\n                       ec.to_string(),\n                       owner.c_str(),\n       
                version);\n                // let's take the easy way right now\n                dsn_exit(0);\n            },\n            opt);\n\n        _lock_grant_task = tasks.first;\n        _lock_expire_task = tasks.second;\n\n        _lock_grant_task->wait();\n        if (err == ERR_OK) {\n            break;\n        } else {\n            // sleep for 1 second before retry\n            std::this_thread::sleep_for(std::chrono::seconds(1));\n        }\n    }\n}\n\nvoid meta_server_failure_detector::reset_stability_stat(const rpc_address &node)\n{\n    zauto_lock l(_map_lock);\n    auto iter = _stablity.find(node);\n    if (iter == _stablity.end())\n        return;\n    else {\n        ddebug(\"old stability stat: node(%s), start_time(%lld), unstable_count(%d), will reset \"\n               \"unstable count to 0\",\n               node.to_string(),\n               iter->second.last_start_time_ms,\n               iter->second.unstable_restart_count);\n        iter->second.unstable_restart_count = 0;\n    }\n}\n\nvoid meta_server_failure_detector::leader_initialize(const std::string &lock_service_owner)\n{\n    dsn::rpc_address addr;\n    dassert(addr.from_string_ipv4(lock_service_owner.c_str()),\n            \"parse %s to rpc_address failed\",\n            lock_service_owner.c_str());\n    dassert(addr == dsn_primary_address(),\n            \"acquire leader return success, but owner not match: %s vs %s\",\n            addr.to_string(),\n            dsn_primary_address().to_string());\n    _is_leader.store(true);\n    _election_moment.store(dsn_now_ms());\n}\n\nbool meta_server_failure_detector::update_stability_stat(const fd::beacon_msg &beacon)\n{\n    zauto_lock l(_map_lock);\n    auto iter = _stablity.find(beacon.from_addr);\n    if (iter == _stablity.end()) {\n        _stablity.emplace(beacon.from_addr, worker_stability{beacon.start_time, 0});\n        return true;\n    } else {\n        worker_stability &w = iter->second;\n        if (beacon.start_time == 
w.last_start_time_ms) {\n            dinfo(\"%s isn't restarted, last_start_time(%lld)\",\n                  beacon.from_addr.to_string(),\n                  w.last_start_time_ms);\n            if (dsn_now_ms() - w.last_start_time_ms >=\n                    _fd_opts->stable_rs_min_running_seconds * 1000 &&\n                w.unstable_restart_count > 0) {\n                ddebug(\"%s has stably run for a while, reset it's unstable count(%d) to 0\",\n                       beacon.from_addr.to_string(),\n                       w.unstable_restart_count);\n                w.unstable_restart_count = 0;\n            }\n        } else if (beacon.start_time > w.last_start_time_ms) {\n            ddebug(\"check %s restarted, last_time(%lld), this_time(%lld)\",\n                   beacon.from_addr.to_string(),\n                   w.last_start_time_ms,\n                   beacon.start_time);\n            if (beacon.start_time - w.last_start_time_ms <\n                _fd_opts->stable_rs_min_running_seconds * 1000) {\n                w.unstable_restart_count++;\n                dwarn(\"%s encounter an unstable restart, total_count(%d)\",\n                      beacon.from_addr.to_string(),\n                      w.unstable_restart_count);\n            } else if (w.unstable_restart_count > 0) {\n                ddebug(\"%s restart in %lld ms after last restart, may recover ok, reset \"\n                       \"it's unstable count(%d) to 0\",\n                       beacon.from_addr.to_string(),\n                       beacon.start_time - w.last_start_time_ms,\n                       w.unstable_restart_count);\n                w.unstable_restart_count = 0;\n            }\n\n            w.last_start_time_ms = beacon.start_time;\n        } else {\n            dwarn(\"%s: possible encounter a staled message, ignore it\",\n                  beacon.from_addr.to_string());\n        }\n        return w.unstable_restart_count < _fd_opts->max_succssive_unstable_restart;\n    }\n}\n\nvoid 
meta_server_failure_detector::on_ping(const fd::beacon_msg &beacon,\n                                           rpc_replier<fd::beacon_ack> &reply)\n{\n    fd::beacon_ack ack;\n    ack.time = beacon.time;\n    ack.this_node = beacon.to_addr;\n    ack.allowed = true;\n\n    if (beacon.__isset.start_time && !update_stability_stat(beacon)) {\n        dwarn(\"%s is unstable, don't response to it's beacon\", beacon.from_addr.to_string());\n        return;\n    }\n\n    dsn::rpc_address leader;\n    if (!get_leader(&leader)) {\n        ack.is_master = false;\n        ack.primary_node = leader;\n    } else {\n        ack.is_master = true;\n        ack.primary_node = beacon.to_addr;\n        failure_detector::on_ping_internal(beacon, ack);\n    }\n\n    ddebug(\"on_ping, beacon send time[%ld], is_master(%s), from_node(%s), this_node(%s), \"\n           \"primary_node(%s)\",\n           ack.time,\n           ack.is_master ? \"true\" : \"false\",\n           beacon.from_addr.to_string(),\n           ack.this_node.to_string(),\n           ack.primary_node.to_string());\n\n    reply(ack);\n}\n\n/*the following functions are only for test*/\nmeta_server_failure_detector::meta_server_failure_detector(rpc_address leader_address,\n                                                           bool is_myself_leader)\n{\n    ddebug(\"set %s as leader\", leader_address.to_string());\n    _lock_svc = nullptr;\n    _is_leader.store(is_myself_leader);\n}\n\nvoid meta_server_failure_detector::set_leader_for_test(rpc_address leader_address,\n                                                       bool is_myself_leader)\n{\n    ddebug(\"set %s as leader\", leader_address.to_string());\n    _is_leader.store(is_myself_leader);\n}\n\nmeta_server_failure_detector::stability_map *\nmeta_server_failure_detector::get_stability_map_for_test()\n{\n    return &_stablity;\n}\n}\n}\n"
  },
  {
    "path": "src/meta/meta_server_failure_detector.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/dist/failure_detector.h>\n#include <dsn/dist/distributed_lock_service.h>\n\n#include \"common/replication_common.h\"\n#include \"meta_options.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass meta_service;\nnamespace test {\nclass test_checker;\n}\nclass meta_server_failure_detector : public fd::failure_detector\n{\npublic:\n    struct worker_stability\n    {\n        int64_t last_start_time_ms;\n        int unstable_restart_count;\n        worker_stability(int64_t lst, int urc)\n            : last_start_time_ms(lst), 
unstable_restart_count(urc)\n        {\n        }\n    };\n    typedef std::map<dsn::rpc_address, worker_stability> stability_map;\n\npublic:\n    meta_server_failure_detector(meta_service *svc);\n    virtual ~meta_server_failure_detector();\n\n    // get the meta-server's leader\n    // leader: the leader's address. Invalid if no leader selected\n    //         if leader==nullptr, then the new leader won't be returned\n    // ret true if i'm the current leader; false if not.\n    bool get_leader(/*output*/ dsn::rpc_address *leader);\n\n    // return if acquire the leader lock, or-else blocked forever\n    void acquire_leader_lock();\n\n    void reset_stability_stat(const dsn::rpc_address &node);\n\n    // _fd_opts is initialized in constructor with a fd_suboption stored in meta_service.\n    // so usually you don't need to call this.\n    // the function is mainly for a test module, in which the fd object is created without the\n    // \"meta_service\", please make sure that options's lifetime is longer than the fd object\n    void set_options(fd_suboptions *options) { _fd_opts = options; }\n\n    // client side\n    virtual void on_master_disconnected(const std::vector<rpc_address> &)\n    {\n        dassert(false, \"unsupported method\");\n    }\n    virtual void on_master_connected(rpc_address) { dassert(false, \"unsupported method\"); }\n\n    // server side\n    // it is in the protection of failure_detector::_lock\n    virtual void on_worker_disconnected(const std::vector<rpc_address> &nodes) override;\n    // it is in the protection of failure_detector::_lock\n    virtual void on_worker_connected(rpc_address node) override;\n    virtual bool is_worker_connected(rpc_address node) const override\n    {\n        // we treat all nodes not in the worker list alive in the first grace period.\n        // For the reason, please consider this situation:\n        // 1. a RS connected to a meta M1\n        // 2. 
M1 crashed, then M2 selected as new leader, before the first beacon of RS sent\n        //    to M2, RS is not in the worker_map of M2.\n        // 3. If M2 claims RS is not alive, then the perfect-FD's constraint will be broken.\n        //    Coz RS will find itself dead after the leader-periods.\n        if (_election_moment.load() + get_grace_ms() < dsn_now_ms()) {\n            return true;\n        }\n        return failure_detector::is_worker_connected(node);\n    }\n    virtual void on_ping(const fd::beacon_msg &beacon, rpc_replier<fd::beacon_ack> &reply) override;\n\nprivate:\n    // return value: return true if beacon.from_addr is stable; or-else, false\n    bool update_stability_stat(const fd::beacon_msg &beacon);\n    void leader_initialize(const std::string &lock_service_owner);\n\nprivate:\n    // meta_service need to visit the failure_detector's lock\n    friend class meta_service;\n\n    friend class test::test_checker;\n\n    // initialize in the constructor\n    meta_service *_svc;\n    dist::distributed_lock_service *_lock_svc;\n    std::string _primary_lock_id;\n    const fd_suboptions *_fd_opts;\n\n    // initialize in acquire_leader_lock\n    task_ptr _lock_grant_task;\n    task_ptr _lock_expire_task;\n    std::atomic_bool _is_leader;\n    std::atomic<uint64_t> _election_moment;\n\n    // record the start time of a replica-server, check if it crashed frequently\n    mutable zlock _map_lock;\n    stability_map _stablity;\n\npublic:\n    /* these two functions are for test */\n    meta_server_failure_detector(rpc_address leader_address, bool is_myself_leader);\n    void set_leader_for_test(rpc_address leader_address, bool is_myself_leader);\n    stability_map *get_stability_map_for_test();\n};\n}\n}\n"
  },
  {
    "path": "src/meta/meta_service.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <sys/stat.h>\n\n#include <boost/lexical_cast.hpp>\n#include <fmt/format.h>\n\n#include <dsn/utility/factory_store.h>\n#include <dsn/utility/extensible_object.h>\n#include <dsn/utility/string_conv.h>\n#include <dsn/dist/meta_state_service.h>\n#include <dsn/dist/common.h>\n#include <dsn/dist/remote_command.h>\n#include <dsn/tool-api/command_manager.h>\n#include <algorithm> // for std::remove_if\n#include <cctype>    // for ::isspace\n#include <dsn/dist/fmt_logging.h>\n\n#include \"meta_service.h\"\n#include \"server_state.h\"\n#include \"meta_server_failure_detector.h\"\n#include \"server_load_balancer.h\"\n#include \"meta/duplication/meta_duplication_service.h\"\n#include \"meta_split_service.h\"\n#include 
\"meta_bulk_load_service.h\"\n#include \"runtime/security/access_controller.h\"\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DEFINE_uint64(\"meta_server\",\n                  min_live_node_count_for_unfreeze,\n                  3,\n                  \"minimum live node count without which the state is freezed\");\nDSN_TAG_VARIABLE(min_live_node_count_for_unfreeze, FT_MUTABLE);\nDSN_DEFINE_validator(min_live_node_count_for_unfreeze,\n                     [](uint64_t min_live_node_count) -> bool { return min_live_node_count > 0; });\n\nmeta_service::meta_service()\n    : serverlet(\"meta_service\"), _failure_detector(nullptr), _started(false), _recovering(false)\n{\n    _opts.initialize();\n    _meta_opts.initialize();\n    _node_live_percentage_threshold_for_update =\n        _meta_opts.node_live_percentage_threshold_for_update;\n    _state.reset(new server_state());\n    _function_level.store(_meta_opts.meta_function_level_on_start);\n    if (_meta_opts.recover_from_replica_server) {\n        ddebug(\"enter recovery mode for [meta_server].recover_from_replica_server = true\");\n        _recovering = true;\n        if (_meta_opts.meta_function_level_on_start > meta_function_level::fl_steady) {\n            ddebug(\"meta server function level changed to fl_steady under recovery mode\");\n            _function_level.store(meta_function_level::fl_steady);\n        }\n    }\n\n    _recent_disconnect_count.init_app_counter(\n        \"eon.meta_service\",\n        \"recent_disconnect_count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"replica server disconnect count in the recent period\");\n    _unalive_nodes_count.init_app_counter(\n        \"eon.meta_service\", \"unalive_nodes\", COUNTER_TYPE_NUMBER, \"current count of unalive nodes\");\n    _alive_nodes_count.init_app_counter(\n        \"eon.meta_service\", \"alive_nodes\", COUNTER_TYPE_NUMBER, \"current count of alive nodes\");\n\n    _access_controller = security::create_meta_access_controller();\n\n    
_meta_op_status.store(meta_op_status::FREE);\n}\n\nmeta_service::~meta_service() { stop(); }\n\nvoid meta_service::stop()\n{\n    zauto_write_lock l(_meta_lock);\n    if (!_started.load()) {\n        return;\n    }\n    _tracker.cancel_outstanding_tasks();\n    unregister_ctrl_commands();\n    _started = false;\n}\n\nbool meta_service::check_freeze() const\n{\n    zauto_lock l(_failure_detector->_lock);\n    if (_alive_set.size() < FLAGS_min_live_node_count_for_unfreeze)\n        return true;\n    int total = _alive_set.size() + _dead_set.size();\n    return _alive_set.size() * 100 < _node_live_percentage_threshold_for_update * total;\n}\n\ntemplate <typename TRpcHolder>\nint meta_service::check_leader(TRpcHolder rpc, rpc_address *forward_address)\n{\n    dsn::rpc_address leader;\n    if (!_failure_detector->get_leader(&leader)) {\n        if (!rpc.dsn_request()->header->context.u.is_forward_supported) {\n            if (forward_address != nullptr)\n                *forward_address = leader;\n            return -1;\n        }\n\n        dinfo(\"leader address: %s\", leader.to_string());\n        if (!leader.is_invalid()) {\n            rpc.forward(leader);\n            return 0;\n        } else {\n            if (forward_address != nullptr)\n                forward_address->set_invalid();\n            return -1;\n        }\n    }\n    return 1;\n}\n\ntemplate <typename TRpcHolder>\nbool meta_service::check_status(TRpcHolder rpc, rpc_address *forward_address)\n{\n    if (!_access_controller->allowed(rpc.dsn_request())) {\n        rpc.response().err = ERR_ACL_DENY;\n        ddebug(\"reject request with ERR_ACL_DENY\");\n        return false;\n    }\n\n    int result = check_leader(rpc, forward_address);\n    if (result == 0)\n        return false;\n    if (result == -1 || !_started) {\n        if (result == -1) {\n            rpc.response().err = ERR_FORWARD_TO_OTHERS;\n        } else if (_recovering) {\n            rpc.response().err = ERR_UNDER_RECOVERY;\n        } 
else {\n            rpc.response().err = ERR_SERVICE_NOT_ACTIVE;\n        }\n        ddebug(\"reject request with %s\", rpc.response().err.to_string());\n        return false;\n    }\n\n    return true;\n}\n\ntemplate <typename TRespType>\nbool meta_service::check_status_with_msg(message_ex *req, TRespType &response_struct)\n{\n    if (!_access_controller->allowed(req)) {\n        ddebug(\"reject request with ERR_ACL_DENY\");\n        response_struct.err = ERR_ACL_DENY;\n        reply(req, response_struct);\n        return false;\n    }\n\n    int result = check_leader(req, nullptr);\n    if (result == 0) {\n        return false;\n    }\n    if (result == -1 || !_started) {\n        if (result == -1) {\n            response_struct.err = ERR_FORWARD_TO_OTHERS;\n        } else if (_recovering) {\n            response_struct.err = ERR_UNDER_RECOVERY;\n        } else {\n            response_struct.err = ERR_SERVICE_NOT_ACTIVE;\n        }\n        ddebug(\"reject request with %s\", response_struct.err.to_string());\n        reply(req, response_struct);\n        return false;\n    }\n\n    return true;\n}\n\nerror_code meta_service::remote_storage_initialize()\n{\n    // create storage\n    dsn::dist::meta_state_service *storage =\n        dsn::utils::factory_store<::dsn::dist::meta_state_service>::create(\n            _meta_opts.meta_state_service_type.c_str(), PROVIDER_TYPE_MAIN);\n    error_code err = storage->initialize(_meta_opts.meta_state_service_args);\n    if (err != ERR_OK) {\n        derror(\"init meta_state_service failed, err = %s\", err.to_string());\n        return err;\n    }\n    _storage.reset(storage);\n    _meta_storage.reset(new mss::meta_storage(_storage.get(), &_tracker));\n\n    std::vector<std::string> slices;\n    utils::split_args(_meta_opts.cluster_root.c_str(), slices, '/');\n    std::string current = \"\";\n    for (unsigned int i = 0; i != slices.size(); ++i) {\n        current = meta_options::concat_path_unix_style(current, slices[i]);\n   
     task_ptr tsk =\n            _storage->create_node(current, LPC_META_CALLBACK, [&err](error_code ec) { err = ec; });\n        tsk->wait();\n        if (err != ERR_OK && err != ERR_NODE_ALREADY_EXIST) {\n            derror(\n                \"create node failed, node_path = %s, err = %s\", current.c_str(), err.to_string());\n            return err;\n        }\n    }\n    _cluster_root = current.empty() ? \"/\" : current;\n\n    ddebug(\"init meta_state_service succeed, cluster_root = %s\", _cluster_root.c_str());\n    return ERR_OK;\n}\n\n// visited in protection of failure_detector::_lock\nvoid meta_service::set_node_state(const std::vector<rpc_address> &nodes, bool is_alive)\n{\n    for (auto &node : nodes) {\n        if (is_alive) {\n            _alive_set.insert(node);\n            _dead_set.erase(node);\n        } else {\n            _alive_set.erase(node);\n            _dead_set.insert(node);\n        }\n    }\n\n    _recent_disconnect_count->add(is_alive ? 0 : nodes.size());\n    _unalive_nodes_count->set(_dead_set.size());\n    _alive_nodes_count->set(_alive_set.size());\n\n    if (!_started) {\n        return;\n    }\n    for (const rpc_address &address : nodes) {\n        tasking::enqueue(\n            LPC_META_STATE_HIGH,\n            nullptr,\n            std::bind(&server_state::on_change_node_state, _state.get(), address, is_alive),\n            server_state::sStateHash);\n    }\n}\n\nvoid meta_service::get_node_state(/*out*/ std::map<rpc_address, bool> &all_nodes)\n{\n    zauto_lock l(_failure_detector->_lock);\n    for (auto &node : _alive_set)\n        all_nodes[node] = true;\n    for (auto &node : _dead_set)\n        all_nodes[node] = false;\n}\n\nvoid meta_service::balancer_run() { _state->check_all_partitions(); }\n\nbool meta_service::try_lock_meta_op_status(meta_op_status op_status)\n{\n    meta_op_status expected = meta_op_status::FREE;\n    if (!_meta_op_status.compare_exchange_strong(expected, op_status)) {\n        derror_f(\"LOCK meta 
op status failed, meta \"\n                 \"server is busy, current op status is {}\",\n                 enum_to_string(expected));\n        return false;\n    }\n\n    ddebug_f(\"LOCK meta op status to {}\", enum_to_string(op_status));\n    return true;\n}\n\nvoid meta_service::unlock_meta_op_status()\n{\n    ddebug_f(\"UNLOCK meta op status from {}\", enum_to_string(_meta_op_status.load()));\n    _meta_op_status.store(meta_op_status::FREE);\n}\n\nvoid meta_service::register_ctrl_commands()\n{\n    _ctrl_node_live_percentage_threshold_for_update =\n        dsn::command_manager::instance().register_command(\n            {\"meta.live_percentage\"},\n            \"meta.live_percentage [num | DEFAULT]\",\n            \"node live percentage threshold for update\",\n            [this](const std::vector<std::string> &args) {\n                std::string result(\"OK\");\n                if (args.empty()) {\n                    result = std::to_string(_node_live_percentage_threshold_for_update);\n                } else {\n                    if (args[0] == \"DEFAULT\") {\n                        _node_live_percentage_threshold_for_update =\n                            _meta_opts.node_live_percentage_threshold_for_update;\n                    } else {\n                        int32_t v = 0;\n                        if (!dsn::buf2int32(args[0], v) || v < 0) {\n                            result = std::string(\"ERR: invalid arguments\");\n                        } else {\n                            _node_live_percentage_threshold_for_update = v;\n                        }\n                    }\n                }\n                return result;\n            });\n}\n\nvoid meta_service::unregister_ctrl_commands()\n{\n    UNREGISTER_VALID_HANDLER(_ctrl_node_live_percentage_threshold_for_update);\n    if (_partition_guardian != nullptr) {\n        _partition_guardian->unregister_ctrl_commands();\n    }\n    if (_balancer != nullptr) {\n        
_balancer->unregister_ctrl_commands();\n    }\n}\n\nvoid meta_service::start_service()\n{\n    zauto_lock l(_failure_detector->_lock);\n\n    const meta_view view = _state->get_meta_view();\n    for (auto &kv : *view.nodes) {\n        if (_dead_set.find(kv.first) == _dead_set.end())\n            _alive_set.insert(kv.first);\n    }\n\n    _alive_nodes_count->set(_alive_set.size());\n\n    for (const dsn::rpc_address &node : _alive_set) {\n        // sync alive set and the failure_detector\n        _failure_detector->unregister_worker(node);\n        _failure_detector->register_worker(node, true);\n    }\n\n    _started = true;\n    for (const dsn::rpc_address &node : _alive_set) {\n        tasking::enqueue(LPC_META_STATE_HIGH,\n                         nullptr,\n                         std::bind(&server_state::on_change_node_state, _state.get(), node, true),\n                         server_state::sStateHash);\n    }\n    for (const dsn::rpc_address &node : _dead_set) {\n        tasking::enqueue(LPC_META_STATE_HIGH,\n                         nullptr,\n                         std::bind(&server_state::on_change_node_state, _state.get(), node, false),\n                         server_state::sStateHash);\n    }\n\n    tasking::enqueue_timer(LPC_META_STATE_NORMAL,\n                           nullptr,\n                           std::bind(&meta_service::balancer_run, this),\n                           std::chrono::milliseconds(_opts.lb_interval_ms),\n                           server_state::sStateHash,\n                           std::chrono::milliseconds(_opts.lb_interval_ms));\n\n    if (!_meta_opts.cold_backup_disabled) {\n        ddebug(\"start backup service\");\n        tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                         nullptr,\n                         std::bind(&backup_service::start, _backup_handler.get()));\n    }\n\n    if (_bulk_load_svc) {\n        ddebug(\"start bulk load service\");\n        tasking::enqueue(LPC_META_CALLBACK, tracker(), 
[this]() {\n            _bulk_load_svc->initialize_bulk_load_service();\n        });\n    }\n}\n\n// the start function is executed in threadpool default\nerror_code meta_service::start()\n{\n    dassert(!_started, \"meta service is already started\");\n    register_ctrl_commands();\n\n    error_code err;\n\n    err = remote_storage_initialize();\n    dreturn_not_ok_logged(err, \"init remote storage failed, err = %s\", err.to_string());\n    ddebug(\"remote storage is successfully initialized\");\n\n    // start failure detector, and try to acquire the leader lock\n    _failure_detector.reset(new meta_server_failure_detector(this));\n    if (_meta_opts.enable_white_list)\n        _failure_detector->set_allow_list(_meta_opts.replica_white_list);\n    _failure_detector->register_ctrl_commands();\n\n    err = _failure_detector->start(_opts.fd_check_interval_seconds,\n                                   _opts.fd_beacon_interval_seconds,\n                                   _opts.fd_lease_seconds,\n                                   _opts.fd_grace_seconds,\n                                   _meta_opts.enable_white_list);\n\n    dreturn_not_ok_logged(err, \"start failure_detector failed, err = %s\", err.to_string());\n    ddebug(\"meta service failure detector is successfully started %s\",\n           _meta_opts.enable_white_list ? 
\"with whitelist enabled\" : \"\");\n\n    // should register rpc handlers before acquiring leader lock, so that this meta service\n    // can tell others who is the current leader\n    register_rpc_handlers();\n\n    // start remote command service before acquiring leader lock,\n    // so that the command line call can be handled\n    dist::cmd::register_remote_command_rpc();\n\n    _failure_detector->acquire_leader_lock();\n    dassert(_failure_detector->get_leader(nullptr), \"must be primary at this point\");\n    ddebug(\"%s got the primary lock, start to recover server state from remote storage\",\n           dsn_primary_address().to_string());\n\n    // initialize the load balancer\n    server_load_balancer *balancer = utils::factory_store<server_load_balancer>::create(\n        _meta_opts._lb_opts.server_load_balancer_type.c_str(), PROVIDER_TYPE_MAIN, this);\n    _balancer.reset(balancer);\n    // register control command to singleton-container for load balancer\n    _balancer->register_ctrl_commands();\n\n    partition_guardian *guardian = utils::factory_store<partition_guardian>::create(\n        _meta_opts.partition_guardian_type.c_str(), PROVIDER_TYPE_MAIN, this);\n    _partition_guardian.reset(guardian);\n    _partition_guardian->register_ctrl_commands();\n\n    // initializing the backup_handler should after remote_storage be initialized,\n    // because we should use _cluster_root\n    if (!_meta_opts.cold_backup_disabled) {\n        ddebug(\"initialize backup handler\");\n        _backup_handler = std::make_shared<backup_service>(\n            this,\n            meta_options::concat_path_unix_style(_cluster_root, \"backup\"),\n            _opts.cold_backup_root,\n            [](backup_service *bs) { return std::make_shared<policy_context>(bs); });\n    }\n\n    _bulk_load_svc = make_unique<bulk_load_service>(\n        this, meta_options::concat_path_unix_style(_cluster_root, \"bulk_load\"));\n\n    // initialize the server_state\n    
_state->initialize(this, meta_options::concat_path_unix_style(_cluster_root, \"apps\"));\n    while ((err = _state->initialize_data_structure()) != ERR_OK) {\n        if (err == ERR_OBJECT_NOT_FOUND && _meta_opts.recover_from_replica_server) {\n            ddebug(\"can't find apps from remote storage, and \"\n                   \"[meta_server].recover_from_replica_server = true, \"\n                   \"administrator should recover this cluster manually later\");\n            return dsn::ERR_OK;\n        }\n        derror(\"initialize server state from remote storage failed, err = %s, retry ...\",\n               err.to_string());\n    }\n\n    _state->recover_from_max_replica_count_env();\n\n    initialize_duplication_service();\n    recover_duplication_from_meta_state();\n\n    _split_svc = dsn::make_unique<meta_split_service>(this);\n\n    _state->register_cli_commands();\n\n    start_service();\n\n    ddebug(\"start meta_service succeed\");\n\n    return ERR_OK;\n}\n\nvoid meta_service::register_rpc_handlers()\n{\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_CONFIG_SYNC, \"config_sync\", &meta_service::on_config_sync);\n    register_rpc_handler_with_rpc_holder(RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX,\n                                         \"query_configuration_by_index\",\n                                         &meta_service::on_query_configuration_by_index);\n    register_rpc_handler(RPC_CM_UPDATE_PARTITION_CONFIGURATION,\n                         \"update_configuration\",\n                         &meta_service::on_update_configuration);\n    register_rpc_handler(RPC_CM_CREATE_APP, \"create_app\", &meta_service::on_create_app);\n    register_rpc_handler(RPC_CM_DROP_APP, \"drop_app\", &meta_service::on_drop_app);\n    register_rpc_handler(RPC_CM_RECALL_APP, \"recall_app\", &meta_service::on_recall_app);\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_LIST_APPS, \"list_apps\", &meta_service::on_list_apps);\n    
register_rpc_handler_with_rpc_holder(\n        RPC_CM_LIST_NODES, \"list_nodes\", &meta_service::on_list_nodes);\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_CLUSTER_INFO, \"cluster_info\", &meta_service::on_query_cluster_info);\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_PROPOSE_BALANCER, \"propose_balancer\", &meta_service::on_propose_balancer);\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_CONTROL_META, \"control_meta_level\", &meta_service::on_control_meta_level);\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_START_RECOVERY, \"start_recovery\", &meta_service::on_start_recovery);\n    register_rpc_handler(RPC_CM_START_RESTORE, \"start_restore\", &meta_service::on_start_restore);\n    register_rpc_handler(\n        RPC_CM_ADD_BACKUP_POLICY, \"add_backup_policy\", &meta_service::on_add_backup_policy);\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_QUERY_BACKUP_POLICY, \"query_backup_policy\", &meta_service::on_query_backup_policy);\n    register_rpc_handler_with_rpc_holder(RPC_CM_MODIFY_BACKUP_POLICY,\n                                         \"modify_backup_policy\",\n                                         &meta_service::on_modify_backup_policy);\n    register_rpc_handler_with_rpc_holder(RPC_CM_REPORT_RESTORE_STATUS,\n                                         \"report_restore_status\",\n                                         &meta_service::on_report_restore_status);\n    register_rpc_handler_with_rpc_holder(RPC_CM_QUERY_RESTORE_STATUS,\n                                         \"query_restore_status\",\n                                         &meta_service::on_query_restore_status);\n    register_duplication_rpc_handlers();\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_UPDATE_APP_ENV, \"update_app_env(set/del/clear)\", &meta_service::update_app_env);\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_DDD_DIAGNOSE, \"ddd_diagnose\", &meta_service::ddd_diagnose);\n    
register_rpc_handler_with_rpc_holder(RPC_CM_START_PARTITION_SPLIT,\n                                         \"start_partition_split\",\n                                         &meta_service::on_start_partition_split);\n    register_rpc_handler_with_rpc_holder(RPC_CM_CONTROL_PARTITION_SPLIT,\n                                         \"control_partition_split(pause/restart/cancel)\",\n                                         &meta_service::on_control_partition_split);\n    register_rpc_handler_with_rpc_holder(RPC_CM_QUERY_PARTITION_SPLIT,\n                                         \"query_partition_split\",\n                                         &meta_service::on_query_partition_split);\n    register_rpc_handler_with_rpc_holder(RPC_CM_REGISTER_CHILD_REPLICA,\n                                         \"register_child_on_meta\",\n                                         &meta_service::on_register_child_on_meta);\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_NOTIFY_STOP_SPLIT, \"notify_stop_split\", &meta_service::on_notify_stop_split);\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_QUERY_CHILD_STATE, \"query_child_state\", &meta_service::on_query_child_state);\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_START_BULK_LOAD, \"start_bulk_load\", &meta_service::on_start_bulk_load);\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_CONTROL_BULK_LOAD, \"control_bulk_load\", &meta_service::on_control_bulk_load);\n    register_rpc_handler_with_rpc_holder(RPC_CM_QUERY_BULK_LOAD_STATUS,\n                                         \"query_bulk_load_status\",\n                                         &meta_service::on_query_bulk_load_status);\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_CLEAR_BULK_LOAD, \"clear_bulk_load\", &meta_service::on_clear_bulk_load);\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_START_BACKUP_APP, \"start_backup_app\", &meta_service::on_start_backup_app);\n    
register_rpc_handler_with_rpc_holder(\n        RPC_CM_QUERY_BACKUP_STATUS, \"query_backup_status\", &meta_service::on_query_backup_status);\n    register_rpc_handler_with_rpc_holder(RPC_CM_START_MANUAL_COMPACT,\n                                         \"start_manual_compact\",\n                                         &meta_service::on_start_manual_compact);\n    register_rpc_handler_with_rpc_holder(RPC_CM_QUERY_MANUAL_COMPACT_STATUS,\n                                         \"query_manual_compact_status\",\n                                         &meta_service::on_query_manual_compact_status);\n    register_rpc_handler_with_rpc_holder(RPC_CM_GET_MAX_REPLICA_COUNT,\n                                         \"get_max_replica_count\",\n                                         &meta_service::on_get_max_replica_count);\n    register_rpc_handler_with_rpc_holder(RPC_CM_SET_MAX_REPLICA_COUNT,\n                                         \"set_max_replica_count\",\n                                         &meta_service::on_set_max_replica_count);\n}\n\nint meta_service::check_leader(dsn::message_ex *req, dsn::rpc_address *forward_address)\n{\n    dsn::rpc_address leader;\n    if (!_failure_detector->get_leader(&leader)) {\n        if (!req->header->context.u.is_forward_supported) {\n            if (forward_address != nullptr)\n                *forward_address = leader;\n            return -1;\n        }\n\n        dinfo(\"leader address: %s\", leader.to_string());\n        if (!leader.is_invalid()) {\n            dsn_rpc_forward(req, leader);\n            return 0;\n        } else {\n            if (forward_address != nullptr)\n                forward_address->set_invalid();\n            return -1;\n        }\n    }\n    return 1;\n}\n\n// table operations\nvoid meta_service::on_create_app(dsn::message_ex *req)\n{\n    configuration_create_app_response response;\n    if (!check_status_with_msg(req, response)) {\n        return;\n    }\n\n    req->add_ref();\n    
tasking::enqueue(LPC_META_STATE_NORMAL,\n                     nullptr,\n                     std::bind(&server_state::create_app, _state.get(), req),\n                     server_state::sStateHash);\n}\n\nvoid meta_service::on_drop_app(dsn::message_ex *req)\n{\n    configuration_drop_app_response response;\n    if (!check_status_with_msg(req, response)) {\n        return;\n    }\n\n    req->add_ref();\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     nullptr,\n                     std::bind(&server_state::drop_app, _state.get(), req),\n                     server_state::sStateHash);\n}\n\nvoid meta_service::on_recall_app(dsn::message_ex *req)\n{\n    configuration_recall_app_response response;\n    if (!check_status_with_msg(req, response)) {\n        return;\n    }\n\n    req->add_ref();\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     nullptr,\n                     std::bind(&server_state::recall_app, _state.get(), req),\n                     server_state::sStateHash);\n}\n\nvoid meta_service::on_list_apps(configuration_list_apps_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    _state->list_apps(rpc.request(), rpc.response());\n}\n\nvoid meta_service::on_list_nodes(configuration_list_nodes_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    configuration_list_nodes_response &response = rpc.response();\n    const configuration_list_nodes_request &request = rpc.request();\n    {\n        zauto_lock l(_failure_detector->_lock);\n        dsn::replication::node_info info;\n        if (request.status == node_status::NS_INVALID || request.status == node_status::NS_ALIVE) {\n            info.status = node_status::NS_ALIVE;\n            for (auto &node : _alive_set) {\n                info.address = node;\n                response.infos.push_back(info);\n            }\n        }\n        if (request.status == node_status::NS_INVALID ||\n            request.status == node_status::NS_UNALIVE) 
{\n            info.status = node_status::NS_UNALIVE;\n            for (auto &node : _dead_set) {\n                info.address = node;\n                response.infos.push_back(info);\n            }\n        }\n        response.err = dsn::ERR_OK;\n    }\n}\n\nvoid meta_service::on_query_cluster_info(configuration_cluster_info_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    std::stringstream oss;\n    configuration_cluster_info_response &response = rpc.response();\n    response.keys.push_back(\"meta_servers\");\n    for (size_t i = 0; i < _opts.meta_servers.size(); ++i) {\n        if (i != 0)\n            oss << \",\";\n        oss << _opts.meta_servers[i].to_string();\n    }\n\n    response.values.push_back(oss.str());\n    response.keys.push_back(\"primary_meta_server\");\n    response.values.push_back(dsn_primary_address().to_std_string());\n    std::string zk_hosts =\n        dsn_config_get_value_string(\"zookeeper\", \"hosts_list\", \"\", \"zookeeper_hosts\");\n    zk_hosts.erase(std::remove_if(zk_hosts.begin(), zk_hosts.end(), ::isspace), zk_hosts.end());\n    response.keys.push_back(\"zookeeper_hosts\");\n    response.values.push_back(zk_hosts);\n    response.keys.push_back(\"zookeeper_root\");\n    response.values.push_back(_cluster_root);\n    response.keys.push_back(\"meta_function_level\");\n    response.values.push_back(\n        _meta_function_level_VALUES_TO_NAMES.find(get_function_level())->second + 3);\n    response.keys.push_back(\"balance_operation_count\");\n    std::vector<std::string> balance_operation_type;\n    balance_operation_type.emplace_back(std::string(\"detail\"));\n    response.values.push_back(_balancer->get_balance_operation_count(balance_operation_type));\n    double primary_stddev, total_stddev;\n    _state->get_cluster_balance_score(primary_stddev, total_stddev);\n    response.keys.push_back(\"primary_replica_count_stddev\");\n    response.values.push_back(fmt::format(\"{:.{}f}\", primary_stddev, 2));\n  
  response.keys.push_back(\"total_replica_count_stddev\");\n    response.values.push_back(fmt::format(\"{:.{}f}\", total_stddev, 2));\n    response.keys.push_back(\"cluster_name\");\n    response.values.push_back(get_current_cluster_name());\n    response.err = dsn::ERR_OK;\n}\n\n// client => meta server\nvoid meta_service::on_query_configuration_by_index(configuration_query_by_index_rpc rpc)\n{\n    configuration_query_by_index_response &response = rpc.response();\n    rpc_address forward_address;\n    if (!check_status(rpc, &forward_address)) {\n        if (!forward_address.is_invalid()) {\n            partition_configuration config;\n            config.primary = forward_address;\n            response.partitions.push_back(std::move(config));\n        }\n        return;\n    }\n\n    _state->query_configuration_by_index(rpc.request(), response);\n    if (ERR_OK == response.err) {\n        ddebug_f(\"client {} queried an available app {} with appid {}\",\n                 rpc.dsn_request()->header->from_address.to_string(),\n                 rpc.request().app_name,\n                 response.app_id);\n    }\n}\n\n// partition sever => meta sever\n// as get stale configuration is not allowed for partition server, we need to dispatch it to the\n// meta state thread pool\nvoid meta_service::on_config_sync(configuration_query_by_node_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    {\n        // this code piece should be referenced together with meta_service::set_node_state.\n        // In which, the replica server's failure event is dispatched to the meta_state_thread with\n        // the protection\n        // of failure_detector::_lock. 
Here we use this lock again, to make sure the config_sync rpc\n        // AFTER the node dead is dispatch\n        // AFTER the node dead event\n        zauto_lock l(_failure_detector->_lock);\n        tasking::enqueue(LPC_META_STATE_HIGH,\n                         nullptr,\n                         std::bind(&server_state::on_config_sync, _state.get(), rpc),\n                         server_state::sStateHash);\n    }\n}\n\nvoid meta_service::on_update_configuration(dsn::message_ex *req)\n{\n    configuration_update_response response;\n    if (!check_status_with_msg(req, response)) {\n        return;\n    }\n\n    std::shared_ptr<configuration_update_request> request =\n        std::make_shared<configuration_update_request>();\n    dsn::unmarshall(req, *request);\n\n    meta_function_level::type level = get_function_level();\n    if (level <= meta_function_level::fl_freezed) {\n        response.err = ERR_STATE_FREEZED;\n        _state->query_configuration_by_gpid(request->config.pid, response.config);\n        reply(req, response);\n\n        ddebug(\"refuse request %s coz meta function level is %s\",\n               boost::lexical_cast<std::string>(*request).c_str(),\n               _meta_function_level_VALUES_TO_NAMES.find(level)->second);\n        return;\n    }\n\n    req->add_ref();\n    tasking::enqueue(LPC_META_STATE_HIGH,\n                     nullptr,\n                     std::bind(&server_state::on_update_configuration, _state.get(), request, req),\n                     server_state::sStateHash);\n}\n\nvoid meta_service::on_control_meta_level(configuration_meta_control_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    const configuration_meta_control_request &request = rpc.request();\n    configuration_meta_control_response &response = rpc.response();\n    response.err = ERR_OK;\n    response.old_level = _function_level.load();\n    if (request.level == meta_function_level::fl_invalid) {\n        return;\n    }\n\n    if 
(request.level <= meta_function_level::fl_steady) {\n        tasking::enqueue(LPC_META_STATE_NORMAL,\n                         nullptr,\n                         std::bind(&server_state::clear_proposals, _state.get()),\n                         server_state::sStateHash);\n    }\n\n    _function_level.store(request.level);\n}\n\nvoid meta_service::on_propose_balancer(configuration_balancer_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    const configuration_balancer_request &request = rpc.request();\n    ddebug(\"get proposal balancer request, gpid(%d.%d)\",\n           request.gpid.get_app_id(),\n           request.gpid.get_partition_index());\n    _state->on_propose_balancer(request, rpc.response());\n}\n\nvoid meta_service::on_start_recovery(configuration_recovery_rpc rpc)\n{\n    configuration_recovery_response &response = rpc.response();\n    ddebug(\"got start recovery request, start to do recovery\");\n    int result = check_leader(rpc, nullptr);\n    // request has been forwarded to others\n    if (result == 0) {\n        return;\n    }\n\n    if (result == -1) {\n        response.err = ERR_FORWARD_TO_OTHERS;\n    } else {\n        zauto_write_lock l(_meta_lock);\n        if (_started.load()) {\n            ddebug(\"service(%s) is already started, ignore the recovery request\",\n                   dsn_primary_address().to_string());\n            response.err = ERR_SERVICE_ALREADY_RUNNING;\n        } else {\n            _state->on_start_recovery(rpc.request(), response);\n            if (response.err == dsn::ERR_OK) {\n                _recovering = false;\n                start_service();\n            }\n        }\n    }\n}\n\nvoid meta_service::on_start_restore(dsn::message_ex *req)\n{\n    configuration_create_app_response response;\n    if (!check_status_with_msg(req, response)) {\n        return;\n    }\n\n    req->add_ref();\n    tasking::enqueue(\n        LPC_RESTORE_BACKGROUND, nullptr, std::bind(&server_state::restore_app, 
_state.get(), req));\n}\n\nvoid meta_service::on_add_backup_policy(dsn::message_ex *req)\n{\n    configuration_add_backup_policy_response response;\n    if (!check_status_with_msg(req, response)) {\n        return;\n    }\n\n    if (_backup_handler == nullptr) {\n        derror(\"meta doesn't enable backup service\");\n        response.err = ERR_SERVICE_NOT_ACTIVE;\n        reply(req, response);\n    } else {\n        req->add_ref();\n        tasking::enqueue(LPC_DEFAULT_CALLBACK,\n                         nullptr,\n                         std::bind(&backup_service::add_backup_policy, _backup_handler.get(), req));\n    }\n}\n\nvoid meta_service::on_query_backup_policy(query_backup_policy_rpc policy_rpc)\n{\n    if (!check_status(policy_rpc)) {\n        return;\n    }\n\n    auto &response = policy_rpc.response();\n    if (_backup_handler == nullptr) {\n        derror(\"meta doesn't enable backup service\");\n        response.err = ERR_SERVICE_NOT_ACTIVE;\n    } else {\n        tasking::enqueue(\n            LPC_DEFAULT_CALLBACK,\n            nullptr,\n            std::bind(&backup_service::query_backup_policy, _backup_handler.get(), policy_rpc));\n    }\n}\n\nvoid meta_service::on_modify_backup_policy(configuration_modify_backup_policy_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    if (_backup_handler == nullptr) {\n        derror(\"meta doesn't enable backup service\");\n        rpc.response().err = ERR_SERVICE_NOT_ACTIVE;\n    } else {\n        tasking::enqueue(\n            LPC_DEFAULT_CALLBACK,\n            nullptr,\n            std::bind(&backup_service::modify_backup_policy, _backup_handler.get(), rpc));\n    }\n}\n\nvoid meta_service::on_report_restore_status(configuration_report_restore_status_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     nullptr,\n                     std::bind(&server_state::on_recv_restore_report, _state.get(), 
rpc));\n}\n\nvoid meta_service::on_query_restore_status(configuration_query_restore_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     nullptr,\n                     std::bind(&server_state::on_query_restore_status, _state.get(), rpc));\n}\n\nvoid meta_service::on_add_duplication(duplication_add_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    if (!_dup_svc) {\n        rpc.response().err = ERR_SERVICE_NOT_ACTIVE;\n        return;\n    }\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     tracker(),\n                     [this, rpc]() { _dup_svc->add_duplication(std::move(rpc)); },\n                     server_state::sStateHash);\n}\n\nvoid meta_service::on_modify_duplication(duplication_modify_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    if (!_dup_svc) {\n        rpc.response().err = ERR_SERVICE_NOT_ACTIVE;\n        return;\n    }\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     tracker(),\n                     [this, rpc]() { _dup_svc->modify_duplication(std::move(rpc)); },\n                     server_state::sStateHash);\n}\n\nvoid meta_service::on_query_duplication_info(duplication_query_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    if (_dup_svc) {\n        _dup_svc->query_duplication_info(rpc.request(), rpc.response());\n    } else {\n        rpc.response().err = ERR_SERVICE_NOT_ACTIVE;\n    }\n}\n\nvoid meta_service::on_duplication_sync(duplication_sync_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     tracker(),\n                     [this, rpc]() {\n                         if (_dup_svc) {\n                             _dup_svc->duplication_sync(std::move(rpc));\n                         } else {\n                             rpc.response().err = ERR_SERVICE_NOT_ACTIVE;\n       
                  }\n                     },\n                     server_state::sStateHash);\n}\n\nvoid meta_service::recover_duplication_from_meta_state()\n{\n    if (_dup_svc) {\n        _dup_svc->recover_from_meta_state();\n    }\n}\n\nvoid meta_service::register_duplication_rpc_handlers()\n{\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_ADD_DUPLICATION, \"add_duplication\", &meta_service::on_add_duplication);\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_MODIFY_DUPLICATION, \"modify duplication\", &meta_service::on_modify_duplication);\n    register_rpc_handler_with_rpc_holder(RPC_CM_QUERY_DUPLICATION,\n                                         \"query duplication info\",\n                                         &meta_service::on_query_duplication_info);\n    register_rpc_handler_with_rpc_holder(\n        RPC_CM_DUPLICATION_SYNC, \"sync duplication\", &meta_service::on_duplication_sync);\n}\n\nvoid meta_service::initialize_duplication_service()\n{\n    if (_opts.duplication_enabled) {\n        _dup_svc = make_unique<meta_duplication_service>(_state.get(), this);\n    }\n}\n\nvoid meta_service::update_app_env(app_env_rpc env_rpc)\n{\n    if (!check_status(env_rpc)) {\n        return;\n    }\n\n    auto &response = env_rpc.response();\n    app_env_operation::type op = env_rpc.request().op;\n    switch (op) {\n    case app_env_operation::type::APP_ENV_OP_SET:\n        tasking::enqueue(LPC_META_STATE_NORMAL,\n                         nullptr,\n                         std::bind(&server_state::set_app_envs, _state.get(), env_rpc));\n        break;\n    case app_env_operation::type::APP_ENV_OP_DEL:\n        tasking::enqueue(LPC_META_STATE_NORMAL,\n                         nullptr,\n                         std::bind(&server_state::del_app_envs, _state.get(), env_rpc));\n        break;\n    case app_env_operation::type::APP_ENV_OP_CLEAR:\n        tasking::enqueue(LPC_META_STATE_NORMAL,\n                         nullptr,\n                     
    std::bind(&server_state::clear_app_envs, _state.get(), env_rpc));\n        break;\n    default: // app_env_operation::type::APP_ENV_OP_INVALID\n        dwarn(\"recv a invalid update app_env request, just ignore\");\n        response.err = ERR_INVALID_PARAMETERS;\n        response.hint_message =\n            \"recv a invalid update_app_env request with op = APP_ENV_OP_INVALID\";\n        break;\n    }\n}\n\nvoid meta_service::ddd_diagnose(ddd_diagnose_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    auto &response = rpc.response();\n    get_partition_guardian()->get_ddd_partitions(rpc.request().pid, response.partitions);\n    response.err = ERR_OK;\n}\n\nvoid meta_service::on_start_partition_split(start_split_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n    if (_split_svc == nullptr) {\n        derror_f(\"meta doesn't support partition split\");\n        rpc.response().err = ERR_SERVICE_NOT_ACTIVE;\n        return;\n    }\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     tracker(),\n                     [this, rpc]() { _split_svc->start_partition_split(std::move(rpc)); },\n                     server_state::sStateHash);\n}\n\nvoid meta_service::on_control_partition_split(control_split_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    if (_split_svc == nullptr) {\n        derror_f(\"meta doesn't support partition split\");\n        rpc.response().err = ERR_SERVICE_NOT_ACTIVE;\n        return;\n    }\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     tracker(),\n                     [this, rpc]() { _split_svc->control_partition_split(std::move(rpc)); },\n                     server_state::sStateHash);\n}\n\nvoid meta_service::on_query_partition_split(query_split_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    if (_split_svc == nullptr) {\n        derror_f(\"meta doesn't support partition split\");\n        rpc.response().err = 
ERR_SERVICE_NOT_ACTIVE;\n        return;\n    }\n    _split_svc->query_partition_split(std::move(rpc));\n}\n\nvoid meta_service::on_register_child_on_meta(register_child_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     tracker(),\n                     [this, rpc]() { _split_svc->register_child_on_meta(std::move(rpc)); },\n                     server_state::sStateHash);\n}\n\nvoid meta_service::on_notify_stop_split(notify_stop_split_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n    if (_split_svc == nullptr) {\n        derror_f(\"meta doesn't support partition split\");\n        rpc.response().err = ERR_SERVICE_NOT_ACTIVE;\n        return;\n    }\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     tracker(),\n                     [this, rpc]() { _split_svc->notify_stop_split(std::move(rpc)); },\n                     server_state::sStateHash);\n}\n\nvoid meta_service::on_query_child_state(query_child_state_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n    if (_split_svc == nullptr) {\n        derror_f(\"meta doesn't support partition split\");\n        rpc.response().err = ERR_SERVICE_NOT_ACTIVE;\n        return;\n    }\n    _split_svc->query_child_state(std::move(rpc));\n}\n\nvoid meta_service::on_start_bulk_load(start_bulk_load_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    if (_bulk_load_svc == nullptr) {\n        derror_f(\"meta doesn't support bulk load\");\n        rpc.response().err = ERR_SERVICE_NOT_ACTIVE;\n        return;\n    }\n    _bulk_load_svc->on_start_bulk_load(std::move(rpc));\n}\n\nvoid meta_service::on_control_bulk_load(control_bulk_load_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    if (_bulk_load_svc == nullptr) {\n        derror_f(\"meta doesn't support bulk load\");\n        rpc.response().err = ERR_SERVICE_NOT_ACTIVE;\n        return;\n    }\n    
tasking::enqueue(LPC_META_STATE_NORMAL,\n                     tracker(),\n                     [this, rpc]() { _bulk_load_svc->on_control_bulk_load(std::move(rpc)); },\n                     server_state::sStateHash);\n}\n\nvoid meta_service::on_query_bulk_load_status(query_bulk_load_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    if (_bulk_load_svc == nullptr) {\n        derror_f(\"meta doesn't support bulk load\");\n        rpc.response().err = ERR_SERVICE_NOT_ACTIVE;\n        return;\n    }\n    _bulk_load_svc->on_query_bulk_load_status(std::move(rpc));\n}\n\nvoid meta_service::on_clear_bulk_load(clear_bulk_load_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    if (_bulk_load_svc == nullptr) {\n        derror_f(\"meta doesn't support bulk load\");\n        rpc.response().err = ERR_SERVICE_NOT_ACTIVE;\n        return;\n    }\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     tracker(),\n                     [this, rpc]() { _bulk_load_svc->on_clear_bulk_load(std::move(rpc)); },\n                     server_state::sStateHash);\n}\n\nvoid meta_service::on_start_backup_app(start_backup_app_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n    if (_backup_handler == nullptr) {\n        derror_f(\"meta doesn't enable backup service\");\n        rpc.response().err = ERR_SERVICE_NOT_ACTIVE;\n        return;\n    }\n    _backup_handler->start_backup_app(std::move(rpc));\n}\n\nvoid meta_service::on_query_backup_status(query_backup_status_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n    if (_backup_handler == nullptr) {\n        derror_f(\"meta doesn't enable backup service\");\n        rpc.response().err = ERR_SERVICE_NOT_ACTIVE;\n        return;\n    }\n    _backup_handler->query_backup_status(std::move(rpc));\n}\n\nsize_t meta_service::get_alive_node_count() const\n{\n    zauto_lock l(_failure_detector->_lock);\n    return _alive_set.size();\n}\n\nvoid 
meta_service::on_start_manual_compact(start_manual_compact_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     nullptr,\n                     std::bind(&server_state::on_start_manual_compact, _state.get(), rpc));\n}\n\nvoid meta_service::on_query_manual_compact_status(query_manual_compact_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     nullptr,\n                     std::bind(&server_state::on_query_manual_compact_status, _state.get(), rpc));\n}\n\n// ThreadPool: THREAD_POOL_META_SERVER\nvoid meta_service::on_get_max_replica_count(configuration_get_max_replica_count_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     tracker(),\n                     std::bind(&server_state::get_max_replica_count, _state.get(), rpc),\n                     server_state::sStateHash);\n}\n\n// ThreadPool: THREAD_POOL_META_SERVER\nvoid meta_service::on_set_max_replica_count(configuration_set_max_replica_count_rpc rpc)\n{\n    if (!check_status(rpc)) {\n        return;\n    }\n\n    tasking::enqueue(LPC_META_STATE_NORMAL,\n                     tracker(),\n                     std::bind(&server_state::set_max_replica_count, _state.get(), rpc),\n                     server_state::sStateHash);\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_service.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     meta server service for EON (rDSN layer 2)\n *\n * Revision history:\n *     2015-03-09, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <memory>\n\n#include <dsn/cpp/serverlet.h>\n#include <dsn/dist/meta_state_service.h>\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n\n#include \"common/replication_common.h\"\n#include \"common/bulk_load_common.h\"\n#include \"common/partition_split_common.h\"\n#include \"common/manual_compact.h\"\n#include \"meta_rpc_types.h\"\n#include \"meta_options.h\"\n#include \"meta_backup_service.h\"\n#include \"meta_state_service_utils.h\"\n#include \"block_service/block_service_manager.h\"\n#include 
\"partition_guardian.h\"\n\nnamespace dsn {\nnamespace security {\nclass access_controller;\n} // namespace security\nnamespace replication {\n\nclass server_state;\nclass meta_server_failure_detector;\nclass server_load_balancer;\nclass meta_duplication_service;\nclass meta_split_service;\nclass bulk_load_service;\nnamespace test {\nclass test_checker;\n}\n\nDEFINE_TASK_CODE(LPC_DEFAULT_CALLBACK, TASK_PRIORITY_COMMON, dsn::THREAD_POOL_DEFAULT)\n\nenum class meta_op_status\n{\n    FREE = 0,\n    RECALL,\n    BALANCE,\n    BACKUP,\n    BULKLOAD,\n    RESTORE,\n    MANUAL_COMPACT,\n    INVALID\n};\n\nENUM_BEGIN(meta_op_status, meta_op_status::INVALID)\nENUM_REG(meta_op_status::FREE)\nENUM_REG(meta_op_status::RECALL)\nENUM_REG(meta_op_status::BALANCE)\nENUM_REG(meta_op_status::BACKUP)\nENUM_REG(meta_op_status::BULKLOAD)\nENUM_REG(meta_op_status::RESTORE)\nENUM_REG(meta_op_status::MANUAL_COMPACT)\nENUM_END(meta_op_status)\n\nclass meta_service : public serverlet<meta_service>\n{\npublic:\n    meta_service();\n    virtual ~meta_service();\n\n    error_code start();\n    void stop();\n\n    const replication_options &get_options() const { return _opts; }\n    const meta_options &get_meta_options() const { return _meta_opts; }\n\n    /// NOTE: prefer using mss::meta_storage instead.\n    dist::meta_state_service *get_remote_storage() const { return _storage.get(); }\n    mss::meta_storage *get_meta_storage() const { return _meta_storage.get(); }\n\n    server_state *get_server_state() { return _state.get(); }\n    server_load_balancer *get_balancer() { return _balancer.get(); }\n    partition_guardian *get_partition_guardian() { return _partition_guardian.get(); }\n    dist::block_service::block_service_manager &get_block_service_manager()\n    {\n        return _block_service_manager;\n    }\n    bulk_load_service *get_bulk_load_service() { return _bulk_load_svc.get(); }\n\n    meta_function_level::type get_function_level()\n    {\n        meta_function_level::type level 
= _function_level.load();\n        if (level > meta_function_level::fl_freezed && check_freeze()) {\n            level = meta_function_level::fl_freezed;\n        }\n        return level;\n    }\n    void set_function_level(meta_function_level::type level) { _function_level.store(level); }\n\n    template <typename TResponse>\n    void reply_data(dsn::message_ex *request, const TResponse &data)\n    {\n        dsn::message_ex *response = request->create_response();\n        dsn::marshall(response, data);\n        reply_message(request, response);\n    }\n\n    virtual void reply_message(dsn::message_ex *, dsn::message_ex *response)\n    {\n        dsn_rpc_reply(response);\n    }\n    virtual void send_message(const rpc_address &target, dsn::message_ex *request)\n    {\n        dsn_rpc_call_one_way(target, request);\n    }\n    virtual void send_request(dsn::message_ex * /*req*/,\n                              const rpc_address &target,\n                              const rpc_response_task_ptr &callback)\n    {\n        dsn_rpc_call(target, callback);\n    }\n\n    // these two callbacks are running in fd's thread_pool, and in fd's lock\n    void set_node_state(const std::vector<rpc_address> &nodes_list, bool is_alive);\n    void get_node_state(/*out*/ std::map<rpc_address, bool> &all_nodes);\n\n    void start_service();\n    void balancer_run();\n\n    dsn::task_tracker *tracker() { return &_tracker; }\n\n    size_t get_alive_node_count() const;\n\n    bool try_lock_meta_op_status(meta_op_status op_status);\n    void unlock_meta_op_status();\n    meta_op_status get_op_status() const { return _meta_op_status.load(); }\n\n    std::string get_meta_list_string() const\n    {\n        std::string metas;\n        for (const auto &node : _opts.meta_servers) {\n            metas = fmt::format(\"{}{},\", metas, node.to_string());\n        }\n        return metas.substr(0, metas.length() - 1);\n    }\n\nprivate:\n    void register_rpc_handlers();\n    void 
register_ctrl_commands();\n    void unregister_ctrl_commands();\n\n    // client => meta server\n    void on_query_configuration_by_index(configuration_query_by_index_rpc rpc);\n\n    // partition server => meta server\n    void on_config_sync(configuration_query_by_node_rpc rpc);\n\n    // update configuration\n    void on_propose_balancer(configuration_balancer_rpc rpc);\n    void on_update_configuration(dsn::message_ex *req);\n\n    // app operations\n    void on_create_app(dsn::message_ex *req);\n    void on_drop_app(dsn::message_ex *req);\n    void on_recall_app(dsn::message_ex *req);\n    void on_list_apps(configuration_list_apps_rpc rpc);\n    void on_list_nodes(configuration_list_nodes_rpc rpc);\n\n    // app env operations\n    void update_app_env(app_env_rpc env_rpc);\n\n    // ddd diagnose\n    void ddd_diagnose(ddd_diagnose_rpc rpc);\n\n    // cluster info\n    void on_query_cluster_info(configuration_cluster_info_rpc rpc);\n\n    // meta control\n    void on_control_meta_level(configuration_meta_control_rpc rpc);\n    void on_start_recovery(configuration_recovery_rpc rpc);\n\n    // backup/restore\n    void on_start_backup_app(start_backup_app_rpc rpc);\n    void on_query_backup_status(query_backup_status_rpc rpc);\n    void on_start_restore(dsn::message_ex *req);\n    void on_add_backup_policy(dsn::message_ex *req);\n    void on_query_backup_policy(query_backup_policy_rpc policy_rpc);\n    void on_modify_backup_policy(configuration_modify_backup_policy_rpc rpc);\n    void on_report_restore_status(configuration_report_restore_status_rpc rpc);\n    void on_query_restore_status(configuration_query_restore_rpc rpc);\n\n    // duplication\n    void on_add_duplication(duplication_add_rpc rpc);\n    void on_modify_duplication(duplication_modify_rpc rpc);\n    void on_query_duplication_info(duplication_query_rpc rpc);\n    void on_duplication_sync(duplication_sync_rpc rpc);\n    void register_duplication_rpc_handlers();\n    void 
recover_duplication_from_meta_state();\n    void initialize_duplication_service();\n\n    // split\n    void on_start_partition_split(start_split_rpc rpc);\n    void on_control_partition_split(control_split_rpc rpc);\n    void on_query_partition_split(query_split_rpc rpc);\n    void on_register_child_on_meta(register_child_rpc rpc);\n    void on_notify_stop_split(notify_stop_split_rpc rpc);\n    void on_query_child_state(query_child_state_rpc rpc);\n\n    // bulk load\n    void on_start_bulk_load(start_bulk_load_rpc rpc);\n    void on_control_bulk_load(control_bulk_load_rpc rpc);\n    void on_query_bulk_load_status(query_bulk_load_rpc rpc);\n    void on_clear_bulk_load(clear_bulk_load_rpc rpc);\n\n    // manual compaction\n    void on_start_manual_compact(start_manual_compact_rpc rpc);\n    void on_query_manual_compact_status(query_manual_compact_rpc rpc);\n\n    // get/set max_replica_count of an app\n    void on_get_max_replica_count(configuration_get_max_replica_count_rpc rpc);\n    void on_set_max_replica_count(configuration_set_max_replica_count_rpc rpc);\n\n    // common routines\n    // ret:\n    //   1. the meta is leader\n    //   0. meta isn't leader, and rpc-msg can forward to others\n    //  -1. 
meta isn't leader, and rpc-msg can't forward to others\n    // if return -1 and `forward_address' != nullptr, then return leader by `forward_address'.\n    int check_leader(dsn::message_ex *req, dsn::rpc_address *forward_address);\n    template <typename TRpcHolder>\n    int check_leader(TRpcHolder rpc, /*out*/ rpc_address *forward_address);\n    // ret:\n    //    false: check failed\n    //    true:  check succeed\n    template <typename TRpcHolder>\n    bool check_status(TRpcHolder rpc, /*out*/ rpc_address *forward_address = nullptr);\n    template <typename TRespType>\n    bool check_status_with_msg(message_ex *req, TRespType &response_struct);\n\n    error_code remote_storage_initialize();\n    bool check_freeze() const;\n\nprivate:\n    friend class backup_engine_test;\n    friend class backup_service_test;\n    friend class bulk_load_service_test;\n    friend class meta_backup_service_test;\n    friend class meta_backup_test_base;\n    friend class meta_duplication_service;\n    friend class meta_http_service;\n    friend class meta_http_service_test;\n    friend class meta_partition_guardian_test;\n    friend class meta_service_test;\n    friend class meta_service_test_app;\n    friend class meta_split_service_test;\n    friend class meta_test_base;\n    friend class policy_context_test;\n    friend class server_state_restore_test;\n    friend class test::test_checker;\n\n    replication_options _opts;\n    meta_options _meta_opts;\n    uint64_t _node_live_percentage_threshold_for_update;\n    dsn_handle_t _ctrl_node_live_percentage_threshold_for_update = nullptr;\n\n    std::shared_ptr<server_state> _state;\n    std::shared_ptr<meta_server_failure_detector> _failure_detector;\n\n    std::shared_ptr<dist::meta_state_service> _storage;\n    std::unique_ptr<mss::meta_storage> _meta_storage;\n\n    std::shared_ptr<server_load_balancer> _balancer;\n    std::shared_ptr<backup_service> _backup_handler;\n    std::shared_ptr<partition_guardian> _partition_guardian 
= nullptr;\n\n    std::unique_ptr<meta_duplication_service> _dup_svc;\n\n    std::unique_ptr<meta_split_service> _split_svc;\n\n    std::unique_ptr<bulk_load_service> _bulk_load_svc;\n\n    // handle all the block filesystems for current meta service\n    // (in other words, current service node)\n    dist::block_service::block_service_manager _block_service_manager;\n\n    // [\n    // this is protected by failure_detector::_lock\n    std::set<rpc_address> _alive_set;\n    std::set<rpc_address> _dead_set;\n    // ]\n    mutable zrwlock_nr _meta_lock;\n\n    std::atomic_bool _started;\n    std::atomic_bool _recovering;\n    // reference replication.thrift for what the meta_function_level means\n    std::atomic<meta_function_level::type> _function_level;\n\n    std::string _cluster_root;\n\n    perf_counter_wrapper _recent_disconnect_count;\n    perf_counter_wrapper _unalive_nodes_count;\n    perf_counter_wrapper _alive_nodes_count;\n\n    dsn::task_tracker _tracker;\n\n    std::unique_ptr<security::access_controller> _access_controller;\n\n    // indicate which operation is processeding in meta server\n    std::atomic<meta_op_status> _meta_op_status;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_service_app.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/dist/replication.h>\n#include <dsn/utility/factory_store.h>\n#include <dsn/dist/replication/meta_service_app.h>\n#include <dsn/http/http_server.h>\n\n#include \"distributed_lock_service_simple.h\"\n#include \"meta_state_service_simple.h\"\n\n#include \"zookeeper/distributed_lock_service_zookeeper.h\"\n#include \"meta_state_service_zookeeper.h\"\n\n#include \"server_load_balancer.h\"\n#include \"greedy_load_balancer.h\"\n\n#include \"meta_service.h\"\n#include \"meta_http_service.h\"\n#include \"http/service_version.h\"\n\nnamespace dsn {\nnamespace service {\n\n#define register_component(name, base_type, derived_type)                                          \\\n    do {                                                   
                                        \\\n        utils::factory_store<base_type>::register_factory(                                         \\\n            name, base_type::create<derived_type>, PROVIDER_TYPE_MAIN);                            \\\n    } while (0)\n\nvoid meta_service_app::register_components()\n{\n    register_component(\"distributed_lock_service_simple\",\n                       dist::distributed_lock_service,\n                       dist::distributed_lock_service_simple);\n\n    register_component(\"distributed_lock_service_zookeeper\",\n                       dist::distributed_lock_service,\n                       dist::distributed_lock_service_zookeeper);\n\n    register_component(\n        \"meta_state_service_simple\", dist::meta_state_service, dist::meta_state_service_simple);\n    register_component(\"meta_state_service_zookeeper\",\n                       dist::meta_state_service,\n                       dist::meta_state_service_zookeeper);\n\n    register_component(\"greedy_load_balancer\",\n                       replication::server_load_balancer,\n                       replication::greedy_load_balancer);\n    register_component(\n        \"partition_guardian\", replication::partition_guardian, replication::partition_guardian);\n}\n\nvoid meta_service_app::register_all()\n{\n    dsn::service_app::register_factory<meta_service_app>(\"meta\");\n    register_components();\n}\n\nmeta_service_app::meta_service_app(const service_app_info *info) : service_app(info)\n{\n    // create in constructor because it may be used in checker before started\n    _service.reset(new replication::meta_service());\n\n    // add http service\n    register_http_service(new replication::meta_http_service(_service.get()));\n    start_http_server();\n}\n\nmeta_service_app::~meta_service_app() {}\n\nerror_code meta_service_app::start(const std::vector<std::string> &args)\n{\n    if (args.size() >= 2) {\n        app_version.version = *(args.end() - 2);\n        
app_version.git_commit = *(args.end() - 1);\n    }\n    return _service->start();\n}\n\nerror_code meta_service_app::stop(bool /*cleanup*/)\n{\n    _service->stop();\n    return ERR_OK;\n}\n} // namespace service\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_split_service.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication/replica_envs.h>\n#include <dsn/utility/fail_point.h>\n\n#include \"meta_split_service.h\"\n#include \"meta_state_service_utils.h\"\n\nnamespace dsn {\nnamespace replication {\n\nmeta_split_service::meta_split_service(meta_service *meta_srv)\n{\n    _meta_svc = meta_srv;\n    _state = meta_srv->get_server_state();\n}\n\nvoid meta_split_service::start_partition_split(start_split_rpc rpc)\n{\n    const auto &request = rpc.request();\n    auto &response = rpc.response();\n    response.err = ERR_OK;\n\n    std::shared_ptr<app_state> app;\n    {\n        zauto_write_lock l(app_lock());\n\n        app = _state->get_app(request.app_name);\n        if (app == nullptr || app->status != app_status::AS_AVAILABLE) {\n            derror_f(\"app({}) is not existed or not available\", request.app_name);\n            response.err = app == nullptr ? ERR_APP_NOT_EXIST : ERR_APP_DROPPED;\n            response.hint_msg = fmt::format(\n                \"app {}\", response.err == ERR_APP_NOT_EXIST ? 
\"not existed\" : \"dropped\");\n            return;\n        }\n\n        // new_partition_count != old_partition_count*2\n        if (request.new_partition_count != app->partition_count * 2) {\n            response.err = ERR_INVALID_PARAMETERS;\n            derror_f(\"wrong partition count: app({}), partition count({}), new_partition_count({})\",\n                     request.app_name,\n                     app->partition_count,\n                     request.new_partition_count);\n            response.hint_msg =\n                fmt::format(\"wrong partition_count, should be {}\", app->partition_count * 2);\n            return;\n        }\n\n        if (app->splitting()) {\n            response.err = ERR_BUSY;\n            auto err_msg =\n                fmt::format(\"app({}) is already executing partition split\", request.app_name);\n            derror_f(\"{}\", err_msg);\n            response.hint_msg = err_msg;\n            return;\n        }\n    }\n\n    ddebug_f(\"app({}) start to partition split, new_partition_count={}\",\n             request.app_name,\n             request.new_partition_count);\n\n    do_start_partition_split(std::move(app), std::move(rpc));\n}\n\nvoid meta_split_service::do_start_partition_split(std::shared_ptr<app_state> app,\n                                                  start_split_rpc rpc)\n{\n    auto on_write_storage_complete = [app, rpc, this]() {\n        ddebug_f(\"app({}) update partition count on remote storage, new partition_count = {}\",\n                 app->app_name,\n                 app->partition_count * 2);\n\n        zauto_write_lock l(app_lock());\n        app->helpers->split_states.splitting_count = app->partition_count;\n        app->partition_count *= 2;\n        app->helpers->contexts.resize(app->partition_count);\n        app->partitions.resize(app->partition_count);\n        app->envs[replica_envs::SPLIT_VALIDATE_PARTITION_HASH] = \"true\";\n\n        for (int i = 0; i < app->partition_count; ++i) {\n     
       app->helpers->contexts[i].config_owner = &app->partitions[i];\n            if (i >= app->partition_count / 2) { // child partitions\n                app->partitions[i].ballot = invalid_ballot;\n                app->partitions[i].pid = gpid(app->app_id, i);\n            } else { // parent partitions\n                app->helpers->split_states.status[i] = split_status::SPLITTING;\n            }\n        }\n\n        auto &response = rpc.response();\n        response.err = ERR_OK;\n    };\n\n    if (app->init_partition_count <= 0) {\n        app->init_partition_count = app->partition_count;\n    }\n    auto copy = *app;\n    copy.partition_count *= 2;\n    copy.envs[replica_envs::SPLIT_VALIDATE_PARTITION_HASH] = \"true\";\n    blob value = dsn::json::json_forwarder<app_info>::encode(copy);\n    _meta_svc->get_meta_storage()->set_data(\n        _state->get_app_path(*app), std::move(value), on_write_storage_complete);\n}\n\nvoid meta_split_service::register_child_on_meta(register_child_rpc rpc)\n{\n    const auto &request = rpc.request();\n    const std::string &app_name = request.app.app_name;\n    auto &response = rpc.response();\n    response.err = ERR_IO_PENDING;\n\n    zauto_write_lock l(app_lock());\n    std::shared_ptr<app_state> app = _state->get_app(app_name);\n    dassert_f(app != nullptr, \"app({}) is not existed\", app_name);\n    dassert_f(app->is_stateful, \"app({}) is stateless currently\", app_name);\n\n    const gpid &parent_gpid = request.parent_config.pid;\n    const gpid &child_gpid = request.child_config.pid;\n    const auto &parent_config = app->partitions[parent_gpid.get_partition_index()];\n    if (request.parent_config.ballot != parent_config.ballot) {\n        derror_f(\"app({}) partition({}) register child({}) failed, request is outdated, request \"\n                 \"parent ballot = {}, local parent ballot = {}\",\n                 app_name,\n                 parent_gpid,\n                 child_gpid,\n                 
request.parent_config.ballot,\n                 parent_config.ballot);\n        response.err = ERR_INVALID_VERSION;\n        response.parent_config = parent_config;\n        return;\n    }\n\n    config_context &parent_context = app->helpers->contexts[parent_gpid.get_partition_index()];\n    if (parent_context.stage == config_status::pending_remote_sync) {\n        dwarn_f(\"app({}) partition({}): another request is syncing with remote storage, ignore \"\n                \"this request\",\n                app_name,\n                parent_gpid);\n        return;\n    }\n\n    if (child_gpid.get_partition_index() >= app->partition_count) {\n        derror_f(\n            \"app({}) partition({}) register child({}) failed, partition split has been canceled\",\n            app_name,\n            parent_gpid,\n            child_gpid);\n        response.err = ERR_INVALID_STATE;\n        response.parent_config = parent_config;\n        return;\n    }\n\n    auto iter = app->helpers->split_states.status.find(parent_gpid.get_partition_index());\n    if (iter == app->helpers->split_states.status.end()) {\n        derror_f(\n            \"duplicated register request, app({}) child partition({}) has already been registered\",\n            app_name,\n            child_gpid);\n        const auto &child_config = app->partitions[child_gpid.get_partition_index()];\n        dassert_f(child_config.ballot > 0,\n                  \"app({}) partition({}) should have been registered\",\n                  app_name,\n                  child_gpid);\n        response.err = ERR_CHILD_REGISTERED;\n        response.parent_config = parent_config;\n        return;\n    }\n\n    if (iter->second != split_status::SPLITTING) {\n        derror_f(\n            \"app({}) partition({}) register child({}) failed, current partition split_status = {}\",\n            app_name,\n            parent_gpid,\n            child_gpid,\n            dsn::enum_to_string(iter->second));\n        response.err = 
ERR_INVALID_STATE;\n        return;\n    }\n\n    app->helpers->split_states.status.erase(parent_gpid.get_partition_index());\n    app->helpers->split_states.splitting_count--;\n    ddebug_f(\"app({}) parent({}) will register child({})\", app_name, parent_gpid, child_gpid);\n\n    parent_context.stage = config_status::pending_remote_sync;\n    parent_context.msg = rpc.dsn_request();\n    parent_context.pending_sync_task = add_child_on_remote_storage(rpc, true);\n}\n\ndsn::task_ptr meta_split_service::add_child_on_remote_storage(register_child_rpc rpc,\n                                                              bool create_new)\n{\n    const auto &request = rpc.request();\n    const std::string &partition_path = _state->get_partition_path(request.child_config.pid);\n    blob value = dsn::json::json_forwarder<partition_configuration>::encode(request.child_config);\n    if (create_new) {\n        return _meta_svc->get_remote_storage()->create_node(\n            partition_path,\n            LPC_META_STATE_HIGH,\n            std::bind(&meta_split_service::on_add_child_on_remote_storage_reply,\n                      this,\n                      std::placeholders::_1,\n                      rpc,\n                      create_new),\n            value);\n    } else {\n        return _meta_svc->get_remote_storage()->set_data(\n            partition_path,\n            value,\n            LPC_META_STATE_HIGH,\n            std::bind(&meta_split_service::on_add_child_on_remote_storage_reply,\n                      this,\n                      std::placeholders::_1,\n                      rpc,\n                      create_new),\n            _meta_svc->tracker());\n    }\n}\n\nvoid meta_split_service::on_add_child_on_remote_storage_reply(error_code ec,\n                                                              register_child_rpc rpc,\n                                                              bool create_new)\n{\n    const auto &request = rpc.request();\n    auto 
&response = rpc.response();\n    const std::string &app_name = request.app.app_name;\n\n    zauto_write_lock l(app_lock());\n    std::shared_ptr<app_state> app = _state->get_app(app_name);\n    dassert_f(app != nullptr, \"app({}) is not existed\", app_name);\n    dassert_f(app->is_stateful, \"app({}) is stateless currently\", app_name);\n\n    const gpid &parent_gpid = request.parent_config.pid;\n    const gpid &child_gpid = request.child_config.pid;\n    config_context &parent_context = app->helpers->contexts[parent_gpid.get_partition_index()];\n\n    if (ec == ERR_TIMEOUT ||\n        (ec == ERR_NODE_ALREADY_EXIST && create_new)) { // retry register child on remote storage\n        bool retry_create_new = (ec == ERR_TIMEOUT) ? create_new : false;\n        int delay = (ec == ERR_TIMEOUT) ? 1 : 0;\n        parent_context.pending_sync_task =\n            tasking::enqueue(LPC_META_STATE_HIGH,\n                             nullptr,\n                             [this, parent_context, rpc, retry_create_new]() mutable {\n                                 parent_context.pending_sync_task =\n                                     add_child_on_remote_storage(rpc, retry_create_new);\n                             },\n                             0,\n                             std::chrono::seconds(delay));\n        return;\n    }\n    dassert_f(ec == ERR_OK, \"we can't handle this right now, err = {}\", ec);\n\n    ddebug_f(\"parent({}) resgiter child({}) on remote storage succeed\", parent_gpid, child_gpid);\n\n    // update local child partition configuration\n    std::shared_ptr<configuration_update_request> update_child_request =\n        std::make_shared<configuration_update_request>();\n    update_child_request->config = request.child_config;\n    update_child_request->info = *app;\n    update_child_request->type = config_type::CT_REGISTER_CHILD;\n    update_child_request->node = request.primary_address;\n\n    partition_configuration child_config = 
app->partitions[child_gpid.get_partition_index()];\n    child_config.secondaries = request.child_config.secondaries;\n    _state->update_configuration_locally(*app, update_child_request);\n\n    if (parent_context.msg) {\n        response.err = ERR_OK;\n        response.app = *app;\n        response.parent_config = app->partitions[parent_gpid.get_partition_index()];\n        response.child_config = app->partitions[child_gpid.get_partition_index()];\n        parent_context.msg = nullptr;\n    }\n    parent_context.pending_sync_task = nullptr;\n    parent_context.stage = config_status::not_pending;\n}\n\nvoid meta_split_service::query_partition_split(query_split_rpc rpc) const\n{\n    const std::string &app_name = rpc.request().app_name;\n    auto &response = rpc.response();\n    response.err = ERR_OK;\n\n    zauto_read_lock l(app_lock());\n    std::shared_ptr<app_state> app = _state->get_app(app_name);\n    if (app == nullptr || app->status != app_status::AS_AVAILABLE) {\n        response.err = app == nullptr ? ERR_APP_NOT_EXIST : ERR_APP_DROPPED;\n        response.__set_hint_msg(fmt::format(\n            \"app({}) {}\", app_name, response.err == ERR_APP_NOT_EXIST ? 
\"not existed\" : \"dropped\"));\n        derror_f(\"query partition split failed, {}\", response.hint_msg);\n        return;\n    }\n\n    if (!app->splitting()) {\n        response.err = ERR_INVALID_STATE;\n        response.__set_hint_msg(fmt::format(\"app({}) is not splitting\", app_name));\n        derror_f(\"query partition split failed, {}\", response.hint_msg);\n        return;\n    }\n\n    response.new_partition_count = app->partition_count;\n    response.status = app->helpers->split_states.status;\n    ddebug_f(\"query partition split succeed, app({}), partition_count({}), splitting_count({})\",\n             app->app_name,\n             response.new_partition_count,\n             response.status.size());\n}\n\nvoid meta_split_service::control_partition_split(control_split_rpc rpc)\n{\n    const auto &req = rpc.request();\n    const auto &control_type = req.control_type;\n    auto &response = rpc.response();\n\n    zauto_write_lock l(app_lock());\n    std::shared_ptr<app_state> app = _state->get_app(req.app_name);\n    if (app == nullptr || app->status != app_status::AS_AVAILABLE) {\n        response.err = app == nullptr ? ERR_APP_NOT_EXIST : ERR_APP_DROPPED;\n        response.__set_hint_msg(fmt::format(\n            \"app {}\", response.err == ERR_APP_NOT_EXIST ? 
\"not existed\" : \"dropped\", req.app_name));\n        derror_f(\"{} split failed, {}\", control_type_str(control_type), response.hint_msg);\n        return;\n    }\n\n    if (!app->splitting()) {\n        response.err = ERR_INVALID_STATE;\n        response.__set_hint_msg(fmt::format(\"app({}) is not splitting\", req.app_name));\n        derror_f(\"{} split failed, {}\", control_type_str(control_type), response.hint_msg);\n        return;\n    }\n\n    if (req.parent_pidx >= 0 && (control_type == split_control_type::PAUSE ||\n                                 control_type == split_control_type::RESTART)) {\n        do_control_single(std::move(app), std::move(rpc));\n    } else {\n        do_control_all(std::move(app), std::move(rpc));\n    }\n}\n\nvoid meta_split_service::do_control_single(std::shared_ptr<app_state> app, control_split_rpc rpc)\n{\n    const auto &req = rpc.request();\n    const std::string &app_name = req.app_name;\n    const int32_t &parent_pidx = req.parent_pidx;\n    const auto &control_type = req.control_type;\n    auto &response = rpc.response();\n\n    if (parent_pidx >= app->partition_count / 2) {\n        response.err = ERR_INVALID_PARAMETERS;\n        response.__set_hint_msg(fmt::format(\"invalid parent partition index({})\", parent_pidx));\n        derror_f(\"{} split for app({}) failed, {}\",\n                 control_type_str(control_type),\n                 app_name,\n                 response.hint_msg);\n        return;\n    }\n\n    auto iter = app->helpers->split_states.status.find(parent_pidx);\n    if (iter == app->helpers->split_states.status.end()) {\n        response.err =\n            control_type == split_control_type::PAUSE ? 
ERR_CHILD_REGISTERED : ERR_INVALID_STATE;\n        response.__set_hint_msg(fmt::format(\"partition[{}] is not splitting\", parent_pidx));\n        derror_f(\"{} split for app({}) failed, {}\",\n                 control_type_str(control_type),\n                 app_name,\n                 response.hint_msg);\n        return;\n    }\n\n    split_status::type old_status =\n        control_type == split_control_type::PAUSE ? split_status::SPLITTING : split_status::PAUSED;\n    split_status::type target_status =\n        control_type == split_control_type::PAUSE ? split_status::PAUSING : split_status::SPLITTING;\n    if (iter->second == old_status) {\n        iter->second = target_status;\n        response.err = ERR_OK;\n        ddebug_f(\"app({}) partition[{}] {} split succeed\",\n                 app_name,\n                 parent_pidx,\n                 control_type_str(control_type));\n    } else {\n        response.err = ERR_INVALID_STATE;\n        response.__set_hint_msg(fmt::format(\"partition[{}] wrong split_status({})\",\n                                            parent_pidx,\n                                            dsn::enum_to_string(iter->second)));\n        derror_f(\"{} split for app({}) failed, {}\",\n                 control_type_str(control_type),\n                 app_name,\n                 response.hint_msg);\n    }\n}\n\nvoid meta_split_service::do_control_all(std::shared_ptr<app_state> app, control_split_rpc rpc)\n{\n    const auto &req = rpc.request();\n    const auto &control_type = req.control_type;\n    auto &response = rpc.response();\n\n    if (control_type == split_control_type::CANCEL) {\n        if (req.old_partition_count != app->partition_count / 2) {\n            response.err = ERR_INVALID_PARAMETERS;\n            response.__set_hint_msg(\n                fmt::format(\"wrong partition_count, should be {}\", app->partition_count / 2));\n            derror_f(\"cancel split for app({}) failed, wrong partition count: partition 
count({}) \"\n                     \"VS req partition_count({})\",\n                     app->app_name,\n                     app->partition_count,\n                     req.old_partition_count);\n            return;\n        }\n\n        if (app->helpers->split_states.splitting_count != req.old_partition_count) {\n            response.err = ERR_CHILD_REGISTERED;\n            response.__set_hint_msg(\"some partitions have already finished split\");\n            derror_f(\"cancel split for app({}) failed, {}\", app->app_name, response.hint_msg);\n            return;\n        }\n\n        for (auto &kv : app->helpers->split_states.status) {\n            ddebug_f(\"app({}) partition({}) cancel split, old status = {}\",\n                     app->app_name,\n                     kv.first,\n                     dsn::enum_to_string(kv.second));\n            kv.second = split_status::CANCELING;\n        }\n        return;\n    }\n\n    split_status::type old_status =\n        control_type == split_control_type::PAUSE ? split_status::SPLITTING : split_status::PAUSED;\n    split_status::type target_status =\n        control_type == split_control_type::PAUSE ? 
split_status::PAUSING : split_status::SPLITTING;\n    for (auto &kv : app->helpers->split_states.status) {\n        if (kv.second == old_status) {\n            kv.second = target_status;\n            ddebug_f(\"app({}) partition[{}] {} split succeed\",\n                     app->app_name,\n                     kv.first,\n                     control_type_str(control_type));\n        }\n    }\n    response.err = ERR_OK;\n}\n\nvoid meta_split_service::notify_stop_split(notify_stop_split_rpc rpc)\n{\n    const auto &request = rpc.request();\n    auto &response = rpc.response();\n    zauto_write_lock l(app_lock());\n    std::shared_ptr<app_state> app = _state->get_app(request.app_name);\n    dassert_f(app != nullptr, \"app({}) is not existed\", request.app_name);\n    dassert_f(app->is_stateful, \"app({}) is stateless currently\", request.app_name);\n    dassert_f(request.meta_split_status == split_status::PAUSING ||\n                  request.meta_split_status == split_status::CANCELING,\n              \"invalid split_status({})\",\n              dsn::enum_to_string(request.meta_split_status));\n\n    const std::string &stop_type =\n        rpc.request().meta_split_status == split_status::PAUSING ? 
\"pause\" : \"cancel\";\n    const auto iter =\n        app->helpers->split_states.status.find(request.parent_gpid.get_partition_index());\n    if (iter == app->helpers->split_states.status.end()) {\n        dwarn_f(\"app({}) partition({}) is not executing partition split, ignore out-dated {} split \"\n                \"request\",\n                app->app_name,\n                request.parent_gpid,\n                stop_type);\n        response.err = ERR_INVALID_VERSION;\n        return;\n    }\n\n    if (iter->second != request.meta_split_status) {\n        dwarn_f(\"app({}) partition({}) split_status = {}, ignore out-dated {} split request\",\n                app->app_name,\n                request.parent_gpid,\n                dsn::enum_to_string(iter->second),\n                stop_type);\n        response.err = ERR_INVALID_VERSION;\n        return;\n    }\n\n    ddebug_f(\"app({}) partition({}) notify {} split succeed\",\n             app->app_name,\n             request.parent_gpid,\n             stop_type);\n\n    // pausing split\n    if (iter->second == split_status::PAUSING) {\n        iter->second = split_status::PAUSED;\n        response.err = ERR_OK;\n        return;\n    }\n\n    // canceling split\n    dassert_f(request.partition_count * 2 == app->partition_count,\n              \"wrong partition_count, request({}) vs meta({})\",\n              request.partition_count,\n              app->partition_count);\n    app->helpers->split_states.status.erase(request.parent_gpid.get_partition_index());\n    response.err = ERR_OK;\n    // when all partitions finish, partition_count should be updated\n    if (--app->helpers->split_states.splitting_count == 0) {\n        do_cancel_partition_split(std::move(app), rpc);\n    }\n}\n\nvoid meta_split_service::do_cancel_partition_split(std::shared_ptr<app_state> app,\n                                                   notify_stop_split_rpc rpc)\n{\n    auto on_write_storage_complete = [app, rpc, this]() {\n        
ddebug_f(\"app({}) update partition count on remote storage, new partition count is {}\",\n                 app->app_name,\n                 app->partition_count / 2);\n        zauto_write_lock l(app_lock());\n        app->partition_count /= 2;\n        app->helpers->contexts.resize(app->partition_count);\n        app->partitions.resize(app->partition_count);\n    };\n\n    auto copy = *app;\n    copy.partition_count = rpc.request().partition_count;\n    blob value = dsn::json::json_forwarder<app_info>::encode(copy);\n    _meta_svc->get_meta_storage()->set_data(\n        _state->get_app_path(*app), std::move(value), on_write_storage_complete);\n}\n\nvoid meta_split_service::query_child_state(query_child_state_rpc rpc)\n{\n    const auto &request = rpc.request();\n    const auto &app_name = request.app_name;\n    const auto &parent_pid = request.pid;\n    auto &response = rpc.response();\n\n    zauto_read_lock l(app_lock());\n    std::shared_ptr<app_state> app = _state->get_app(app_name);\n    dassert_f(app != nullptr, \"app({}) is not existed\", app_name);\n    dassert_f(app->is_stateful, \"app({}) is stateless currently\", app_name);\n\n    if (app->partition_count == request.partition_count) {\n        response.err = ERR_INVALID_STATE;\n        derror_f(\"app({}) is not executing partition split\", app_name);\n        return;\n    }\n\n    dassert_f(app->partition_count == request.partition_count * 2,\n              \"app({}) has invalid partition_count\",\n              app_name);\n\n    auto child_pidx = parent_pid.get_partition_index() + request.partition_count;\n    if (app->partitions[child_pidx].ballot == invalid_ballot) {\n        response.err = ERR_INVALID_STATE;\n        derror_f(\"app({}) parent partition({}) split has been canceled\", app_name, parent_pid);\n        return;\n    }\n    ddebug_f(\n        \"app({}) child partition({}.{}) is ready\", app_name, parent_pid.get_app_id(), child_pidx);\n    response.err = ERR_OK;\n    
response.__set_partition_count(app->partition_count);\n    response.__set_child_config(app->partitions[child_pidx]);\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_split_service.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"meta_service.h\"\n#include \"server_state.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass meta_split_service\n{\npublic:\n    explicit meta_split_service(meta_service *meta);\n\nprivate:\n    // client -> meta to start split\n    void start_partition_split(start_split_rpc rpc);\n    void do_start_partition_split(std::shared_ptr<app_state> app, start_split_rpc rpc);\n\n    // client -> meta to query split\n    void query_partition_split(query_split_rpc rpc) const;\n\n    // client -> meta to pause/restart/cancel split\n    void control_partition_split(control_split_rpc rpc);\n\n    // pause/restart specific one partition\n    void do_control_single(std::shared_ptr<app_state> app, control_split_rpc rpc);\n\n    // pause all splitting partitions or restart all paused partitions or cancel all partitions\n    void do_control_all(std::shared_ptr<app_state> app, control_split_rpc rpc);\n\n    // primary parent -> meta_server to register child\n    void register_child_on_meta(register_child_rpc rpc);\n\n    // meta -> remote storage to update child replica config\n    dsn::task_ptr add_child_on_remote_storage(register_child_rpc 
rpc, bool create_new);\n    void\n    on_add_child_on_remote_storage_reply(error_code ec, register_child_rpc rpc, bool create_new);\n\n    // primary replica -> meta to notify group pause or cancel split succeed\n    void notify_stop_split(notify_stop_split_rpc rpc);\n    void do_cancel_partition_split(std::shared_ptr<app_state> app, notify_stop_split_rpc rpc);\n\n    // primary replica -> meta to query child state\n    void query_child_state(query_child_state_rpc rpc);\n\n    static const std::string control_type_str(split_control_type::type type)\n    {\n        std::string str = \"\";\n        if (type == split_control_type::PAUSE) {\n            str = \"pause\";\n        } else if (type == split_control_type::RESTART) {\n            str = \"restart\";\n        } else if (type == split_control_type::CANCEL) {\n            str = \"cancel\";\n        }\n        return str;\n    }\n\nprivate:\n    friend class meta_service;\n    friend class meta_split_service_test;\n\n    meta_service *_meta_svc;\n    server_state *_state;\n\n    zrwlock_nr &app_lock() const { return _state->_lock; }\n};\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_state_service_simple.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"meta_state_service_simple.h\"\n\n#include <fcntl.h>\n\n#include <stack>\n#include <utility>\n\n#include <dsn/tool-api/task.h>\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/utility/filesystem.h>\n\nnamespace dsn {\nnamespace dist {\n// path: /, /n1/n2, /n1/n2/, /n2/n2/n3\nstd::string meta_state_service_simple::normalize_path(const std::string &s)\n{\n    if (s.empty() || s[0] != '/')\n        return \"\";\n    if (s.length() > 1 && *s.rbegin() == '/')\n        return s.substr(0, s.length() - 1);\n    return s;\n}\n\nerror_code meta_state_service_simple::extract_name_parent_from_path(const std::string &s,\n                                                                    /*out*/ std::string &name,\n                 
                                                   /*out*/ std::string &parent)\n{\n    auto pos = s.find_last_of('/');\n    if (pos == std::string::npos)\n        return ERR_INVALID_PARAMETERS;\n\n    name = s.substr(pos + 1);\n    if (pos > 0)\n        parent = s.substr(0, pos);\n    else\n        parent = \"/\";\n    return ERR_OK;\n}\n\nstatic void\n__err_cb_bind_and_enqueue(task_ptr lock_task, error_code err, int delay_milliseconds = 0)\n{\n    auto t = dynamic_cast<error_code_future *>(lock_task.get());\n    t->enqueue_with(err, delay_milliseconds);\n}\n\nvoid meta_state_service_simple::write_log(blob &&log_blob,\n                                          std::function<error_code()> internal_operation,\n                                          task_ptr task)\n{\n    _log_lock.lock();\n    uint64_t log_offset = _offset;\n    _offset += log_blob.length();\n    auto continuation_task = std::unique_ptr<operation>(new operation(false, [=](bool log_succeed) {\n        dassert(log_succeed, \"we cannot handle logging failure now\");\n        __err_cb_bind_and_enqueue(task, internal_operation(), 0);\n    }));\n    auto continuation_task_ptr = continuation_task.get();\n    _task_queue.emplace(move(continuation_task));\n    _log_lock.unlock();\n\n    file::write(_log,\n                log_blob.data(),\n                log_blob.length(),\n                log_offset,\n                LPC_META_STATE_SERVICE_SIMPLE_INTERNAL,\n                &_tracker,\n                [=](error_code err, size_t bytes) {\n                    dassert(err == ERR_OK && bytes == log_blob.length(),\n                            \"we cannot handle logging failure now\");\n                    _log_lock.lock();\n                    continuation_task_ptr->done = true;\n                    while (!_task_queue.empty()) {\n                        if (!_task_queue.front()->done) {\n                            break;\n                        }\n                        _task_queue.front()->cb(true);\n     
                   _task_queue.pop();\n                    }\n                    _log_lock.unlock();\n                });\n}\n\nerror_code meta_state_service_simple::create_node_internal(const std::string &node,\n                                                           const blob &value)\n{\n    auto path = normalize_path(node);\n    zauto_lock _(_state_lock);\n    auto me_it = _quick_map.find(path);\n    if (me_it != _quick_map.end())\n        return ERR_NODE_ALREADY_EXIST;\n\n    std::string name, parent;\n    auto err = extract_name_parent_from_path(path, name, parent);\n    if (err != ERR_OK) {\n        return err;\n    }\n\n    auto parent_it = _quick_map.find(parent);\n    if (parent_it == _quick_map.end())\n        return ERR_OBJECT_NOT_FOUND;\n\n    state_node *n = new state_node(name, parent_it->second, value);\n    parent_it->second->children.insert(quick_map::value_type(name, n));\n    _quick_map.insert(quick_map::value_type(path, n));\n    return ERR_OK;\n}\n\nerror_code meta_state_service_simple::delete_node_internal(const std::string &node, bool recursive)\n{\n    auto path = normalize_path(node);\n    if (path == \"/\")\n        return ERR_INVALID_PARAMETERS; // cannot delete root\n    zauto_lock _(_state_lock);\n    auto me_it = _quick_map.find(path);\n    if (me_it == _quick_map.end())\n        return ERR_OBJECT_NOT_FOUND;\n    if (!recursive && !me_it->second->children.empty())\n        return ERR_INVALID_PARAMETERS;\n\n    struct delete_state\n    {\n        std::string path;\n        state_node *node;\n        decltype(state_node::children)::iterator next_child_to_delete;\n    };\n    std::stack<delete_state> delete_stack;\n    delete_stack.push({path, me_it->second, me_it->second->children.begin()});\n    for (; !delete_stack.empty();) {\n        auto &node_pair = delete_stack.top();\n        if (node_pair.node->children.end() == node_pair.next_child_to_delete) {\n            auto delnum = _quick_map.erase(node_pair.path);\n            
dassert(delnum == 1, \"inconsistent state between quick map and tree\");\n            delete node_pair.node;\n            delete_stack.pop();\n        } else {\n            auto child_it = node_pair.next_child_to_delete;\n            delete_stack.push({node_pair.path + \"/\" + child_it->second->name,\n                               child_it->second,\n                               child_it->second->children.begin()});\n            ++node_pair.next_child_to_delete;\n        }\n    }\n\n    std::string name, parent;\n    auto err = extract_name_parent_from_path(path, name, parent);\n    if (err != ERR_OK) {\n        return err;\n    }\n\n    auto parent_it = _quick_map.find(parent);\n    dassert(parent_it != _quick_map.end(), \"unable to find parent node\");\n    // XXX we cannot delete root, right?\n\n    auto erase_num = parent_it->second->children.erase(name);\n    dassert(erase_num == 1, \"inconsistent state between quick map and tree\");\n    return ERR_OK;\n}\n\nerror_code meta_state_service_simple::set_data_internal(const std::string &node, const blob &value)\n{\n    auto path = normalize_path(node);\n    zauto_lock _(_state_lock);\n    auto it = _quick_map.find(path);\n    if (it == _quick_map.end())\n        return ERR_OBJECT_NOT_FOUND;\n    it->second->data = value;\n    return ERR_OK;\n}\n\nerror_code meta_state_service_simple::apply_transaction(\n    const std::shared_ptr<meta_state_service::transaction_entries> &t_entries)\n{\n    dinfo(\"internal operation after logged\");\n    simple_transaction_entries *entries =\n        dynamic_cast<simple_transaction_entries *>(t_entries.get());\n    dassert(entries != nullptr, \"invalid input parameter\");\n    error_code ec;\n    for (int i = 0; i != entries->_offset; ++i) {\n        operation_entry &e = entries->_ops[i];\n        switch (e._type) {\n        case operation_type::create_node:\n            ec = create_node_internal(e._node, e._value);\n            break;\n        case operation_type::delete_node:\n 
           ec = delete_node_internal(e._node, false);\n            break;\n        case operation_type::set_data:\n            ec = set_data_internal(e._node, e._value);\n            break;\n        default:\n            dassert(false, \"unsupported operation\");\n        }\n        dassert(ec == ERR_OK, \"unexpected error when applying, err=%s\", ec.to_string());\n    }\n\n    return ERR_OK;\n}\n\nerror_code meta_state_service_simple::initialize(const std::vector<std::string> &args)\n{\n    const char *work_dir =\n        args.empty() ? service_app::current_service_app_info().data_dir.c_str() : args[0].c_str();\n\n    _offset = 0;\n    std::string log_path = dsn::utils::filesystem::path_combine(work_dir, \"meta_state_service.log\");\n    if (utils::filesystem::file_exists(log_path)) {\n        if (FILE *fd = fopen(log_path.c_str(), \"rb\")) {\n            for (;;) {\n                log_header header;\n                if (fread(&header, sizeof(log_header), 1, fd) != 1) {\n                    break;\n                }\n                if (header.magic != log_header::default_magic) {\n                    break;\n                }\n                std::shared_ptr<char> buffer(dsn::utils::make_shared_array<char>(header.size));\n                if (fread(buffer.get(), header.size, 1, fd) != 1) {\n                    break;\n                }\n                _offset += sizeof(header) + header.size;\n                binary_reader reader(blob(buffer, (int)header.size));\n                int op_type;\n                reader.read(op_type);\n\n                switch (static_cast<operation_type>(op_type)) {\n                case operation_type::create_node: {\n                    std::string node;\n                    blob data;\n                    create_node_log::parse(reader, node, data);\n                    create_node_internal(node, data);\n                    break;\n                }\n                case operation_type::delete_node: {\n                    
std::string node;\n                    bool recursively_delete;\n                    delete_node_log::parse(reader, node, recursively_delete);\n                    delete_node_internal(node, recursively_delete);\n                    break;\n                }\n                case operation_type::set_data: {\n                    std::string node;\n                    blob data;\n                    set_data_log::parse(reader, node, data);\n                    set_data_internal(node, data);\n                    break;\n                }\n                default:\n                    // The log is complete but its content is modified by cosmic ray. This is\n                    // unacceptable\n                    dassert(false, \"meta state server log corrupted\");\n                }\n            }\n            fclose(fd);\n        }\n    }\n\n    _log = file::open(log_path.c_str(), O_RDWR | O_CREAT | O_BINARY, 0666);\n    if (!_log) {\n        derror(\"open file failed: %s\", log_path.c_str());\n        return ERR_FILE_OPERATION_FAILED;\n    }\n    return ERR_OK;\n}\n\nstd::shared_ptr<meta_state_service::transaction_entries>\nmeta_state_service_simple::new_transaction_entries(unsigned int capacity)\n{\n    return std::shared_ptr<meta_state_service::transaction_entries>(\n        new meta_state_service_simple::simple_transaction_entries(capacity));\n}\n\ntask_ptr meta_state_service_simple::submit_transaction(\n    /*in-out*/ const std::shared_ptr<meta_state_service::transaction_entries> &t_entries,\n    task_code cb_code,\n    const err_callback &cb_transaction,\n    dsn::task_tracker *tracker)\n{\n    // when checking the snapshot, we block all write operations which come later\n    zauto_lock l(_log_lock);\n    std::set<std::string> snapshot;\n    for (const auto &kv : _quick_map)\n        snapshot.insert(kv.first);\n\n    // try\n    simple_transaction_entries *entries =\n        dynamic_cast<simple_transaction_entries *>(t_entries.get());\n    std::string parent, 
name;\n    size_t i;\n\n    std::vector<blob> batch_buffer;\n    int total_size = 0;\n    batch_buffer.reserve(entries->_offset);\n\n    for (i = 0; i != entries->_offset; ++i) {\n        operation_entry &op = entries->_ops[i];\n        op._node = normalize_path(op._node);\n\n        switch (op._type) {\n        case operation_type::create_node: {\n            op._result = extract_name_parent_from_path(op._node, name, parent);\n            if (op._result == ERR_OK) {\n                if (snapshot.find(parent) == snapshot.end())\n                    op._result = ERR_OBJECT_NOT_FOUND;\n                else if (snapshot.find(op._node) != snapshot.end())\n                    op._result = ERR_NODE_ALREADY_EXIST;\n                else {\n                    batch_buffer.push_back(create_node_log::get_log(op._node, op._value));\n                    total_size += batch_buffer.back().length();\n                    snapshot.insert(op._node);\n                    op._result = ERR_OK;\n                }\n            }\n        } break;\n        case operation_type::delete_node: {\n            if (snapshot.find(op._node) == snapshot.end()) {\n                op._result = ERR_OBJECT_NOT_FOUND;\n            } else if (op._node == \"/\") {\n                // delete root is forbidden\n                op._result = ERR_INVALID_PARAMETERS;\n            } else {\n                op._node.push_back('/');\n                std::set<std::string>::iterator iter = snapshot.lower_bound(op._node);\n                if (iter != snapshot.end() && (*iter).length() >= op._node.length() &&\n                    memcmp((*iter).c_str(), op._node.c_str(), op._node.length()) == 0) {\n                    // op._node is the prefix of some path, so we regard this directory as not empty\n                    op._result = ERR_INVALID_PARAMETERS;\n                } else {\n                    batch_buffer.push_back(delete_node_log::get_log(op._node, false));\n                    total_size += 
batch_buffer.back().length();\n                    op._node.pop_back();\n                    snapshot.erase(op._node);\n                    op._result = ERR_OK;\n                }\n            }\n        } break;\n        case operation_type::set_data: {\n            if (snapshot.find(op._node) == snapshot.end())\n                op._result = ERR_OBJECT_NOT_FOUND;\n            else {\n                batch_buffer.push_back(set_data_log::get_log(op._node, op._value));\n                total_size += batch_buffer.back().length();\n                op._result = ERR_OK;\n            }\n        } break;\n        default:\n            dassert(false, \"not supported operation\");\n            break;\n        }\n\n        if (op._result != ERR_OK)\n            break;\n    }\n\n    if (i < entries->_offset) {\n        for (int j = i + 1; j != entries->_offset; ++j)\n            entries->_ops[j]._result = ERR_INCONSISTENT_STATE;\n        return tasking::enqueue(\n            cb_code, tracker, [=]() { cb_transaction(ERR_INCONSISTENT_STATE); });\n    } else {\n        // apply\n        std::shared_ptr<char> batch(dsn::utils::make_shared_array<char>(total_size));\n        char *dest = batch.get();\n        std::for_each(batch_buffer.begin(), batch_buffer.end(), [&dest](const blob &entry) {\n            memcpy(dest, entry.data(), entry.length());\n            dest += entry.length();\n        });\n        dassert(dest - batch.get() == total_size, \"memcpy error\");\n        task_ptr task(new error_code_future(cb_code, cb_transaction, 0));\n        task->set_tracker(tracker);\n        write_log(blob(batch, total_size),\n                  [this, t_entries] { return apply_transaction(t_entries); },\n                  task);\n        return task;\n    }\n}\n\ntask_ptr meta_state_service_simple::create_node(const std::string &node,\n                                                task_code cb_code,\n                                                const err_callback &cb_create,\n         
                                       const blob &value,\n                                                dsn::task_tracker *tracker)\n{\n    task_ptr task(new error_code_future(cb_code, cb_create, 0));\n    task->set_tracker(tracker);\n    write_log(create_node_log::get_log(node, value),\n              [=] { return create_node_internal(node, value); },\n              task);\n    return task;\n}\n\ntask_ptr meta_state_service_simple::delete_node(const std::string &node,\n                                                bool recursively_delete,\n                                                task_code cb_code,\n                                                const err_callback &cb_delete,\n                                                dsn::task_tracker *tracker)\n{\n    task_ptr task(new error_code_future(cb_code, cb_delete, 0));\n    task->set_tracker(tracker);\n    write_log(delete_node_log::get_log(node, recursively_delete),\n              [=] { return delete_node_internal(node, recursively_delete); },\n              task);\n    return task;\n}\n\ntask_ptr meta_state_service_simple::node_exist(const std::string &node,\n                                               task_code cb_code,\n                                               const err_callback &cb_exist,\n                                               dsn::task_tracker *tracker)\n{\n    error_code err;\n    {\n        zauto_lock _(_state_lock);\n        err = _quick_map.find(normalize_path(node)) != _quick_map.end() ? 
ERR_OK\n                                                                        : ERR_OBJECT_NOT_FOUND;\n    }\n    return tasking::enqueue(cb_code, tracker, [=]() { cb_exist(err); });\n}\n\ntask_ptr meta_state_service_simple::get_data(const std::string &node,\n                                             task_code cb_code,\n                                             const err_value_callback &cb_get_data,\n                                             dsn::task_tracker *tracker)\n{\n    auto path = normalize_path(node);\n    zauto_lock _(_state_lock);\n    auto me_it = _quick_map.find(path);\n    if (me_it == _quick_map.end()) {\n        return tasking::enqueue(cb_code, tracker, [=]() { cb_get_data(ERR_OBJECT_NOT_FOUND, {}); });\n    } else {\n        auto data_copy = me_it->second->data;\n        return tasking::enqueue(\n            cb_code, tracker, [=]() mutable { cb_get_data(ERR_OK, std::move(data_copy)); });\n    }\n}\n\ntask_ptr meta_state_service_simple::set_data(const std::string &node,\n                                             const blob &value,\n                                             task_code cb_code,\n                                             const err_callback &cb_set_data,\n                                             dsn::task_tracker *tracker)\n{\n    task_ptr task(new error_code_future(cb_code, cb_set_data, 0));\n    task->set_tracker(tracker);\n    write_log(\n        set_data_log::get_log(node, value), [=] { return set_data_internal(node, value); }, task);\n    return task;\n}\n\ntask_ptr meta_state_service_simple::get_children(const std::string &node,\n                                                 task_code cb_code,\n                                                 const err_stringv_callback &cb_get_children,\n                                                 dsn::task_tracker *tracker)\n{\n    auto path = normalize_path(node);\n    zauto_lock _(_state_lock);\n    auto me_it = _quick_map.find(path);\n    if (me_it == 
_quick_map.end()) {\n        return tasking::enqueue(\n            cb_code, tracker, [=]() { cb_get_children(ERR_OBJECT_NOT_FOUND, {}); });\n    } else {\n        std::vector<std::string> result;\n        for (auto &child_pair : me_it->second->children) {\n            result.push_back(child_pair.first);\n        }\n        return tasking::enqueue(\n            cb_code, tracker, [=]() mutable { cb_get_children(ERR_OK, move(result)); });\n    }\n}\n\nmeta_state_service_simple::~meta_state_service_simple()\n{\n    _tracker.cancel_outstanding_tasks();\n    file::close(_log);\n\n    for (const auto &kv : _quick_map) {\n        if (\"/\" != kv.first) {\n            delete kv.second;\n        }\n    }\n    _quick_map.clear();\n}\n}\n}\n"
  },
  {
    "path": "src/meta/meta_state_service_simple.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     a simple version of meta state service for development\n *\n * Revision history:\n *     2015-11-03, @imzhenyu (Zhenyu.Guo@microsoft.com), setup the sketch\n *     2015-11-11, Tianyi WANG, first version done\n */\n\n#include <queue>\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/dist/meta_state_service.h>\n#include \"common/replication_common.h\"\n\nnamespace dsn {\nnamespace dist {\nDEFINE_TASK_CODE_AIO(LPC_META_STATE_SERVICE_SIMPLE_INTERNAL,\n                     TASK_PRIORITY_HIGH,\n                     THREAD_POOL_DEFAULT);\n\nclass meta_state_service_simple : public meta_state_service\n{\npublic:\n    explicit meta_state_service_simple()\n        : _root(\"/\", nullptr),\n          
_quick_map({std::make_pair(\"/\", &_root)}),\n          _log_lock(true),\n          _log(nullptr),\n          _offset(0)\n    {\n    }\n\n    // work_path = (argc > 0 ? argv[0] : current_app_data_dir)\n    virtual error_code initialize(const std::vector<std::string> &args) override;\n    virtual error_code finalize() override { return ERR_OK; }\n\n    virtual std::shared_ptr<meta_state_service::transaction_entries>\n    new_transaction_entries(unsigned int capacity) override;\n\n    virtual task_ptr\n    submit_transaction(const std::shared_ptr<meta_state_service::transaction_entries> &t_entries,\n                       task_code cb_code,\n                       const err_callback &cb_create_tree,\n                       dsn::task_tracker *tracker = nullptr) override;\n\n    virtual task_ptr create_node(const std::string &node,\n                                 task_code cb_code,\n                                 const err_callback &cb_create,\n                                 const blob &value = blob(),\n                                 dsn::task_tracker *tracker = nullptr) override;\n\n    virtual task_ptr delete_node(const std::string &node,\n                                 bool recursively_delete,\n                                 task_code cb_code,\n                                 const err_callback &cb_delete,\n                                 dsn::task_tracker *tracker = nullptr) override;\n\n    virtual task_ptr node_exist(const std::string &node,\n                                task_code cb_code,\n                                const err_callback &cb_exist,\n                                dsn::task_tracker *tracker = nullptr) override;\n\n    virtual task_ptr get_data(const std::string &node,\n                              task_code cb_code,\n                              const err_value_callback &cb_get_data,\n                              dsn::task_tracker *tracker = nullptr) override;\n\n    virtual task_ptr set_data(const std::string &node,\n      
                        const blob &value,\n                              task_code cb_code,\n                              const err_callback &cb_set_data,\n                              dsn::task_tracker *tracker = nullptr) override;\n\n    virtual task_ptr get_children(const std::string &node,\n                                  task_code cb_code,\n                                  const err_stringv_callback &cb_get_children,\n                                  dsn::task_tracker *tracker = nullptr) override;\n    virtual ~meta_state_service_simple() override;\n\nprivate:\n    struct operation\n    {\n        bool done;\n        std::function<void(bool)> cb;\n        operation(bool done, std::function<void(bool)> &&cb) : done(done), cb(move(cb)) {}\n    };\n\n#pragma pack(push, 1)\n    struct log_header\n    {\n        int magic;\n        size_t size;\n        static const int default_magic = 0xdeadbeef;\n        log_header() : magic(default_magic), size(0) {}\n    };\n#pragma pack(pop)\n\n    struct state_node\n    {\n        std::string name;\n        blob data;\n\n        state_node *parent;\n        std::unordered_map<std::string, state_node *> children;\n\n        state_node(const std::string &nm, state_node *pt, const blob &dt = {})\n            : name(nm), data(dt), parent(pt)\n        {\n        }\n    };\n\n    enum class operation_type\n    {\n        create_node,\n        delete_node,\n        set_data,\n    };\n\n    struct operation_entry\n    {\n        operation_type _type;\n        std::string _node;\n        blob _value; // for only creaet/set\n        error_code _result;\n    };\n\n    struct simple_transaction_entries : public meta_state_service::transaction_entries\n    {\n        std::vector<operation_entry> _ops;\n        unsigned int _offset;\n\n        simple_transaction_entries(unsigned int capacity) : _ops(capacity), _offset(0) {}\n        virtual ~simple_transaction_entries() {}\n\n        virtual error_code create_node(const std::string 
&node, const blob &value) override\n        {\n            return append(operation_type::create_node, node, value);\n        }\n\n        virtual error_code delete_node(const std::string &node) override\n        {\n            return append(operation_type::delete_node, node, blob());\n        }\n\n        virtual error_code set_data(const std::string &node, const blob &value) override\n        {\n            return append(operation_type::set_data, node, value);\n        }\n\n        error_code append(operation_type type, const std::string &node, const blob &value)\n        {\n            if (_offset >= _ops.size())\n                return ERR_ARRAY_INDEX_OUT_OF_RANGE;\n            _ops[_offset]._type = type;\n            _ops[_offset]._node = node;\n            _ops[_offset]._value = value;\n            ++_offset;\n\n            return ERR_OK;\n        }\n\n        virtual error_code get_result(unsigned int entry_index) override\n        {\n            if (entry_index >= _offset)\n                return ERR_ARRAY_INDEX_OUT_OF_RANGE;\n            return _ops[entry_index]._result;\n        }\n    };\n\n    template <operation_type op, typename... Args>\n    struct log_struct;\n    template <operation_type op, typename Head, typename... Tail>\n    struct log_struct<op, Head, Tail...>\n    {\n        static blob get_log(const Head &head, Tail... tail)\n        {\n            binary_writer writer;\n            writer.write_pod(log_header());\n            writer.write(static_cast<int>(op));\n            write(writer, head, tail...);\n            auto shared_blob = writer.get_buffer();\n            reinterpret_cast<log_header *>(const_cast<char *>(shared_blob.data()))->size =\n                shared_blob.length() - sizeof(log_header);\n            return shared_blob;\n        }\n        static void write(binary_writer &writer, const Head &head, const Tail &... 
tail)\n        {\n            marshall(writer, head, DSF_THRIFT_BINARY);\n            log_struct<op, Tail...>::write(writer, tail...);\n        }\n        static void parse(binary_reader &reader, Head &head, Tail &... tail)\n        {\n            unmarshall(reader, head, DSF_THRIFT_BINARY);\n            log_struct<op, Tail...>::parse(reader, tail...);\n        }\n    };\n    template <operation_type op, typename Head>\n    struct log_struct<op, Head>\n    {\n        static void write(binary_writer &writer, const Head &head)\n        {\n            marshall(writer, head, DSF_THRIFT_BINARY);\n        }\n        static void parse(binary_reader &reader, Head &head)\n        {\n            unmarshall(reader, head, DSF_THRIFT_BINARY);\n        }\n    };\n\n    using create_node_log = log_struct<operation_type::create_node, std::string, blob>;\n    using delete_node_log = log_struct<operation_type::delete_node, std::string, bool>;\n    using set_data_log = log_struct<operation_type::set_data, std::string, blob>;\n\n    static std::string normalize_path(const std::string &s);\n    static error_code extract_name_parent_from_path(const std::string &s,\n                                                    /*out*/ std::string &name,\n                                                    /*out*/ std::string &parent);\n\n    void\n    write_log(blob &&log_blob, std::function<error_code(void)> internal_operation, task_ptr task);\n\n    error_code create_node_internal(const std::string &node, const blob &blob);\n    error_code delete_node_internal(const std::string &node, bool recursive);\n    error_code set_data_internal(const std::string &node, const blob &blob);\n    error_code\n    apply_transaction(const std::shared_ptr<meta_state_service::transaction_entries> &t_entries);\n\n    typedef std::unordered_map<std::string, state_node *> quick_map;\n\n    zlock _queue_lock;\n    std::queue<std::unique_ptr<operation>> _task_queue;\n\n    zlock _state_lock;\n    state_node _root;     
// tree\n    quick_map _quick_map; // <path, node*>\n\n    zlock _log_lock;\n    disk_file *_log;\n    uint64_t _offset;\n\n    dsn::task_tracker _tracker;\n};\n} // namespace dist\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_state_service_utils.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/cpp/pipeline.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication.h>\n\n#include \"meta_state_service_utils.h\"\n#include \"meta_state_service_utils_impl.h\"\n\nnamespace dsn {\nnamespace replication {\nnamespace mss {\n\nmeta_storage::meta_storage(dist::meta_state_service *remote_storage, task_tracker *tracker)\n    : _remote(remote_storage), _tracker(tracker)\n{\n    dassert(tracker != nullptr, \"must set task tracker\");\n}\n\nmeta_storage::~meta_storage() = default;\n\nvoid meta_storage::create_node_recursively(std::queue<std::string> &&nodes,\n                                           blob &&value,\n                                           std::function<void()> &&cb)\n{\n    dassert(!nodes.empty(), \"\");\n\n    on_create_recursively op;\n    op.initialize(this);\n    op.args.reset(\n        new on_create_recursively::arguments{std::move(cb), std::move(value), std::move(nodes)});\n    op.run();\n}\n\nvoid meta_storage::create_node(std::string &&node, blob &&value, std::function<void()> &&cb)\n{\n    on_create op;\n    op.initialize(this);\n    op.args.reset(new on_create::arguments{std::move(cb), 
std::move(value), std::move(node)});\n    op.run();\n}\n\nvoid meta_storage::delete_node_recursively(std::string &&node, std::function<void()> &&cb)\n{\n    delete_node_impl(std::move(node), std::move(cb), true);\n}\n\nvoid meta_storage::delete_node(std::string &&node, std::function<void()> &&cb)\n{\n    delete_node_impl(std::move(node), std::move(cb), false);\n}\n\nvoid meta_storage::delete_node_impl(std::string &&node,\n                                    std::function<void()> &&cb,\n                                    bool is_recursive)\n{\n    on_delete op;\n    op.initialize(this);\n    op.args.reset(new on_delete::arguments);\n    op.args->cb = std::move(cb);\n    op.args->node = std::move(node);\n    op.args->is_recursively_delete = is_recursive;\n    op.run();\n}\n\nvoid meta_storage::set_data(std::string &&node, blob &&value, std::function<void()> &&cb)\n{\n    on_set_data op;\n    op.initialize(this);\n    op.args.reset(new on_set_data::arguments{std::move(cb), std::move(node), std::move(value)});\n    op.run();\n}\n\nvoid meta_storage::get_data(std::string &&node, std::function<void(const blob &)> &&cb)\n{\n    on_get_data op;\n    op.initialize(this);\n    op.args.reset(new on_get_data::arguments{std::move(cb), std::move(node)});\n    op.run();\n}\n\nvoid meta_storage::get_children(std::string &&node,\n                                std::function<void(bool, const std::vector<std::string> &)> &&cb)\n{\n    on_get_children op;\n    op.initialize(this);\n    op.args.reset(new on_get_children::arguments{std::move(cb), std::move(node)});\n    op.run();\n}\n\n} // namespace mss\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_state_service_utils.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/dist/meta_state_service.h>\n\nnamespace dsn {\nnamespace replication {\nnamespace mss { // abbreviation of meta_state_service\n\n/// This class is a convenience wrapper over meta_state_service.\n/// It wraps every operation with a simple error handling mechanism, and provides utilities\n/// like recursive node creation.\n/// Notice: The operations always run in THREAD_POOL_META_STATE: LPC_META_STATE_HIGH.\n///         This class is thread-safe.\n///\n/// ERROR HANDLING:\n/// Currently it retries for every timeout(ERR_TIMEOUT) operation infinitely,\n/// and delays 1sec for each attempt. For unexpected failures it will terminate\n/// the program. 
This is somewhat brute force but suitable for most cases.\n/// For more fine-grained error handling strategy, use meta_state_service instead.\n/// \\see meta_state_service_utils_impl.h # operation\nstruct meta_storage\n{\n    meta_storage(dist::meta_state_service *remote_storage, task_tracker *tracker);\n\n    ~meta_storage();\n\n    /// Asynchronously create nodes recursively from top down.\n    void create_node_recursively(std::queue<std::string> &&nodes,\n                                 blob &&value,\n                                 std::function<void()> &&cb);\n\n    void create_node(std::string &&node, blob &&value, std::function<void()> &&cb);\n\n    void delete_node_recursively(std::string &&node, std::function<void()> &&cb);\n\n    void delete_node(std::string &&node, std::function<void()> &&cb);\n\n    /// Will fatal if node doesn't exist.\n    void set_data(std::string &&node, blob &&value, std::function<void()> &&cb);\n\n    /// If node does not exist, cb will receive an empty blob.\n    void get_data(std::string &&node, std::function<void(const blob &)> &&cb);\n\n    /// \\param cb: void (bool node_exists, const std::vector<std::string> &children)\n    ///            `children` contains the name (not full path) of children nodes.\n    ///            `node_exists` indicates whether this node exists.\n    void get_children(std::string &&node,\n                      std::function<void(bool, const std::vector<std::string> &)> &&cb);\n\nprivate:\n    void delete_node_impl(std::string &&node, std::function<void()> &&cb, bool is_recursive);\n\nprivate:\n    friend struct operation;\n\n    dist::meta_state_service *_remote;\n    dsn::task_tracker *_tracker;\n};\n\n} // namespace mss\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_state_service_utils_impl.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/cpp/pipeline.h>\n#include <dsn/dist/fmt_logging.h>\n\n#include \"meta_state_service_utils.h\"\n\nnamespace dsn {\nnamespace replication {\nnamespace mss {\n\nstruct op_type\n{\n    enum type\n    {\n        OP_NONE,\n\n        OP_CREATE_RECURSIVELY,\n        OP_CREATE,\n        OP_DELETE_RECURSIVELY,\n        OP_DELETE,\n        OP_SET_DATA,\n        OP_GET_DATA,\n        OP_GET_CHILDREN,\n    };\n\n    static const char *to_string(type v)\n    {\n        static const char *op_type_to_string_map[] = {\n            \"OP_CREATE_RECURSIVELY\",\n            \"OP_CREATE\",\n            \"OP_DELETE_RECURSIVELY\",\n            \"OP_DELETE\",\n            \"OP_SET_DATA\",\n            \"OP_GET_DATA\",\n            \"OP_GET_CHILDREN\",\n        };\n\n        dassert_f(v != OP_NONE && v <= (sizeof(op_type_to_string_map) / sizeof(char *)),\n                  \"invalid type: {}\",\n                  v);\n        return op_type_to_string_map[v - 1];\n    }\n};\n\n/// Base class for all operations.\nstruct operation : pipeline::environment\n{\n    void initialize(meta_storage *storage)\n    {\n        _ms = storage;\n        
task_tracker(storage->_tracker).thread_pool(LPC_META_STATE_HIGH);\n    }\n\n    // The common strategy for error handling:\n    // retry after 1 sec if timeout, or terminate.\n    template <typename T>\n    void on_error(T *this_instance, op_type::type type, error_code ec, const std::string &path)\n    {\n        if (ec == ERR_TIMEOUT) {\n            dwarn_f(\"request({}) on path({}) was timeout, retry after 1 second\",\n                    op_type::to_string(type),\n                    path);\n            pipeline::repeat(std::move(*this_instance), 1_s);\n            return;\n        }\n        dassert_f(false,\n                  \"request({}) on path({}) encountered an unexpected error({})\",\n                  op_type::to_string(type),\n                  path,\n                  ec.to_string());\n    }\n\n    dist::meta_state_service *remote_storage() const { return _ms->_remote; }\n\n    dsn::task_tracker *tracker() const { return _ms->_tracker; }\n\nprivate:\n    meta_storage *_ms{nullptr};\n};\n\n// Developer Notes:\n//\n// As a concern of performance, arguments are wrapped into a shared_ptr to be used\n// in callback of meta_state_service without copying.\n//\n// To be able to repeat the internal task using pipeline::repeat, the operations must\n// implement `void run()` method.\n//\n\nstruct on_create_recursively : operation\n{\n    struct arguments\n    {\n        std::function<void()> cb;\n        dsn::blob val;\n        std::queue<std::string> nodes;\n    };\n    std::shared_ptr<arguments> args;\n\n    // ASSERTED: !args->nodes.empty\n    void run()\n    {\n        // first node\n        if (_cur_path.empty()) { // first node requires leading '/'\n            _cur_path += args->nodes.front();\n            args->nodes.pop();\n        }\n\n        remote_storage()->create_node(_cur_path,\n                                      LPC_META_STATE_HIGH,\n                                      [op = *this](error_code ec) mutable { op.on_error(ec); },\n              
                        args->nodes.empty() ? args->val : blob(),\n                                      tracker());\n    }\n\n    void on_error(error_code ec)\n    {\n        if (ec == ERR_OK || ec == ERR_NODE_ALREADY_EXIST) {\n            // create next node\n            if (!args->nodes.empty()) {\n                _cur_path += \"/\" + args->nodes.front();\n                args->nodes.pop();\n                pipeline::repeat(std::move(*this));\n            } else {\n                args->cb();\n                _cur_path.clear();\n            }\n            return;\n        }\n        operation::on_error(this, op_type::OP_CREATE_RECURSIVELY, ec, _cur_path);\n    }\n\nprivate:\n    std::string _cur_path;\n};\n\nstruct on_create : operation\n{\n    struct arguments\n    {\n        std::function<void()> cb;\n        dsn::blob val;\n        std::string node;\n    };\n    std::shared_ptr<arguments> args;\n\n    void run()\n    {\n        remote_storage()->create_node(args->node,\n                                      LPC_META_STATE_HIGH,\n                                      [op = *this](error_code ec) mutable { op.on_error(ec); },\n                                      args->val,\n                                      tracker());\n    }\n\n    void on_error(error_code ec)\n    {\n        if (ec == ERR_OK || ec == ERR_NODE_ALREADY_EXIST) {\n            args->cb();\n            return;\n        }\n\n        operation::on_error(this, op_type::OP_CREATE, ec, args->node);\n    }\n};\n\nstruct on_delete : operation\n{\n    struct arguments\n    {\n        std::function<void()> cb;\n        std::string node;\n        bool is_recursively_delete{false};\n    };\n    std::shared_ptr<arguments> args;\n\n    void run()\n    {\n        remote_storage()->delete_node(args->node,\n                                      args->is_recursively_delete,\n                                      LPC_META_STATE_HIGH,\n                                      [op = *this](error_code ec) mutable { 
op.on_error(ec); },\n                                      tracker());\n    }\n\n    void on_error(error_code ec)\n    {\n        if (ec == ERR_OK || ec == ERR_OBJECT_NOT_FOUND) {\n            args->cb();\n            return;\n        }\n\n        auto type =\n            args->is_recursively_delete ? op_type::OP_DELETE_RECURSIVELY : op_type::OP_DELETE;\n        operation::on_error(this, type, ec, args->node);\n    }\n};\n\nstruct on_get_data : operation\n{\n    struct arguments\n    {\n        std::function<void(const blob &)> cb;\n        std::string node;\n    };\n    std::shared_ptr<arguments> args;\n\n    void run()\n    {\n        remote_storage()->get_data(\n            args->node,\n            LPC_META_STATE_HIGH,\n            [op = *this](error_code ec, const blob &val) mutable { op.on_error(ec, val); },\n            tracker());\n    }\n\n    void on_error(error_code ec, const blob &val)\n    {\n        if (ec == ERR_OK || ec == ERR_OBJECT_NOT_FOUND) {\n            args->cb(val);\n            return;\n        }\n        operation::on_error(this, op_type::OP_GET_DATA, ec, args->node);\n    }\n};\n\nstruct on_set_data : operation\n{\n    struct arguments\n    {\n        std::function<void()> cb;\n        std::string node;\n        dsn::blob val;\n    };\n    std::shared_ptr<arguments> args;\n\n    void run()\n    {\n        remote_storage()->set_data(args->node,\n                                   args->val,\n                                   LPC_META_STATE_HIGH,\n                                   [op = *this](error_code ec) mutable { op.on_error(ec); },\n                                   tracker());\n    }\n\n    void on_error(error_code ec)\n    {\n        if (ec == ERR_OK) {\n            args->cb();\n            return;\n        }\n\n        operation::on_error(this, op_type::OP_SET_DATA, ec, args->node);\n    }\n};\n\nstruct on_get_children : operation\n{\n    struct arguments\n    {\n        std::function<void(bool, const std::vector<std::string> &)> 
cb;\n        std::string node;\n    };\n    std::shared_ptr<arguments> args;\n\n    void run()\n    {\n        remote_storage()->get_children(\n            args->node,\n            LPC_META_STATE_HIGH,\n            [op = *this](error_code ec, const std::vector<std::string> &children) mutable {\n                op.on_error(ec, children);\n            },\n            tracker());\n    }\n\n    void on_error(error_code ec, const std::vector<std::string> &children)\n    {\n        if (ec == ERR_OK) {\n            args->cb(true, children);\n            return;\n        }\n        if (ec == ERR_OBJECT_NOT_FOUND) {\n            args->cb(false, children);\n            return;\n        }\n        operation::on_error(this, op_type::OP_GET_CHILDREN, ec, args->node);\n    }\n};\n\n} // namespace mss\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/meta_state_service_zookeeper.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     meta state service implemented with zookeeper\n *\n * Revision history:\n *     2015-12-04, @shengofsun (sunweijie@xiaomi.com)\n */\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/dist/replication/replication.codes.h>\n\n#include <boost/algorithm/string.hpp>\n#include <boost/lexical_cast.hpp>\n\n#include \"meta_state_service_zookeeper.h\"\n#include \"zookeeper/zookeeper_session_mgr.h\"\n#include \"zookeeper/zookeeper_session.h\"\n#include \"zookeeper/zookeeper_error.h\"\n\nnamespace dsn {\nnamespace dist {\n\nclass zoo_transaction : public meta_state_service::transaction_entries\n{\npublic:\n    zoo_transaction(unsigned int capacity);\n    virtual ~zoo_transaction() override {}\n    virtual 
error_code create_node(const std::string &name, const blob &value = blob()) override;\n    virtual error_code set_data(const std::string &name, const blob &value = blob()) override;\n    virtual error_code delete_node(const std::string &name) override;\n    virtual error_code get_result(unsigned int entry_index) override;\n\n    std::shared_ptr<zookeeper_session::zoo_atomic_packet> packet() { return _pkt; }\nprivate:\n    std::shared_ptr<zookeeper_session::zoo_atomic_packet> _pkt;\n};\n\nzoo_transaction::zoo_transaction(unsigned int capacity)\n{\n    _pkt.reset(new zookeeper_session::zoo_atomic_packet(capacity));\n}\n\nerror_code zoo_transaction::create_node(const std::string &path, const blob &value)\n{\n    if (_pkt->_count >= _pkt->_capacity)\n        return ERR_ARRAY_INDEX_OUT_OF_RANGE;\n\n    unsigned int &offset = _pkt->_count;\n    std::string &p = (_pkt->_paths)[offset];\n    blob &b = (_pkt->_datas)[offset];\n\n    p = path;\n    b = value;\n\n    zoo_op_t &op = _pkt->_ops[offset];\n    op.type = ZOO_CREATE_OP;\n    op.create_op.path = p.c_str();\n    op.create_op.flags = 0;\n    op.create_op.acl = &ZOO_OPEN_ACL_UNSAFE;\n    op.create_op.data = b.data();\n    op.create_op.datalen = b.length();\n\n    /* output path is either same with path(for non-sequencial node)\n     * or 10 bytes more than the path(for sequencial node) */\n    int buffer_length = path.size() + 20;\n\n    op.create_op.buf = _pkt->alloc_buffer(buffer_length);\n    op.create_op.buflen = buffer_length;\n\n    ++offset;\n    return ERR_OK;\n}\n\nerror_code zoo_transaction::delete_node(const std::string &path)\n{\n    if (_pkt->_count >= _pkt->_capacity)\n        return ERR_ARRAY_INDEX_OUT_OF_RANGE;\n    unsigned int &offset = _pkt->_count;\n    std::string &p = (_pkt->_paths)[offset];\n\n    p = path;\n\n    zoo_op_t &op = _pkt->_ops[offset];\n    op.type = ZOO_DELETE_OP;\n    op.delete_op.path = p.c_str();\n    op.delete_op.version = -1;\n\n    ++offset;\n    return 
ERR_OK;\n}\n\nerror_code zoo_transaction::set_data(const std::string &name, const blob &value)\n{\n    if (_pkt->_count >= _pkt->_capacity)\n        return ERR_ARRAY_INDEX_OUT_OF_RANGE;\n    unsigned int &offset = _pkt->_count;\n    std::string &p = (_pkt->_paths)[offset];\n    blob &b = (_pkt->_datas[offset]);\n    p = name;\n    b = value;\n\n    zoo_op_t &op = _pkt->_ops[offset];\n    op.type = ZOO_SETDATA_OP;\n    op.set_op.path = p.c_str();\n    op.set_op.data = value.data();\n    op.set_op.datalen = value.length();\n    op.set_op.version = -1;\n    op.set_op.stat = (struct Stat *)_pkt->alloc_buffer(sizeof(struct Stat));\n\n    ++offset;\n    return ERR_OK;\n}\n\nerror_code zoo_transaction::get_result(unsigned int entry_index)\n{\n    if (entry_index >= _pkt->_count)\n        return ERR_ARRAY_INDEX_OUT_OF_RANGE;\n    return from_zerror(_pkt->_results[entry_index].err);\n}\n\nmeta_state_service_zookeeper::meta_state_service_zookeeper() : ref_counter() { _first_call = true; }\n\nmeta_state_service_zookeeper::~meta_state_service_zookeeper()\n{\n    _tracker.wait_outstanding_tasks();\n    if (_session) {\n        _session->detach(this);\n        _session = nullptr;\n    }\n}\n\nerror_code meta_state_service_zookeeper::initialize(const std::vector<std::string> &)\n{\n    _session =\n        zookeeper_session_mgr::instance().get_session(service_app::current_service_app_info());\n    _zoo_state = _session->attach(this,\n                                  std::bind(&meta_state_service_zookeeper::on_zoo_session_evt,\n                                            ref_this(this),\n                                            std::placeholders::_1));\n    if (_zoo_state != ZOO_CONNECTED_STATE) {\n        _notifier.wait_for(zookeeper_session_mgr::instance().timeout());\n        if (_zoo_state != ZOO_CONNECTED_STATE)\n            return ERR_TIMEOUT;\n    }\n\n    ddebug(\"init meta_state_service_zookeeper succeed\");\n\n    // Notice: this reference is released in finalize\n    
add_ref();\n    return ERR_OK;\n}\n\nerror_code meta_state_service_zookeeper::finalize()\n{\n    release_ref();\n    return ERR_OK;\n}\n\nstd::shared_ptr<meta_state_service::transaction_entries>\nmeta_state_service_zookeeper::new_transaction_entries(unsigned int capacity)\n{\n    std::shared_ptr<zoo_transaction> t(new zoo_transaction(capacity));\n    return t;\n}\n\n#define VISIT_INIT(tsk, op_type, node)                                                             \\\n    zookeeper_session::zoo_opcontext *op = zookeeper_session::create_context();                    \\\n    zookeeper_session::zoo_input *input = &op->_input;                                             \\\n    op->_callback_function = std::bind(&meta_state_service_zookeeper::visit_zookeeper_internal,    \\\n                                       ref_this(this),                                             \\\n                                       tsk,                                                        \\\n                                       std::placeholders::_1);                                     \\\n    op->_optype = op_type;                                                                         \\\n    input->_path = node;\n\ntask_ptr meta_state_service_zookeeper::create_node(const std::string &node,\n                                                   task_code cb_code,\n                                                   const err_callback &cb_create,\n                                                   const blob &value,\n                                                   dsn::task_tracker *tracker)\n{\n    error_code_future_ptr tsk(new error_code_future(cb_code, cb_create, 0));\n    tsk->set_tracker(tracker);\n    dinfo(\"call create, node(%s)\", node.c_str());\n    VISIT_INIT(tsk, zookeeper_session::ZOO_OPERATION::ZOO_CREATE, node);\n    input->_value = value;\n    input->_flags = 0;\n\n    _session->visit(op);\n    return tsk;\n}\n\ntask_ptr 
meta_state_service_zookeeper::submit_transaction(\n    const std::shared_ptr<transaction_entries> &entries,\n    task_code cb_code,\n    const err_callback &cb_transaction,\n    dsn::task_tracker *tracker)\n{\n    error_code_future_ptr tsk(new error_code_future(cb_code, cb_transaction, 0));\n    tsk->set_tracker(tracker);\n    dinfo(\"call submit batch\");\n    zookeeper_session::zoo_opcontext *op = zookeeper_session::create_context();\n    zookeeper_session::zoo_input *input = &op->_input;\n    op->_callback_function = std::bind(&meta_state_service_zookeeper::visit_zookeeper_internal,\n                                       ref_this(this),\n                                       tsk,\n                                       std::placeholders::_1);\n    op->_optype = zookeeper_session::ZOO_OPERATION::ZOO_TRANSACTION;\n\n    zoo_transaction *t = dynamic_cast<zoo_transaction *>(entries.get());\n    input->_pkt = t->packet();\n\n    _session->visit(op);\n    return tsk;\n}\n\ntask_ptr meta_state_service_zookeeper::delete_empty_node(const std::string &node,\n                                                         task_code cb_code,\n                                                         const err_callback &cb_delete,\n                                                         dsn::task_tracker *tracker)\n{\n    error_code_future_ptr tsk(new error_code_future(cb_code, cb_delete, 0));\n    tsk->set_tracker(tracker);\n    dinfo(\"call delete, node(%s)\", node.c_str());\n    VISIT_INIT(tsk, zookeeper_session::ZOO_OPERATION::ZOO_DELETE, node);\n    _session->visit(op);\n    return tsk;\n}\n\ntask_ptr meta_state_service_zookeeper::delete_node(const std::string &node,\n                                                   bool recursively_delete,\n                                                   task_code cb_code,\n                                                   const err_callback &cb_delete,\n                                                   dsn::task_tracker 
*tracker)\n{\n    error_code_future_ptr tsk(new error_code_future(cb_code, cb_delete, 0));\n    tsk->set_tracker(tracker);\n    err_stringv_callback after_get_children = [node, recursively_delete, cb_code, tsk, this](\n        error_code err, const std::vector<std::string> &children) {\n        if (ERR_OK != err)\n            tsk->enqueue_with(err);\n        else if (children.empty())\n            delete_empty_node(\n                node, cb_code, [tsk](error_code err) { tsk->enqueue_with(err); }, &_tracker);\n        else if (!recursively_delete)\n            tsk->enqueue_with(ERR_INVALID_PARAMETERS);\n        else {\n            std::atomic_int *child_count = new std::atomic_int();\n            std::atomic_int *error_count = new std::atomic_int();\n\n            child_count->store((int)children.size());\n            error_count->store(0);\n\n            for (auto &child : children) {\n                delete_node(node + \"/\" + child,\n                            true,\n                            cb_code,\n                            [=](error_code err) {\n                                if (ERR_OK != err)\n                                    ++(*error_count);\n                                int result = --(*child_count);\n                                if (0 == result) {\n                                    if (0 == *error_count)\n                                        delete_empty_node(\n                                            node,\n                                            cb_code,\n                                            [tsk](error_code err) { tsk->enqueue_with(err); },\n                                            &_tracker);\n                                    else\n                                        tsk->enqueue_with(ERR_FILE_OPERATION_FAILED);\n                                    delete child_count;\n                                    delete error_count;\n                                }\n                            },\n              
              &_tracker);\n            }\n        }\n    };\n\n    get_children(node, cb_code, after_get_children, &_tracker);\n    return tsk;\n}\n\ntask_ptr meta_state_service_zookeeper::get_data(const std::string &node,\n                                                task_code cb_code,\n                                                const err_value_callback &cb_get_data,\n                                                dsn::task_tracker *tracker)\n{\n    err_value_future_ptr tsk(new err_value_future(cb_code, cb_get_data, 0));\n    tsk->set_tracker(tracker);\n    dinfo(\"call get, node(%s)\", node.c_str());\n    VISIT_INIT(tsk, zookeeper_session::ZOO_OPERATION::ZOO_GET, node);\n    input->_is_set_watch = 0;\n    _session->visit(op);\n    return tsk;\n}\n\ntask_ptr meta_state_service_zookeeper::set_data(const std::string &node,\n                                                const blob &value,\n                                                task_code cb_code,\n                                                const err_callback &cb_set_data,\n                                                dsn::task_tracker *tracker)\n{\n    error_code_future_ptr tsk(new error_code_future(cb_code, cb_set_data, 0));\n    tsk->set_tracker(tracker);\n    dinfo(\"call set, node(%s)\", node.c_str());\n    VISIT_INIT(tsk, zookeeper_session::ZOO_OPERATION::ZOO_SET, node);\n\n    input->_value = value;\n    _session->visit(op);\n    return tsk;\n}\n\ntask_ptr meta_state_service_zookeeper::node_exist(const std::string &node,\n                                                  task_code cb_code,\n                                                  const err_callback &cb_exist,\n                                                  dsn::task_tracker *tracker)\n{\n    error_code_future_ptr tsk(new error_code_future(cb_code, cb_exist, 0));\n    tsk->set_tracker(tracker);\n    dinfo(\"call node_exist, node(%s)\", node.c_str());\n    VISIT_INIT(tsk, zookeeper_session::ZOO_OPERATION::ZOO_EXISTS, 
node);\n    input->_is_set_watch = 0;\n    _session->visit(op);\n    return tsk;\n}\n\ntask_ptr meta_state_service_zookeeper::get_children(const std::string &node,\n                                                    task_code cb_code,\n                                                    const err_stringv_callback &cb_get_children,\n                                                    dsn::task_tracker *tracker)\n{\n    err_stringv_future_ptr tsk(new err_stringv_future(cb_code, cb_get_children, 0));\n    tsk->set_tracker(tracker);\n    dinfo(\"call get children, node(%s)\", node.c_str());\n    VISIT_INIT(tsk, zookeeper_session::ZOO_OPERATION::ZOO_GETCHILDREN, node);\n    input->_is_set_watch = 0;\n    _session->visit(op);\n    return tsk;\n}\n\n/*static*/\n/* this function runs in zookeeper do-completion thread */\nvoid meta_state_service_zookeeper::on_zoo_session_evt(ref_this _this, int zoo_state)\n{\n    _this->_zoo_state = zoo_state;\n\n    if (ZOO_CONNECTING_STATE == zoo_state) {\n        // TODO: support the switch of zookeeper session\n        dwarn(\"the zk session is reconnecting\");\n    } else if (_this->_first_call && ZOO_CONNECTED_STATE == zoo_state) {\n        _this->_first_call = false;\n        _this->_notifier.notify();\n    } else {\n        // ignore\n    }\n}\n/*static*/\n/*this function runs in zookeper do-completion thread*/\nvoid meta_state_service_zookeeper::visit_zookeeper_internal(ref_this,\n                                                            task_ptr callback,\n                                                            void *result)\n{\n    zookeeper_session::zoo_opcontext *op =\n        reinterpret_cast<zookeeper_session::zoo_opcontext *>(result);\n    dinfo(\n        \"visit zookeeper internal: ans(%s), call type(%d)\", zerror(op->_output.error), op->_optype);\n\n    switch (op->_optype) {\n    case zookeeper_session::ZOO_OPERATION::ZOO_CREATE:\n    case zookeeper_session::ZOO_OPERATION::ZOO_DELETE:\n    case 
zookeeper_session::ZOO_OPERATION::ZOO_EXISTS:\n    case zookeeper_session::ZOO_OPERATION::ZOO_SET:\n    case zookeeper_session::ZOO_OPERATION::ZOO_TRANSACTION: {\n        auto tsk = reinterpret_cast<error_code_future *>(callback.get());\n        tsk->enqueue_with(from_zerror(op->_output.error));\n    } break;\n    case zookeeper_session::ZOO_OPERATION::ZOO_GET: {\n        auto tsk = reinterpret_cast<err_value_future *>(callback.get());\n        blob data;\n        if (ZOK == op->_output.error) {\n            std::shared_ptr<char> buf(\n                dsn::utils::make_shared_array<char>(op->_output.get_op.value_length));\n            memcpy(buf.get(), op->_output.get_op.value, op->_output.get_op.value_length);\n            data.assign(buf, 0, op->_output.get_op.value_length);\n        }\n        tsk->enqueue_with(from_zerror(op->_output.error), data);\n    } break;\n    case zookeeper_session::ZOO_OPERATION::ZOO_GETCHILDREN: {\n        auto tsk = reinterpret_cast<err_stringv_future *>(callback.get());\n        std::vector<std::string> result;\n        if (ZOK == op->_output.error) {\n            const String_vector *vec = op->_output.getchildren_op.strings;\n            result.resize(vec->count);\n            for (int i = 0; i != vec->count; ++i)\n                result[i].assign(vec->data[i]);\n        }\n        tsk->enqueue_with(from_zerror(op->_output.error), std::move(result));\n    } break;\n    default:\n        break;\n    }\n}\n}\n}\n"
  },
  {
    "path": "src/meta/meta_state_service_zookeeper.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     meta state service implemented with zookeeper\n *\n * Revision history:\n *     2015-12-04, @shengofsun (sunweijie@xiaomi.com)\n */\n\n#pragma once\n\n#include <dsn/utility/synchronize.h>\n#include <dsn/utility/autoref_ptr.h>\n#include <dsn/tool-api/task_tracker.h>\n#include <dsn/dist/meta_state_service.h>\n#include <dsn/dist/distributed_lock_service.h>\n\nnamespace dsn {\nnamespace dist {\n\nclass zookeeper_session;\nclass meta_state_service_zookeeper : public meta_state_service, public ref_counter\n{\npublic:\n    explicit meta_state_service_zookeeper();\n    virtual ~meta_state_service_zookeeper() override;\n\n    // no parameter need\n    virtual error_code initialize(const std::vector<std::string> 
&args) override;\n    virtual error_code finalize() override;\n\n    virtual std::shared_ptr<meta_state_service::transaction_entries>\n    new_transaction_entries(unsigned int capacity) override;\n\n    virtual task_ptr\n    submit_transaction(const std::shared_ptr<meta_state_service::transaction_entries> &entries,\n                       task_code cb_code,\n                       const err_callback &cb_transaction,\n                       task_tracker *tracker = nullptr) override;\n\n    virtual task_ptr create_node(const std::string &node,\n                                 task_code cb_code,\n                                 const err_callback &cb_create,\n                                 const blob &value = blob(),\n                                 dsn::task_tracker *tracker = nullptr) override;\n\n    virtual task_ptr delete_node(const std::string &node,\n                                 bool recursively_delete,\n                                 task_code cb_code,\n                                 const err_callback &cb_delete,\n                                 dsn::task_tracker *tracker = nullptr) override;\n\n    virtual task_ptr node_exist(const std::string &node,\n                                task_code cb_code,\n                                const err_callback &cb_exist,\n                                dsn::task_tracker *tracker = nullptr) override;\n\n    virtual task_ptr get_data(const std::string &node,\n                              task_code cb_code,\n                              const err_value_callback &cb_get_data,\n                              dsn::task_tracker *tracker = nullptr) override;\n\n    virtual task_ptr set_data(const std::string &node,\n                              const blob &value,\n                              task_code cb_code,\n                              const err_callback &cb_set_data,\n                              dsn::task_tracker *tracker = nullptr) override;\n\n    virtual task_ptr get_children(const std::string 
&node,\n                                  task_code cb_code,\n                                  const err_stringv_callback &cb_get_children,\n                                  dsn::task_tracker *tracker = nullptr) override;\n\n    task_ptr delete_empty_node(const std::string &node,\n                               task_code cb_code,\n                               const err_callback &cb_delete,\n                               dsn::task_tracker *tracker);\n    int hash() const { return (int)(((uint64_t)this) & 0xffffffff); }\n\nprivate:\n    typedef ref_ptr<meta_state_service_zookeeper> ref_this;\n\n    bool _first_call;\n    int _zoo_state;\n    zookeeper_session *_session;\n    utils::notify_event _notifier;\n\n    dsn::task_tracker _tracker;\n\n    static void on_zoo_session_evt(ref_this ptr, int zoo_state);\n    static void visit_zookeeper_internal(ref_this ptr,\n                                         task_ptr callback,\n                                         void *result /*zookeeper_session::zoo_opcontext**/);\n};\n}\n}\n"
  },
  {
    "path": "src/meta/partition_guardian.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"partition_guardian.h\"\n#include \"server_load_balancer.h\"\n#include <dsn/utils/time_utils.h>\n\nnamespace dsn {\nnamespace replication {\npartition_guardian::partition_guardian(meta_service *svc) : _svc(svc)\n{\n    if (svc != nullptr) {\n        _mutation_2pc_min_replica_count = svc->get_options().mutation_2pc_min_replica_count;\n        _replica_assign_delay_ms_for_dropouts =\n            svc->get_meta_options()._lb_opts.replica_assign_delay_ms_for_dropouts;\n        config_context::MAX_REPLICA_COUNT_IN_GRROUP =\n            svc->get_meta_options()._lb_opts.max_replicas_in_group;\n    } else {\n        _mutation_2pc_min_replica_count = 0;\n        _replica_assign_delay_ms_for_dropouts = 0;\n    }\n\n    _recent_choose_primary_fail_count.init_app_counter(\n        \"eon.server_load_balancer\",\n        \"recent_choose_primary_fail_count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"choose primary fail count in the recent period\");\n}\n\npc_status partition_guardian::cure(meta_view view,\n                                   const dsn::gpid &gpid,\n                                   configuration_proposal_action 
&action)\n{\n    if (from_proposals(view, gpid, action))\n        return pc_status::ill;\n\n    std::shared_ptr<app_state> &app = (*view.apps)[gpid.get_app_id()];\n    const partition_configuration &pc = *get_config(*(view.apps), gpid);\n    const proposal_actions &acts = get_config_context(*view.apps, gpid)->lb_actions;\n\n    dassert(app->is_stateful, \"\");\n    dassert(acts.empty(), \"\");\n\n    pc_status status;\n    if (pc.primary.is_invalid())\n        status = on_missing_primary(view, gpid);\n    else if (static_cast<int>(pc.secondaries.size()) + 1 < pc.max_replica_count)\n        status = on_missing_secondary(view, gpid);\n    else if (static_cast<int>(pc.secondaries.size()) >= pc.max_replica_count)\n        status = on_redundant_secondary(view, gpid);\n    else\n        status = pc_status::healthy;\n\n    if (!acts.empty()) {\n        action = *acts.front();\n    }\n    return status;\n}\n\nvoid partition_guardian::reconfig(meta_view view, const configuration_update_request &request)\n{\n    const dsn::gpid &gpid = request.config.pid;\n    if (!((*view.apps)[gpid.get_app_id()]->is_stateful)) {\n        return;\n    }\n\n    config_context *cc = get_config_context(*(view.apps), gpid);\n    if (!cc->lb_actions.empty()) {\n        const configuration_proposal_action *current = cc->lb_actions.front();\n        dassert(current != nullptr && current->type != config_type::CT_INVALID,\n                \"invalid proposal for gpid(%d.%d)\",\n                gpid.get_app_id(),\n                gpid.get_partition_index());\n        // if the valid proposal is from cure\n        if (!cc->lb_actions.is_from_balancer()) {\n            finish_cure_proposal(view, gpid, *current);\n        }\n        cc->lb_actions.pop_front();\n    }\n\n    // handle the dropped out servers\n    if (request.type == config_type::CT_DROP_PARTITION) {\n        cc->serving.clear();\n\n        const std::vector<rpc_address> &config_dropped = request.config.last_drops;\n        for (const 
rpc_address &drop_node : config_dropped) {\n            cc->record_drop_history(drop_node);\n        }\n    } else {\n        when_update_replicas(request.type, [cc, &request](bool is_adding) {\n            if (is_adding) {\n                cc->remove_from_dropped(request.node);\n                // when some replicas are added to partition_config\n                // we should try to adjust the size of drop_list\n                cc->check_size();\n            } else {\n                cc->remove_from_serving(request.node);\n\n                dassert(cc->record_drop_history(request.node),\n                        \"node(%s) has been in the dropped\",\n                        request.node.to_string());\n            }\n        });\n    }\n}\n\nbool partition_guardian::from_proposals(meta_view &view,\n                                        const dsn::gpid &gpid,\n                                        configuration_proposal_action &action)\n{\n    const partition_configuration &pc = *get_config(*(view.apps), gpid);\n    config_context &cc = *get_config_context(*(view.apps), gpid);\n    bool is_action_valid;\n\n    if (cc.lb_actions.empty()) {\n        action.type = config_type::CT_INVALID;\n        return false;\n    }\n    action = *(cc.lb_actions.front());\n    char reason[1024];\n    if (action.target.is_invalid()) {\n        sprintf(reason, \"action target is invalid\");\n        goto invalid_action;\n    }\n    if (action.node.is_invalid()) {\n        sprintf(reason, \"action node is invalid\");\n        goto invalid_action;\n    }\n    if (!is_node_alive(*(view.nodes), action.target)) {\n        sprintf(reason, \"action target(%s) is not alive\", action.target.to_string());\n        goto invalid_action;\n    }\n    if (!is_node_alive(*(view.nodes), action.node)) {\n        sprintf(reason, \"action node(%s) is not alive\", action.node.to_string());\n        goto invalid_action;\n    }\n    if (cc.lb_actions.is_abnormal_learning_proposal()) {\n        
sprintf(reason, \"learning process abnormal\");\n        goto invalid_action;\n    }\n\n    switch (action.type) {\n    case config_type::CT_ASSIGN_PRIMARY:\n        is_action_valid = (action.node == action.target && pc.primary.is_invalid() &&\n                           !is_secondary(pc, action.node));\n        break;\n    case config_type::CT_UPGRADE_TO_PRIMARY:\n        is_action_valid = (action.node == action.target && pc.primary.is_invalid() &&\n                           is_secondary(pc, action.node));\n        break;\n    case config_type::CT_ADD_SECONDARY:\n    case config_type::CT_ADD_SECONDARY_FOR_LB:\n        is_action_valid = (is_primary(pc, action.target) && !is_secondary(pc, action.node));\n        is_action_valid = (is_action_valid && is_node_alive(*(view.nodes), action.node));\n        break;\n    case config_type::CT_DOWNGRADE_TO_INACTIVE:\n    case config_type::CT_REMOVE:\n        is_action_valid = (is_primary(pc, action.target) && is_member(pc, action.node));\n        break;\n    case config_type::CT_DOWNGRADE_TO_SECONDARY:\n        is_action_valid = (action.target == action.node && is_primary(pc, action.target));\n        break;\n    default:\n        is_action_valid = false;\n        break;\n    }\n\n    if (is_action_valid)\n        return true;\n    else\n        sprintf(reason, \"action is invalid\");\n\ninvalid_action:\n    std::stringstream ss;\n    ss << action;\n    ddebug(\"proposal action(%s) for gpid(%d.%d) is invalid, clear all proposal actions: %s\",\n           ss.str().c_str(),\n           gpid.get_app_id(),\n           gpid.get_partition_index(),\n           reason);\n    action.type = config_type::CT_INVALID;\n\n    while (!cc.lb_actions.empty()) {\n        configuration_proposal_action cpa = *cc.lb_actions.front();\n        if (!cc.lb_actions.is_from_balancer()) {\n            finish_cure_proposal(view, gpid, cpa);\n        }\n        cc.lb_actions.pop_front();\n    }\n    return false;\n}\n\npc_status 
partition_guardian::on_missing_primary(meta_view &view, const dsn::gpid &gpid)\n{\n    const partition_configuration &pc = *get_config(*(view.apps), gpid);\n    proposal_actions &acts = get_config_context(*view.apps, gpid)->lb_actions;\n\n    char gpid_name[64];\n    snprintf(gpid_name, 64, \"%d.%d\", gpid.get_app_id(), gpid.get_partition_index());\n\n    configuration_proposal_action action;\n    pc_status result = pc_status::invalid;\n\n    action.type = config_type::CT_INVALID;\n    // try to upgrade a secondary to primary if the primary is missing\n    if (pc.secondaries.size() > 0) {\n        action.node.set_invalid();\n\n        for (int i = 0; i < pc.secondaries.size(); ++i) {\n            node_state *ns = get_node_state(*(view.nodes), pc.secondaries[i], false);\n            dassert(ns != nullptr,\n                    \"invalid secondary address, address = %s\",\n                    pc.secondaries[i].to_string());\n            if (!ns->alive())\n                continue;\n\n            // find a node with minimal primaries\n            newly_partitions *np = newly_partitions_ext::get_inited(ns);\n            if (action.node.is_invalid() ||\n                np->less_primaries(*get_newly_partitions(*(view.nodes), action.node),\n                                   gpid.get_app_id())) {\n                action.node = ns->addr();\n            }\n        }\n\n        if (action.node.is_invalid()) {\n            derror(\"all nodes for gpid(%s) are dead, waiting for some secondary to come back....\",\n                   gpid_name);\n            result = pc_status::dead;\n        } else {\n            action.type = config_type::CT_UPGRADE_TO_PRIMARY;\n            newly_partitions *np = get_newly_partitions(*(view.nodes), action.node);\n            np->newly_add_primary(gpid.get_app_id(), true);\n\n            action.target = action.node;\n            result = pc_status::ill;\n        }\n    }\n    // if nothing in the last_drops, it means that this is a newly created 
partition, so let's\n    // just find a node and assign primary for it.\n    else if (pc.last_drops.empty()) {\n        dsn::rpc_address min_primary_server;\n        newly_partitions *min_primary_server_np = nullptr;\n\n        for (auto &pairs : *view.nodes) {\n            node_state &ns = pairs.second;\n            if (!ns.alive())\n                continue;\n            newly_partitions *np = newly_partitions_ext::get_inited(&ns);\n            // find a node which has minimal primaries\n            if (min_primary_server_np == nullptr ||\n                np->less_primaries(*min_primary_server_np, gpid.get_app_id())) {\n                min_primary_server = ns.addr();\n                min_primary_server_np = np;\n            }\n        }\n\n        if (min_primary_server_np != nullptr) {\n            action.node = min_primary_server;\n            action.target = action.node;\n            action.type = config_type::CT_ASSIGN_PRIMARY;\n            min_primary_server_np->newly_add_primary(gpid.get_app_id(), false);\n        }\n\n        result = pc_status::ill;\n    }\n    // well, all replicas in this partition is dead\n    else {\n        dwarn(\"%s enters DDD state, we are waiting for all replicas to come back, \"\n              \"and select primary according to informations collected\",\n              gpid_name);\n        // when considering how to handle the DDD state, we must keep in mind that our\n        // shared/private-log data only write to OS-cache.\n        // so the last removed replica can't act as primary directly.\n        std::string reason;\n        config_context &cc = *get_config_context(*view.apps, gpid);\n        action.node.set_invalid();\n        for (int i = 0; i < cc.dropped.size(); ++i) {\n            const dropped_replica &dr = cc.dropped[i];\n            char time_buf[30];\n            ::dsn::utils::time_ms_to_string(dr.time, time_buf);\n            ddebug(\"%s: config_context.dropped[%d]: \"\n                   \"node(%s), time(%\" 
PRIu64 \"){%s}, ballot(%\" PRId64 \"), \"\n                   \"commit_decree(%\" PRId64 \"), prepare_decree(%\" PRId64 \")\",\n                   gpid_name,\n                   i,\n                   dr.node.to_string(),\n                   dr.time,\n                   time_buf,\n                   dr.ballot,\n                   dr.last_committed_decree,\n                   dr.last_prepared_decree);\n        }\n\n        for (int i = 0; i < pc.last_drops.size(); ++i) {\n            int dropped_index = -1;\n            for (int k = 0; k < cc.dropped.size(); k++) {\n                if (cc.dropped[k].node == pc.last_drops[i]) {\n                    dropped_index = k;\n                    break;\n                }\n            }\n            ddebug(\"%s: config_context.last_drops[%d]: node(%s), dropped_index(%d)\",\n                   gpid_name,\n                   i,\n                   pc.last_drops[i].to_string(),\n                   dropped_index);\n        }\n\n        if (pc.last_drops.size() == 1) {\n            dwarn(\"%s: the only node(%s) is dead, waiting it to come back\",\n                  gpid_name,\n                  pc.last_drops.back().to_string());\n            action.node = pc.last_drops.back();\n        } else {\n            std::vector<dsn::rpc_address> nodes(pc.last_drops.end() - 2, pc.last_drops.end());\n            std::vector<dropped_replica> collected_info(2);\n            bool ready = true;\n\n            ddebug(\"%s: last two drops are %s and %s (the latest dropped)\",\n                   gpid_name,\n                   nodes[0].to_string(),\n                   nodes[1].to_string());\n\n            for (unsigned int i = 0; i < nodes.size(); ++i) {\n                node_state *ns = get_node_state(*view.nodes, nodes[i], false);\n                if (ns == nullptr || !ns->alive()) {\n                    ready = false;\n                    reason = \"the last dropped node(\" + nodes[i].to_std_string() +\n                             \") haven't 
come back yet\";\n                    dwarn(\"%s: don't select primary: %s\", gpid_name, reason.c_str());\n                } else {\n                    std::vector<dropped_replica>::iterator it = cc.find_from_dropped(nodes[i]);\n                    if (it == cc.dropped.end() || it->ballot == invalid_ballot) {\n                        if (ns->has_collected()) {\n                            ddebug(\"%s: ignore %s's replica info as it doesn't exist on replica \"\n                                   \"server\",\n                                   gpid_name,\n                                   nodes[i].to_string());\n                            collected_info[i] = {nodes[i], 0, -1, -1, -1};\n                        } else {\n                            ready = false;\n                            reason = \"the last dropped node(\" + nodes[i].to_std_string() +\n                                     \") is unavailable because \";\n                            if (it == cc.dropped.end()) {\n                                reason += \"the node is not exist in dropped_nodes\";\n                            } else {\n                                reason += \"replica info has not been collected from the node\";\n                            }\n                            dwarn(\"%s: don't select primary: %s\", gpid_name, reason.c_str());\n                        }\n                    } else {\n                        collected_info[i] = *it;\n                    }\n                }\n            }\n\n            if (ready && collected_info[0].ballot == -1 && collected_info[1].ballot == -1) {\n                ready = false;\n                reason = \"no replica info collected from the last two drops\";\n                dwarn(\"%s: don't select primary: %s\", gpid_name, reason.c_str());\n            }\n\n            if (ready) {\n                dropped_replica &previous_dead = collected_info[0];\n                dropped_replica &recent_dead = collected_info[1];\n\n              
  // 1. larger ballot should have larger committed decree\n                // 2. max_prepared_decree should larger than meta's committed decree\n                int64_t gap1 = previous_dead.ballot - recent_dead.ballot;\n                int64_t gap2 =\n                    previous_dead.last_committed_decree - recent_dead.last_committed_decree;\n                if (gap1 * gap2 >= 0) {\n                    int64_t larger_cd = std::max(previous_dead.last_committed_decree,\n                                                 recent_dead.last_committed_decree);\n                    int64_t larger_pd = std::max(previous_dead.last_prepared_decree,\n                                                 recent_dead.last_prepared_decree);\n                    if (larger_pd >= pc.last_committed_decree && larger_pd >= larger_cd) {\n                        if (gap1 != 0) {\n                            // 1. choose node with larger ballot\n                            action.node = gap1 < 0 ? recent_dead.node : previous_dead.node;\n                        } else if (gap2 != 0) {\n                            // 2. choose node with larger last_committed_decree\n                            action.node = gap2 < 0 ? recent_dead.node : previous_dead.node;\n                        } else {\n                            // 3. choose node with larger last_prepared_decree\n                            action.node = previous_dead.last_prepared_decree >\n                                                  recent_dead.last_prepared_decree\n                                              ? 
previous_dead.node\n                                              : recent_dead.node;\n                        }\n                        ddebug(\n                            \"%s: select %s as a new primary\", gpid_name, action.node.to_string());\n                    } else {\n                        char buf[1000];\n                        sprintf(buf,\n                                \"for the last two drops, larger_prepared_decree(%\" PRId64 \"), \"\n                                \"last committed decree on meta(%\" PRId64 \"), \"\n                                \"larger_committed_decree(%\" PRId64 \")\",\n                                larger_pd,\n                                pc.last_committed_decree,\n                                larger_cd);\n                        dwarn(\"%s: don't select primary: %s\", gpid_name, reason.c_str());\n                    }\n                } else {\n                    reason = \"for the last two drops, the node with larger ballot has smaller last \"\n                             \"committed decree\";\n                    dwarn(\"%s: don't select primary: %s\", gpid_name, reason.c_str());\n                }\n            }\n        }\n\n        if (!action.node.is_invalid()) {\n            action.target = action.node;\n            action.type = config_type::CT_ASSIGN_PRIMARY;\n\n            get_newly_partitions(*view.nodes, action.node)\n                ->newly_add_primary(gpid.get_app_id(), false);\n        } else {\n            dwarn(\"%s: don't select any node for security reason, administrator can select \"\n                  \"a proper one by shell\",\n                  gpid_name);\n            _recent_choose_primary_fail_count->increment();\n            ddd_partition_info pinfo;\n            pinfo.config = pc;\n            for (int i = 0; i < cc.dropped.size(); ++i) {\n                const dropped_replica &dr = cc.dropped[i];\n                ddd_node_info ninfo;\n                ninfo.node = dr.node;\n          
      ninfo.drop_time_ms = dr.time;\n                ninfo.ballot = invalid_ballot;\n                ninfo.last_committed_decree = invalid_decree;\n                ninfo.last_prepared_decree = invalid_decree;\n                node_state *ns = get_node_state(*view.nodes, dr.node, false);\n                if (ns != nullptr && ns->alive()) {\n                    ninfo.is_alive = true;\n                    if (ns->has_collected()) {\n                        ninfo.is_collected = true;\n                        ninfo.ballot = dr.ballot;\n                        ninfo.last_committed_decree = dr.last_committed_decree;\n                        ninfo.last_prepared_decree = dr.last_prepared_decree;\n                    }\n                }\n                pinfo.dropped.emplace_back(std::move(ninfo));\n            }\n            pinfo.reason = reason;\n            set_ddd_partition(std::move(pinfo));\n        }\n\n        result = pc_status::dead;\n    }\n\n    if (action.type != config_type::CT_INVALID) {\n        acts.assign_cure_proposal(action);\n    }\n    return result;\n}\n\npc_status partition_guardian::on_missing_secondary(meta_view &view, const dsn::gpid &gpid)\n{\n    partition_configuration &pc = *get_config(*(view.apps), gpid);\n    config_context &cc = *get_config_context(*(view.apps), gpid);\n\n    configuration_proposal_action action;\n    bool is_emergency = false;\n    if (cc.config_owner->max_replica_count > _mutation_2pc_min_replica_count &&\n        replica_count(pc) < _mutation_2pc_min_replica_count) {\n        // ATTENTION:\n        // when max_replica_count == 2, even if there is only 1 replica alive now, we will still\n        // wait for replica_assign_delay_ms_for_dropouts before recover the second replica.\n        is_emergency = true;\n        ddebug(\"gpid(%s): is emergency due to too few replicas\", gpid.to_string());\n    } else if (cc.dropped.empty()) {\n        is_emergency = true;\n        ddebug(\"gpid(%s): is emergency due to no dropped 
candidate\", gpid.to_string());\n    } else if (has_milliseconds_expired(cc.dropped.back().time +\n                                        _replica_assign_delay_ms_for_dropouts)) {\n        is_emergency = true;\n        char time_buf[30];\n        ::dsn::utils::time_ms_to_string(cc.dropped.back().time, time_buf);\n        ddebug(\"gpid(%s): is emergency due to lose secondary for a long time, \"\n               \"last_dropped_node(%s), drop_time(%s), delay_ms(%\" PRIu64 \")\",\n               gpid.to_string(),\n               cc.dropped.back().node.to_string(),\n               time_buf,\n               _replica_assign_delay_ms_for_dropouts);\n    } else if (in_black_list(cc.dropped.back().node)) {\n        ddebug(\"gpid(%s) is emergency due to recent dropped(%s) is in black list\",\n               gpid.to_string(),\n               cc.dropped.back().node.to_string());\n        is_emergency = true;\n    }\n    action.node.set_invalid();\n\n    if (is_emergency) {\n        std::ostringstream oss;\n        for (int i = 0; i < cc.dropped.size(); ++i) {\n            if (i != 0)\n                oss << \",\";\n            oss << cc.dropped[i].node.to_string();\n        }\n        ddebug(\"gpid(%s): try to choose node in dropped list, dropped_list(%s), \"\n               \"prefered_dropped(%d)\",\n               gpid.to_string(),\n               oss.str().c_str(),\n               cc.prefered_dropped);\n        if (cc.prefered_dropped < 0 || cc.prefered_dropped >= (int)cc.dropped.size()) {\n            ddebug(\"gpid(%s): prefered_dropped(%d) is invalid according to drop_list(size %d), \"\n                   \"reset it to %d (drop_list.size - 1)\",\n                   gpid.to_string(),\n                   cc.prefered_dropped,\n                   (int)cc.dropped.size(),\n                   (int)cc.dropped.size() - 1);\n            cc.prefered_dropped = (int)cc.dropped.size() - 1;\n        }\n\n        while (cc.prefered_dropped >= 0) {\n            const dropped_replica 
&server = cc.dropped[cc.prefered_dropped];\n            if (is_node_alive(*view.nodes, server.node)) {\n                ddebug(\"gpid(%s): node(%s) at cc.dropped[%d] is alive now, choose it, \"\n                       \"and forward prefered_dropped from (%d) to (%d)\",\n                       gpid.to_string(),\n                       server.node.to_string(),\n                       cc.prefered_dropped,\n                       cc.prefered_dropped,\n                       cc.prefered_dropped - 1);\n                action.node = server.node;\n                cc.prefered_dropped--;\n                break;\n            } else {\n                ddebug(\"gpid(%s): node(%s) at cc.dropped[%d] is not alive now, \"\n                       \"changed prefered_dropped from (%d) to (%d)\",\n                       gpid.to_string(),\n                       server.node.to_string(),\n                       cc.prefered_dropped,\n                       cc.prefered_dropped,\n                       cc.prefered_dropped - 1);\n                cc.prefered_dropped--;\n            }\n        }\n\n        if (action.node.is_invalid() || in_black_list(action.node)) {\n            if (!action.node.is_invalid()) {\n                ddebug(\"gpid(%s) refuse to use selected node(%s) as it is in black list\",\n                       gpid.to_string(),\n                       action.node.to_string());\n            }\n            newly_partitions *min_server_np = nullptr;\n            for (auto &pairs : *view.nodes) {\n                node_state &ns = pairs.second;\n                if (!ns.alive() || is_member(pc, ns.addr()) || in_black_list(ns.addr()))\n                    continue;\n                newly_partitions *np = newly_partitions_ext::get_inited(&ns);\n                if (min_server_np == nullptr ||\n                    np->less_partitions(*min_server_np, gpid.get_app_id())) {\n                    action.node = ns.addr();\n                    min_server_np = np;\n                }\n           
 }\n\n            if (!action.node.is_invalid()) {\n                ddebug(\"gpid(%s): can't find valid node in dropped list to add as secondary, \"\n                       \"choose new node(%s) with minimal partitions serving\",\n                       gpid.to_string(),\n                       action.node.to_string());\n            } else {\n                ddebug(\"gpid(%s): can't find valid node in dropped list to add as secondary, \"\n                       \"but also we can't find a new node to add as secondary\",\n                       gpid.to_string());\n            }\n        }\n    } else {\n        // if not emergency, only try to recover last dropped server\n        const dropped_replica &server = cc.dropped.back();\n        if (is_node_alive(*view.nodes, server.node)) {\n            dassert(!server.node.is_invalid(),\n                    \"invalid server address, address = %s\",\n                    server.node.to_string());\n            action.node = server.node;\n        }\n\n        if (!action.node.is_invalid()) {\n            ddebug(\"gpid(%s): choose node(%s) as secondary coz it is last_dropped_node and is \"\n                   \"alive now\",\n                   gpid.to_string(),\n                   server.node.to_string());\n        } else {\n            ddebug(\"gpid(%s): can't add secondary coz last_dropped_node(%s) is not alive now, \"\n                   \"ignore this as not in emergency\",\n                   gpid.to_string(),\n                   server.node.to_string());\n        }\n    }\n\n    if (!action.node.is_invalid()) {\n        action.type = config_type::CT_ADD_SECONDARY;\n        action.target = pc.primary;\n\n        newly_partitions *np = get_newly_partitions(*(view.nodes), action.node);\n        dassert(np != nullptr, \"\");\n        np->newly_add_partition(gpid.get_app_id());\n\n        cc.lb_actions.assign_cure_proposal(action);\n    }\n\n    return pc_status::ill;\n}\n\npc_status 
partition_guardian::on_redundant_secondary(meta_view &view, const dsn::gpid &gpid)\n{\n    const node_mapper &nodes = *(view.nodes);\n    const partition_configuration &pc = *get_config(*(view.apps), gpid);\n    int target = 0;\n    int load = nodes.find(pc.secondaries.front())->second.partition_count();\n    for (int i = 0; i != pc.secondaries.size(); ++i) {\n        int l = nodes.find(pc.secondaries[i])->second.partition_count();\n        if (l > load) {\n            load = l;\n            target = i;\n        }\n    }\n\n    configuration_proposal_action action;\n    action.type = config_type::CT_REMOVE;\n    action.node = pc.secondaries[target];\n    action.target = pc.primary;\n\n    // TODO: treat remove as cure proposals too\n    get_config_context(*view.apps, gpid)->lb_actions.assign_balancer_proposals({action});\n    return pc_status::ill;\n}\n\nvoid partition_guardian::finish_cure_proposal(meta_view &view,\n                                              const dsn::gpid &gpid,\n                                              const configuration_proposal_action &act)\n{\n    newly_partitions *np = get_newly_partitions(*(view.nodes), act.node);\n    if (np == nullptr) {\n        ddebug(\"can't get the newly_partitions extension structure for node(%s), \"\n               \"the node may be dead and removed\",\n               act.node.to_string());\n    } else {\n        if (act.type == config_type::CT_ASSIGN_PRIMARY) {\n            np->newly_remove_primary(gpid.get_app_id(), false);\n        } else if (act.type == config_type::CT_UPGRADE_TO_PRIMARY) {\n            np->newly_remove_primary(gpid.get_app_id(), true);\n        } else if (act.type == config_type::CT_UPGRADE_TO_SECONDARY ||\n                   act.type == config_type::CT_ADD_SECONDARY) {\n            np->newly_remove_partition(gpid.get_app_id());\n        }\n    }\n}\n\nvoid partition_guardian::register_ctrl_commands()\n{\n    _ctrl_assign_delay_ms = dsn::command_manager::instance().register_command(\n 
       {\"meta.lb.assign_delay_ms\"},\n        \"lb.assign_delay_ms [num | DEFAULT]\",\n        \"control the replica_assign_delay_ms_for_dropouts config\",\n        [this](const std::vector<std::string> &args) { return ctrl_assign_delay_ms(args); });\n\n    _ctrl_assign_secondary_black_list = dsn::command_manager::instance().register_command(\n        {\"meta.lb.assign_secondary_black_list\"},\n        \"lb.assign_secondary_black_list [<ip:port,ip:port,ip:port>|clear]\",\n        \"control the assign secondary black list\",\n        [this](const std::vector<std::string> &args) {\n            return ctrl_assign_secondary_black_list(args);\n        });\n}\n\nvoid partition_guardian::unregister_ctrl_commands()\n{\n    UNREGISTER_VALID_HANDLER(_ctrl_assign_delay_ms);\n    UNREGISTER_VALID_HANDLER(_ctrl_assign_secondary_black_list);\n}\n\nstd::string partition_guardian::ctrl_assign_delay_ms(const std::vector<std::string> &args)\n{\n    std::string result(\"OK\");\n    if (args.empty()) {\n        result = std::to_string(_replica_assign_delay_ms_for_dropouts);\n    } else {\n        if (args[0] == \"DEFAULT\") {\n            _replica_assign_delay_ms_for_dropouts =\n                _svc->get_meta_options()._lb_opts.replica_assign_delay_ms_for_dropouts;\n        } else {\n            int32_t v = 0;\n            if (!dsn::buf2int32(args[0], v) || v <= 0) {\n                result = std::string(\"ERR: invalid arguments\");\n            } else {\n                _replica_assign_delay_ms_for_dropouts = v;\n            }\n        }\n    }\n    return result;\n}\n\nstd::string\npartition_guardian::ctrl_assign_secondary_black_list(const std::vector<std::string> &args)\n{\n    std::string invalid_arguments(\"invalid arguments\");\n    std::stringstream oss;\n    if (args.empty()) {\n        dsn::zauto_read_lock l(_black_list_lock);\n        oss << \"get ok: \";\n        for (auto iter = _assign_secondary_black_list.begin();\n             iter != 
_assign_secondary_black_list.end();\n             ++iter) {\n            if (iter != _assign_secondary_black_list.begin())\n                oss << \",\";\n            oss << iter->to_string();\n        }\n        return oss.str();\n    }\n\n    if (args.size() != 1) {\n        return invalid_arguments;\n    }\n\n    dsn::zauto_write_lock l(_black_list_lock);\n    if (args[0] == \"clear\") {\n        _assign_secondary_black_list.clear();\n        return \"clear ok\";\n    }\n\n    std::vector<std::string> ip_ports;\n    dsn::utils::split_args(args[0].c_str(), ip_ports, ',');\n    if (args.size() == 0) {\n        return invalid_arguments;\n    }\n\n    std::set<dsn::rpc_address> addr_list;\n    for (const std::string &s : ip_ports) {\n        dsn::rpc_address addr;\n        if (!addr.from_string_ipv4(s.c_str())) {\n            return invalid_arguments;\n        }\n        addr_list.insert(addr);\n    }\n    _assign_secondary_black_list = std::move(addr_list);\n    return \"set ok\";\n}\n\nvoid partition_guardian::get_ddd_partitions(const gpid &pid,\n                                            std::vector<ddd_partition_info> &partitions)\n{\n    zauto_lock l(_ddd_partitions_lock);\n    if (pid.get_app_id() == -1) {\n        partitions.reserve(_ddd_partitions.size());\n        for (const auto &kv : _ddd_partitions) {\n            partitions.push_back(kv.second);\n        }\n    } else if (pid.get_partition_index() == -1) {\n        for (const auto &kv : _ddd_partitions) {\n            if (kv.first.get_app_id() == pid.get_app_id()) {\n                partitions.push_back(kv.second);\n            }\n        }\n    } else {\n        auto find = _ddd_partitions.find(pid);\n        if (find != _ddd_partitions.end()) {\n            partitions.push_back(find->second);\n        }\n    }\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/partition_guardian.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"meta_data.h\"\n\nnamespace dsn {\nnamespace replication {\nclass meta_service;\n\nclass partition_guardian\n{\npublic:\n    template <typename T>\n    static partition_guardian *create(meta_service *svc)\n    {\n        return new T(svc);\n    }\n    typedef partition_guardian *(*factory)(meta_service *svc);\n\n    explicit partition_guardian(meta_service *svc);\n    virtual ~partition_guardian() = default;\n\n    virtual pc_status\n    cure(meta_view view, const dsn::gpid &gpid, configuration_proposal_action &action);\n    void reconfig(meta_view view, const configuration_update_request &request);\n    void register_ctrl_commands();\n    void unregister_ctrl_commands();\n    void get_ddd_partitions(const gpid &pid, std::vector<ddd_partition_info> &partitions);\n    void clear_ddd_partitions()\n    {\n        zauto_lock l(_ddd_partitions_lock);\n        _ddd_partitions.clear();\n    }\n\nprivate:\n    bool\n    from_proposals(meta_view &view, const dsn::gpid &gpid, configuration_proposal_action &action);\n    pc_status on_missing_primary(meta_view &view, const dsn::gpid &gpid);\n    pc_status 
on_missing_secondary(meta_view &view, const dsn::gpid &gpid);\n    pc_status on_redundant_secondary(meta_view &view, const dsn::gpid &gpid);\n    // if a proposal is generated by cure, meta will record the POSSIBLE PARTITION COUNT\n    // IN FUTURE of a node with module \"newly_partitions\".\n    // the side effect should be eliminated when a proposal is finished, no matter\n    // successfully or unsuccessfully\n    void finish_cure_proposal(meta_view &view,\n                              const dsn::gpid &gpid,\n                              const configuration_proposal_action &action);\n    std::string ctrl_assign_delay_ms(const std::vector<std::string> &args);\n    std::string ctrl_assign_secondary_black_list(const std::vector<std::string> &args);\n\n    void set_ddd_partition(ddd_partition_info &&partition)\n    {\n        zauto_lock l(_ddd_partitions_lock);\n        _ddd_partitions[partition.config.pid] = std::move(partition);\n    }\n\n    bool in_black_list(dsn::rpc_address addr)\n    {\n        dsn::zauto_read_lock l(_black_list_lock);\n        return _assign_secondary_black_list.count(addr) != 0;\n    }\n\n    meta_service *_svc;\n    perf_counter_wrapper _recent_choose_primary_fail_count;\n\n    mutable zlock _ddd_partitions_lock; // [\n    std::map<gpid, ddd_partition_info> _ddd_partitions;\n    // ]\n\n    // NOTICE: the command handler is called in THREADPOOL_DEFAULT\n    // but when adding secondary, the black list is accessed in THREADPOOL_META_STATE\n    // so we need a lock to protect it\n    dsn::zrwlock_nr _black_list_lock; // [\n    std::set<dsn::rpc_address> _assign_secondary_black_list;\n    // ]\n    dsn_handle_t _ctrl_assign_secondary_black_list = nullptr;\n\n    int32_t _mutation_2pc_min_replica_count;\n    dsn_handle_t _ctrl_assign_delay_ms = nullptr;\n    uint64_t _replica_assign_delay_ms_for_dropouts;\n\n    friend class meta_partition_guardian_test;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/server_load_balancer.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"server_load_balancer.h\"\n#include <dsn/utility/extensible_object.h>\n#include <dsn/utility/string_conv.h>\n#include <dsn/tool-api/command_manager.h>\n#include <boost/lexical_cast.hpp>\n#include <dsn/utils/time_utils.h>\n\nnamespace dsn {\nnamespace replication {\nnewly_partitions::newly_partitions() : newly_partitions(nullptr) {}\n\nnewly_partitions::newly_partitions(node_state *ns)\n    : owner(ns), total_primaries(0), total_partitions(0)\n{\n}\n\nvoid *newly_partitions::s_create(void *related_ns)\n{\n    newly_partitions *result = new newly_partitions(reinterpret_cast<node_state *>(related_ns));\n    return result;\n}\n\nvoid newly_partitions::s_delete(void *_this) { delete reinterpret_cast<newly_partitions *>(_this); 
}\n\nbool newly_partitions::less_primaries(newly_partitions &another, int32_t app_id)\n{\n    int newly_p1 = primary_count(app_id);\n    int newly_p2 = another.primary_count(app_id);\n    if (newly_p1 != newly_p2)\n        return newly_p1 < newly_p2;\n\n    newly_p1 = partition_count(app_id);\n    newly_p2 = another.partition_count(app_id);\n    if (newly_p1 != newly_p2)\n        return newly_p1 < newly_p2;\n\n    newly_p1 = primary_count();\n    newly_p2 = another.primary_count();\n    if (newly_p1 != newly_p2)\n        return newly_p1 < newly_p2;\n\n    return partition_count() < another.partition_count();\n}\n\nbool newly_partitions::less_partitions(newly_partitions &another, int32_t app_id)\n{\n    int newly_p1 = partition_count(app_id);\n    int newly_p2 = another.partition_count(app_id);\n    if (newly_p1 != newly_p2)\n        return newly_p1 < newly_p2;\n\n    return partition_count() < another.partition_count();\n}\n\nvoid newly_partitions::newly_add_primary(int32_t app_id, bool only_primary)\n{\n    ++primaries[app_id];\n    ++total_primaries;\n    if (!only_primary) {\n        ++partitions[app_id];\n        ++total_partitions;\n    }\n}\n\nvoid newly_partitions::newly_add_partition(int32_t app_id)\n{\n    ++partitions[app_id];\n    ++total_partitions;\n}\n\nvoid newly_partitions::newly_remove_primary(int32_t app_id, bool only_primary)\n{\n    auto iter = primaries.find(app_id);\n    dassert(iter != primaries.end(), \"invalid app_id, app_id = %d\", app_id);\n    dassert(iter->second > 0, \"invalid primary count, cnt = %d\", iter->second);\n    if (0 == (--iter->second)) {\n        primaries.erase(iter);\n    }\n\n    dassert(total_primaries > 0, \"invalid total primaires = %d\", total_primaries);\n    --total_primaries;\n\n    if (!only_primary) {\n        newly_remove_partition(app_id);\n    }\n}\n\nvoid newly_partitions::newly_remove_partition(int32_t app_id)\n{\n    auto iter = partitions.find(app_id);\n    dassert(iter != partitions.end(), \"invalid 
app_id, app_id = %d\", app_id);\n    dassert(iter->second > 0, \"invalid partition count, cnt = %d\", iter->second);\n    if ((--iter->second) == 0) {\n        partitions.erase(iter);\n    }\n\n    dassert(total_partitions > 0, \"invalid total partitions = \", total_partitions);\n    --total_partitions;\n}\n\nnewly_partitions *get_newly_partitions(node_mapper &mapper, const dsn::rpc_address &addr)\n{\n    node_state *ns = get_node_state(mapper, addr, false);\n    if (ns == nullptr)\n        return nullptr;\n    return newly_partitions_ext::get_inited(ns);\n}\n\nclass local_module_initializer\n{\nprivate:\n    local_module_initializer()\n    {\n        newly_partitions_ext::register_ext(newly_partitions::s_create, newly_partitions::s_delete);\n    }\n\npublic:\n    static local_module_initializer _instance;\n};\nlocal_module_initializer local_module_initializer::_instance;\n//// end of server load balancer extensions for node state\n\nserver_load_balancer::server_load_balancer(meta_service *svc) : _svc(svc) {}\n\nvoid server_load_balancer::register_proposals(meta_view view,\n                                              const configuration_balancer_request &req,\n                                              configuration_balancer_response &resp)\n{\n    config_context &cc = *get_config_context(*view.apps, req.gpid);\n    partition_configuration &pc = *get_config(*view.apps, req.gpid);\n    if (!cc.lb_actions.empty()) {\n        resp.err = ERR_INVALID_PARAMETERS;\n        return;\n    }\n\n    std::vector<configuration_proposal_action> acts = req.action_list;\n    for (configuration_proposal_action &act : acts) {\n        // for some client generated proposals, the sender may not know the primary address.\n        // e.g: \"copy_secondary from a to b\".\n        // the client only knows the secondary a and secondary b, it doesn't know which target\n        // to send the proposal to.\n        // for these proposals, they should keep the target empty and\n        // 
the meta-server will fill primary as target.\n        if (act.target.is_invalid()) {\n            if (!pc.primary.is_invalid())\n                act.target = pc.primary;\n            else {\n                resp.err = ERR_INVALID_PARAMETERS;\n                return;\n            }\n        }\n    }\n\n    resp.err = ERR_OK;\n    cc.lb_actions.assign_balancer_proposals(acts);\n    return;\n}\n\nvoid server_load_balancer::apply_balancer(meta_view view, const migration_list &ml)\n{\n    if (!ml.empty()) {\n        configuration_balancer_response resp;\n        for (auto &pairs : ml) {\n            register_proposals(view, *pairs.second, resp);\n            if (resp.err != dsn::ERR_OK) {\n                const dsn::gpid &pid = pairs.first;\n                dassert(false,\n                        \"apply balancer for gpid(%d.%d) failed\",\n                        pid.get_app_id(),\n                        pid.get_partition_index());\n            }\n        }\n    }\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/server_load_balancer.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     base interface of the server load balancer which defines the scheduling\n *     policy of how to place the partition replica to the nodes\n *\n * Revision history:\n *     2015-12-29, @imzhenyu (Zhenyu Guo), first draft\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/service_api_cpp.h>\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/utility/error_code.h>\n#include <string>\n#include <functional>\n#include <memory>\n#include <algorithm>\n#include <set>\n#include \"meta_data.h\"\n#include \"meta_service.h\"\n\nnamespace dsn {\nnamespace replication {\n/// server load balancer extensions for node_state\n/// record the newly 
assigned but not finished replicas for each node, to make the assigning\n/// process more balanced.\nclass newly_partitions\n{\npublic:\n    newly_partitions();\n    newly_partitions(node_state *ns);\n    node_state *owner;\n    int total_primaries;\n    int total_partitions;\n    std::map<int32_t, int32_t> primaries;\n    std::map<int32_t, int32_t> partitions;\n\n    int32_t primary_count(int32_t app_id)\n    {\n        return owner->primary_count(app_id) + primaries[app_id];\n    }\n    int32_t partition_count(int32_t app_id)\n    {\n        return owner->partition_count(app_id) + partitions[app_id];\n    }\n\n    int32_t primary_count() { return total_primaries + owner->primary_count(); }\n    int32_t partition_count() { return total_partitions + owner->partition_count(); }\n\n    bool less_primaries(newly_partitions &another, int32_t app_id);\n    bool less_partitions(newly_partitions &another, int32_t app_id);\n    void newly_add_primary(int32_t app_id, bool only_primary);\n    void newly_add_partition(int32_t app_id);\n\n    void newly_remove_primary(int32_t app_id, bool only_primary);\n    void newly_remove_partition(int32_t app_id);\n\npublic:\n    static void *s_create(void *related_ns);\n    static void s_delete(void *_this);\n};\ntypedef dsn::object_extension_helper<newly_partitions, node_state> newly_partitions_ext;\nnewly_partitions *get_newly_partitions(node_mapper &mapper, const dsn::rpc_address &addr);\n\nclass server_load_balancer\n{\npublic:\n    template <typename T>\n    static server_load_balancer *create(meta_service *svc)\n    {\n        return new T(svc);\n    }\n    typedef server_load_balancer *(*factory)(meta_service *svc);\n\npublic:\n    server_load_balancer(meta_service *svc);\n    virtual ~server_load_balancer() {}\n\n    //\n    // Make balancer proposals by round according to current meta-view\n    // params:\n    //   view: current meta-view\n    //   list: the returned balance results\n    // ret:\n    //   if any balancer 
proposal is generated, return true. Or-else, false\n    //\n    virtual bool balance(meta_view view, migration_list &list) = 0;\n\n    //\n    // Make full balancer proposals according to current meta-view\n    // params:\n    //   view: current meta-view\n    //   list: the returned balance results\n    // ret:\n    //   if any balancer proposal is generated, return true. Or-else, false\n    //\n    virtual bool check(meta_view view, migration_list &list) = 0;\n\n    //\n    // Report balancer proposals\n    // params:\n    //   list: balancer proposals\n    //   balance_checker: report the count of balance operation to be done if true, otherwise report\n    //   both the operation count and action details done by balancer\n    //\n    virtual void report(const migration_list &list, bool balance_checker) = 0;\n\n    //\n    // Calculate cluster balance score\n    // params:\n    //   view: current meta-view\n    //   primary_stddev: output, stddev of primary count on each node\n    //   total_stddev: output, stddev of total replica count on each node\n    //\n    virtual void\n    score(meta_view view, double &primary_stddev /*out*/, double &total_stddev /*out*/) = 0;\n\n    void register_proposals(meta_view view,\n                            const configuration_balancer_request &req,\n                            configuration_balancer_response &resp);\n    void apply_balancer(meta_view view, const migration_list &ml);\n\n    //\n    // Try to register some cli-commands\n    //\n    // ATTENTION: because this function will register the cli-commands to singleton-container,\n    // so\n    // you must unregister the commands that you have already registered or release the instance\n    // of\n    // server_load_balancer before you call this function again\n    //\n    virtual void register_ctrl_commands() {}\n\n    //\n    // Try to unregister cli-commands\n    //\n    virtual void unregister_ctrl_commands() {}\n\n    //\n    // Get balancer proposal counts\n    // 
params:\n    //   args: proposal type\n    // ret: balancer proposal counts in string\n    //\n    virtual std::string get_balance_operation_count(const std::vector<std::string> &args) = 0;\n\npublic:\n    typedef std::function<bool(const rpc_address &addr1, const rpc_address &addr2)> node_comparator;\n    static node_comparator primary_comparator(const node_mapper &nodes)\n    {\n        return [&nodes](const rpc_address &r1, const rpc_address &r2) {\n            int p1 = nodes.find(r1)->second.primary_count();\n            int p2 = nodes.find(r2)->second.primary_count();\n            if (p1 != p2)\n                return p1 < p2;\n            return r1 < r2;\n        };\n    }\n\n    static node_comparator partition_comparator(const node_mapper &nodes)\n    {\n        return [&nodes](const rpc_address &r1, const rpc_address &r2) {\n            int p1 = nodes.find(r1)->second.partition_count();\n            int p2 = nodes.find(r2)->second.partition_count();\n            if (p1 != p2)\n                return p1 < p2;\n            return r1 < r2;\n        };\n    }\n\nprotected:\n    meta_service *_svc;\n};\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/server_state.cpp",
    "content": "/*\n\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     the meta server's server_state, impl file\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     2016-04-25, Weijie Sun(sunweijie at xiaomi.com), refactor\n */\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication/replica_envs.h>\n#include <dsn/utility/factory_store.h>\n#include <dsn/utility/string_conv.h>\n#include <dsn/utility/strings.h>\n#include <dsn/tool-api/task.h>\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/tool-api/async_calls.h>\n#include <sstream>\n#include <cinttypes>\n#include <string>\n#include <boost/lexical_cast.hpp>\n\n#include \"server_state.h\"\n#include \"server_load_balancer.h\"\n#include \"dump_file.h\"\n#include 
\"app_env_validator.h\"\n#include \"meta_bulk_load_service.h\"\n\nusing namespace dsn;\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DEFINE_int32(\"meta_server\",\n                 max_allowed_replica_count,\n                 5,\n                 \"max replica count allowed for any app of a cluster\");\nDSN_TAG_VARIABLE(max_allowed_replica_count, FT_MUTABLE);\nDSN_DEFINE_validator(max_allowed_replica_count, [](int32_t allowed_replica_count) -> bool {\n    return allowed_replica_count > 0;\n});\n\nDSN_DEFINE_int32(\"meta_server\",\n                 min_allowed_replica_count,\n                 1,\n                 \"min replica count allowed for any app of a cluster\");\nDSN_TAG_VARIABLE(min_allowed_replica_count, FT_MUTABLE);\nDSN_DEFINE_validator(min_allowed_replica_count, [](int32_t allowed_replica_count) -> bool {\n    return allowed_replica_count > 0;\n});\n\nDSN_DEFINE_group_validator(min_max_allowed_replica_count, [](std::string &message) -> bool {\n    if (FLAGS_min_allowed_replica_count > FLAGS_max_allowed_replica_count) {\n        message = fmt::format(\"meta_server.min_allowed_replica_count({}) should be <= \"\n                              \"meta_server.max_allowed_replica_count({})\",\n                              FLAGS_min_allowed_replica_count,\n                              FLAGS_max_allowed_replica_count);\n        return false;\n    }\n    return true;\n});\n\nstatic const char *lock_state = \"lock\";\nstatic const char *unlock_state = \"unlock\";\n\nserver_state::server_state()\n    : _meta_svc(nullptr),\n      _add_secondary_enable_flow_control(false),\n      _add_secondary_max_count_for_one_node(0),\n      _cli_dump_handle(nullptr),\n      _ctrl_add_secondary_enable_flow_control(nullptr),\n      _ctrl_add_secondary_max_count_for_one_node(nullptr)\n{\n}\n\nserver_state::~server_state()\n{\n    _tracker.cancel_outstanding_tasks();\n    UNREGISTER_VALID_HANDLER(_cli_dump_handle);\n    
UNREGISTER_VALID_HANDLER(_ctrl_add_secondary_enable_flow_control);\n    UNREGISTER_VALID_HANDLER(_ctrl_add_secondary_max_count_for_one_node);\n}\n\nvoid server_state::register_cli_commands()\n{\n    _cli_dump_handle = dsn::command_manager::instance().register_command(\n        {\"meta.dump\"},\n        \"meta.dump - dump app_states of meta server to local file\",\n        \"meta.dump -t|--target target_file\",\n        [this](const std::vector<std::string> &args) {\n            dsn::error_code err;\n            if (args.size() != 2) {\n                err = ERR_INVALID_PARAMETERS;\n            } else {\n                const char *target_file = nullptr;\n                for (int i = 0; i < args.size(); i += 2) {\n                    if (strcmp(args[i].c_str(), \"-t\") == 0 ||\n                        strcmp(args[i].c_str(), \"--target\") == 0)\n                        target_file = args[i + 1].c_str();\n                }\n                if (target_file == nullptr) {\n                    err = ERR_INVALID_PARAMETERS;\n                } else {\n                    err = this->dump_from_remote_storage(target_file, false);\n                }\n            }\n            return std::string(err.to_string());\n        });\n    dassert(_cli_dump_handle != nullptr, \"register cli handler failed\");\n\n    _ctrl_add_secondary_enable_flow_control = dsn::command_manager::instance().register_command(\n        {\"meta.lb.add_secondary_enable_flow_control\"},\n        \"meta.lb.add_secondary_enable_flow_control <true|false>\",\n        \"control whether enable add secondary flow control\",\n        [this](const std::vector<std::string> &args) {\n            return remote_command_set_bool_flag(\n                _add_secondary_enable_flow_control, \"lb.add_secondary_enable_flow_control\", args);\n        });\n    dassert(_ctrl_add_secondary_enable_flow_control, \"register cli handler failed\");\n\n    _ctrl_add_secondary_max_count_for_one_node = 
dsn::command_manager::instance().register_command(\n        {\"meta.lb.add_secondary_max_count_for_one_node\"},\n        \"meta.lb.add_secondary_max_count_for_one_node [num | DEFAULT]\",\n        \"control the max count to add secondary for one node\",\n        [this](const std::vector<std::string> &args) {\n            std::string result(\"OK\");\n            if (args.empty()) {\n                result = std::to_string(_add_secondary_max_count_for_one_node);\n            } else {\n                if (args[0] == \"DEFAULT\") {\n                    _add_secondary_max_count_for_one_node =\n                        _meta_svc->get_meta_options().add_secondary_max_count_for_one_node;\n                } else {\n                    int32_t v = 0;\n                    if (!dsn::buf2int32(args[0], v) || v < 0) {\n                        result = std::string(\"ERR: invalid arguments\");\n                    } else {\n                        _add_secondary_max_count_for_one_node = v;\n                    }\n                }\n            }\n            return result;\n        });\n    dassert(_ctrl_add_secondary_max_count_for_one_node, \"register cli handler failed\");\n}\n\nvoid server_state::initialize(meta_service *meta_svc, const std::string &apps_root)\n{\n    _meta_svc = meta_svc;\n    _apps_root = apps_root;\n    _add_secondary_enable_flow_control =\n        _meta_svc->get_meta_options().add_secondary_enable_flow_control;\n    _add_secondary_max_count_for_one_node =\n        _meta_svc->get_meta_options().add_secondary_max_count_for_one_node;\n\n    _dead_partition_count.init_app_counter(\"eon.server_state\",\n                                           \"dead_partition_count\",\n                                           COUNTER_TYPE_NUMBER,\n                                           \"current dead partition count\");\n    _unreadable_partition_count.init_app_counter(\"eon.server_state\",\n                                                 
\"unreadable_partition_count\",\n                                                 COUNTER_TYPE_NUMBER,\n                                                 \"current unreadable partition count\");\n    _unwritable_partition_count.init_app_counter(\"eon.server_state\",\n                                                 \"unwritable_partition_count\",\n                                                 COUNTER_TYPE_NUMBER,\n                                                 \"current unwritable partition count\");\n    _writable_ill_partition_count.init_app_counter(\"eon.server_state\",\n                                                   \"writable_ill_partition_count\",\n                                                   COUNTER_TYPE_NUMBER,\n                                                   \"current writable ill partition count\");\n    _healthy_partition_count.init_app_counter(\"eon.server_state\",\n                                              \"healthy_partition_count\",\n                                              COUNTER_TYPE_NUMBER,\n                                              \"current healthy partition count\");\n    _recent_update_config_count.init_app_counter(\"eon.server_state\",\n                                                 \"recent_update_config_count\",\n                                                 COUNTER_TYPE_VOLATILE_NUMBER,\n                                                 \"update configuration count in the recent period\");\n    _recent_partition_change_unwritable_count.init_app_counter(\n        \"eon.server_state\",\n        \"recent_partition_change_unwritable_count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"partition change to unwritable count in the recent period\");\n    _recent_partition_change_writable_count.init_app_counter(\n        \"eon.server_state\",\n        \"recent_partition_change_writable_count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"partition change to writable count in the recent 
period\");\n}\n\nbool server_state::spin_wait_staging(int timeout_seconds)\n{\n    while ((timeout_seconds == -1 || timeout_seconds > 0)) {\n        int c = 0;\n        {\n            zauto_read_lock l(_lock);\n            c = count_staging_app();\n        }\n        if (c == 0) {\n            return true;\n        }\n        ddebug(\"there are (%d) apps still in staging, just wait...\", c);\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n        if (timeout_seconds > 0) {\n            --timeout_seconds;\n        }\n    }\n    return false;\n}\n\nint server_state::count_staging_app()\n{\n    int ans = 0;\n    for (const auto &app_kv : _all_apps) {\n        if (app_kv.second->status == app_status::AS_CREATING ||\n            app_kv.second->status == app_status::AS_DROPPING ||\n            app_kv.second->status == app_status::AS_RECALLING)\n            ++ans;\n    }\n    return ans;\n}\n\nvoid server_state::transition_staging_state(std::shared_ptr<app_state> &app)\n{\n#define send_response(meta, msg, response_data)                                                    \\\n    do {                                                                                           \\\n        if (msg != nullptr) {                                                                      \\\n            meta->reply_data(msg, response_data);                                                  \\\n            msg->release_ref();                                                                    \\\n            msg = nullptr;                                                                         \\\n        }                                                                                          \\\n    } while (0)\n\n    app_status::type old_status = app->status;\n    if (app->status == app_status::AS_CREATING) {\n        app->status = app_status::AS_AVAILABLE;\n        configuration_create_app_response resp;\n        resp.err = dsn::ERR_OK;\n        resp.appid = 
app->app_id;\n        send_response(_meta_svc, app->helpers->pending_response, resp);\n    } else if (app->status == app_status::AS_DROPPING) {\n        app->status = app_status::AS_DROPPED;\n        configuration_drop_app_response resp;\n        resp.err = dsn::ERR_OK;\n        send_response(_meta_svc, app->helpers->pending_response, resp);\n    } else if (app->status == app_status::AS_RECALLING) {\n        app->status = app_status::AS_AVAILABLE;\n        configuration_recall_app_response resp;\n        resp.err = dsn::ERR_OK;\n        resp.info = *app;\n        send_response(_meta_svc, app->helpers->pending_response, resp);\n    } else {\n        dassert(false,\n                \"app(%s) not in staging state(%s)\",\n                app->get_logname(),\n                enum_to_string(app->status));\n    }\n\n    ddebug(\"app(%s) transfer from %s to %s\",\n           app->get_logname(),\n           enum_to_string(old_status),\n           enum_to_string(app->status));\n#undef send_response\n}\n\nvoid server_state::process_one_partition(std::shared_ptr<app_state> &app)\n{\n    int ans = --app->helpers->partitions_in_progress;\n    if (ans > 0) {\n        dinfo(\"app(%s) in status %s, can't transfer to stable state as some partition is in \"\n              \"progressing\",\n              app->get_logname(),\n              enum_to_string(app->status));\n        return;\n    } else if (ans == 0) {\n        transition_staging_state(app);\n    } else {\n        dassert(false, \"partitions in progress(%d) shouldn't be negetive\", ans);\n    }\n}\n\nerror_code server_state::dump_app_states(const char *local_path,\n                                         const std::function<app_state *()> &iterator)\n{\n    std::shared_ptr<dump_file> file = dump_file::open_file(local_path, true);\n    if (file == nullptr) {\n        derror(\"open file failed, file(%s)\", local_path);\n        return ERR_FILE_OPERATION_FAILED;\n    }\n\n    file->append_buffer(\"binary\", 6);\n    app_state 
*app;\n    while ((app = iterator()) != nullptr) {\n        dassert(app->status == app_status::AS_AVAILABLE || app->status == app_status::AS_DROPPED,\n                \"invalid app status\");\n        binary_writer writer;\n        dsn::marshall(writer, *app, DSF_THRIFT_BINARY);\n        file->append_buffer(writer.get_buffer());\n        for (const partition_configuration &pc : app->partitions) {\n            binary_writer writer;\n            dsn::marshall(writer, pc, DSF_THRIFT_BINARY);\n            file->append_buffer(writer.get_buffer());\n        }\n    }\n    return ERR_OK;\n}\n\nerror_code server_state::dump_from_remote_storage(const char *local_path, bool sync_immediately)\n{\n    error_code ec;\n\n    if (sync_immediately) {\n        ec = sync_apps_from_remote_storage();\n        if (ec == ERR_OBJECT_NOT_FOUND) {\n            ddebug(\"remote storage is empty, just stop the dump\");\n            return ERR_OK;\n        } else if (ec != ERR_OK) {\n            derror(\"sync from remote storage failed, err(%s)\", ec.to_string());\n            return ec;\n        } else {\n            spin_wait_staging();\n        }\n        auto iter_begin = _all_apps.begin();\n        auto iter_end = _all_apps.end();\n        return dump_app_states(local_path, [&iter_begin, &iter_end]() -> app_state * {\n            if (iter_begin == iter_end)\n                return nullptr;\n            app_state *result = iter_begin->second.get();\n            ++iter_begin;\n            return result;\n        });\n    } else {\n        std::vector<app_state> snapshots;\n        {\n            zauto_read_lock l(_lock);\n            if (count_staging_app() != 0) {\n                ddebug(\"there are apps in staging, skip this dump\");\n                return ERR_INVALID_STATE;\n            }\n            snapshots.reserve(_all_apps.size());\n            for (auto &app_pair : _all_apps)\n                snapshots.push_back(*(app_pair.second));\n        }\n        auto iter_begin = 
snapshots.begin(), iter_end = snapshots.end();\n        return dump_app_states(local_path, [&iter_begin, &iter_end]() -> app_state * {\n            if (iter_begin == iter_end)\n                return nullptr;\n            app_state *result = &(*iter_begin);\n            ++iter_begin;\n            return result;\n        });\n    }\n}\n\nerror_code server_state::restore_from_local_storage(const char *local_path)\n{\n    error_code ec;\n\n    std::shared_ptr<dump_file> file = dump_file::open_file(local_path, false);\n    if (file == nullptr) {\n        derror(\"open file failed, file(%s)\", local_path);\n        return ERR_FILE_OPERATION_FAILED;\n    }\n\n    blob data;\n    dassert(file->read_next_buffer(data) == 1, \"read format header fail\");\n    _all_apps.clear();\n\n    dassert(memcmp(data.data(), \"binary\", 6) == 0, \"\");\n    while (true) {\n        int ans = file->read_next_buffer(data);\n        dassert(ans != -1, \"read file failed\");\n        if (ans == 0) // file end\n            break;\n\n        app_info info;\n        binary_reader reader(data);\n        unmarshall(reader, info, DSF_THRIFT_BINARY);\n        std::shared_ptr<app_state> app = app_state::create(info);\n        _all_apps.emplace(app->app_id, app);\n\n        for (unsigned int i = 0; i != app->partition_count; ++i) {\n            ans = file->read_next_buffer(data);\n            binary_reader reader(data);\n            dassert(ans == 1, \"unexpect read buffer, ret(%d)\", ans);\n            unmarshall(reader, app->partitions[i], DSF_THRIFT_BINARY);\n            dassert(app->partitions[i].pid.get_partition_index() == i,\n                    \"uncorrect partition data, gpid(%d.%d), appname(%s)\",\n                    app->app_id,\n                    i,\n                    app->app_name.c_str());\n        }\n    }\n\n    for (auto &iter : _all_apps) {\n        if (iter.second->status == app_status::AS_AVAILABLE)\n            iter.second->status = app_status::AS_CREATING;\n        else {\n  
          dassert(iter.second->status == app_status::AS_DROPPED,\n                    \"invalid app_status, status = %s\",\n                    enum_to_string(iter.second->status));\n            iter.second->status = app_status::AS_DROPPING;\n        }\n    }\n    ec = sync_apps_to_remote_storage();\n    if (ec != ERR_OK) {\n        _all_apps.clear();\n        return ec;\n    }\n    return ERR_OK;\n}\n\nerror_code server_state::initialize_default_apps()\n{\n    std::vector<const char *> sections;\n    dsn_config_get_all_sections(sections);\n    ddebug(\"start to do initialize\");\n\n    app_info default_app;\n    for (int i = 0; i < sections.size(); i++) {\n        if (strstr(sections[i], \"meta_server.apps\") == sections[i] ||\n            strcmp(sections[i], \"replication.app\") == 0) {\n            const char *s = sections[i];\n\n            default_app.status = app_status::AS_CREATING;\n            default_app.app_id = _all_apps.size() + 1;\n\n            default_app.app_name = dsn_config_get_value_string(s, \"app_name\", \"\", \"app name\");\n            if (default_app.app_name.length() == 0) {\n                dwarn(\"'[%s] app_name' not specified, ignore this section\", s);\n                continue;\n            }\n\n            default_app.app_type = dsn_config_get_value_string(s, \"app_type\", \"\", \"app type name\");\n            default_app.partition_count = (int)dsn_config_get_value_uint64(\n                s, \"partition_count\", 1, \"how many partitions the app should have\");\n            default_app.is_stateful =\n                dsn_config_get_value_bool(s, \"stateful\", true, \"whether this is a stateful app\");\n            default_app.max_replica_count = (int)dsn_config_get_value_uint64(\n                s, \"max_replica_count\", 3, \"max replica count in app\");\n            default_app.create_second = dsn_now_ms() / 1000;\n            std::string envs_str = dsn_config_get_value_string(s, \"envs\", \"\", \"app envs\");\n            bool 
parse = dsn::utils::parse_kv_map(envs_str.c_str(), default_app.envs, ',', '=');\n\n            dassert(default_app.app_type.length() > 0, \"'[%s] app_type' not specified\", s);\n            dassert(default_app.partition_count > 0, \"'[%s] partition_count' should > 0\", s);\n            dassert(parse, \"'[%s] envs' is invalid, envs = %s\", s, envs_str.c_str());\n\n            std::shared_ptr<app_state> app = app_state::create(default_app);\n            _all_apps.emplace(app->app_id, app);\n        }\n    }\n\n    error_code err = sync_apps_to_remote_storage();\n    if (err != ERR_OK) {\n        _all_apps.clear();\n        return err;\n    }\n    return ERR_OK;\n}\n\n// caller should ensure all apps are in staging: creating, dropping\nerror_code server_state::sync_apps_to_remote_storage()\n{\n    _exist_apps.clear();\n    for (auto &kv_pair : _all_apps) {\n        if (kv_pair.second->status == app_status::AS_CREATING) {\n            dassert(_exist_apps.find(kv_pair.second->app_name) == _exist_apps.end(),\n                    \"invalid app name, name = %s\",\n                    kv_pair.second->app_name.c_str());\n            _exist_apps.emplace(kv_pair.second->app_name, kv_pair.second);\n        }\n    }\n\n    // create cluster_root/apps node\n    std::string &apps_path = _apps_root;\n    error_code err;\n    dist::meta_state_service *storage = _meta_svc->get_remote_storage();\n\n    auto t = storage->create_node(apps_path,\n                                  LPC_META_CALLBACK,\n                                  [&err](error_code ec) { err = ec; },\n                                  blob(lock_state, 0, strlen(lock_state)));\n    t->wait();\n\n    if (err != ERR_NODE_ALREADY_EXIST && err != ERR_OK) {\n        derror(\"create root node /apps in meta store failed, err = %s\", err.to_string());\n        return err;\n    } else {\n        ddebug(\"set %s to lock state in remote storage\", _apps_root.c_str());\n    }\n\n    err = ERR_OK;\n    dsn::task_tracker tracker;\n   
 for (auto &kv : _all_apps) {\n        std::shared_ptr<app_state> &app = kv.second;\n        std::string path = get_app_path(*app);\n\n        dassert(app->status == app_status::AS_CREATING || app->status == app_status::AS_DROPPING,\n                \"invalid app status\");\n        blob value = app->to_json(app_status::AS_CREATING == app->status ? app_status::AS_AVAILABLE\n                                                                         : app_status::AS_DROPPED);\n        storage->create_node(path,\n                             LPC_META_CALLBACK,\n                             [&err, path](error_code ec) {\n                                 if (ec != ERR_OK && ec != ERR_NODE_ALREADY_EXIST) {\n                                     dwarn(\"create app node failed, path(%s) reason(%s)\",\n                                           path.c_str(),\n                                           ec.to_string());\n                                     err = ec;\n                                 } else {\n                                     ddebug(\"create app node %s ok\", path.c_str());\n                                 }\n                             },\n                             value,\n                             &tracker);\n    }\n    tracker.wait_outstanding_tasks();\n\n    if (err != ERR_OK) {\n        _exist_apps.clear();\n        return err;\n    }\n    for (auto &kv : _all_apps) {\n        std::shared_ptr<app_state> &app = kv.second;\n        for (unsigned int i = 0; i != app->partition_count; ++i) {\n            task_ptr init_callback =\n                tasking::create_task(LPC_META_STATE_HIGH, &tracker, [] {}, sStateHash);\n            init_app_partition_node(app, i, init_callback);\n        }\n    }\n    tracker.wait_outstanding_tasks();\n    t = _meta_svc->get_remote_storage()->set_data(_apps_root,\n                                                  blob(unlock_state, 0, strlen(unlock_state)),\n                                                  
LPC_META_STATE_HIGH,\n                                                  [&err](dsn::error_code e) { err = e; });\n    t->wait();\n    if (dsn::ERR_OK == err) {\n        ddebug(\"set %s to unlock state in remote storage\", _apps_root.c_str());\n        return err;\n    } else {\n        derror(\"set %s to unlock state in remote storage failed, reason(%s)\",\n               _apps_root.c_str(),\n               err.to_string());\n        return err;\n    }\n}\n\ndsn::error_code server_state::sync_apps_from_remote_storage()\n{\n    dsn::error_code err;\n    dsn::task_tracker tracker;\n\n    dist::meta_state_service *storage = _meta_svc->get_remote_storage();\n    auto sync_partition = [this, storage, &err, &tracker](\n        std::shared_ptr<app_state> &app, int partition_id, const std::string &partition_path) {\n        storage->get_data(\n            partition_path,\n            LPC_META_CALLBACK,\n            [this, app, partition_id, partition_path, &err](error_code ec,\n                                                            const blob &value) mutable {\n                if (ec == ERR_OK) {\n                    partition_configuration pc;\n                    dsn::json::json_forwarder<partition_configuration>::decode(value, pc);\n\n                    dassert(pc.pid.get_app_id() == app->app_id &&\n                                pc.pid.get_partition_index() == partition_id,\n                            \"invalid partition config\");\n                    {\n                        zauto_write_lock l(_lock);\n                        app->partitions[partition_id] = pc;\n                        for (const dsn::rpc_address &addr : pc.last_drops) {\n                            app->helpers->contexts[partition_id].record_drop_history(addr);\n                        }\n\n                        if (app->status == app_status::AS_CREATING &&\n                            (pc.partition_flags & pc_flags::dropped) != 0) {\n                            recall_partition(app, 
partition_id);\n                        } else if (app->status == app_status::AS_DROPPING &&\n                                   (pc.partition_flags & pc_flags::dropped) == 0) {\n                            drop_partition(app, partition_id);\n                        } else\n                            process_one_partition(app);\n                        // check consistency between app bulk_loading flag and app bulk load dir\n                        if (app->helpers->partitions_in_progress.load() == 0 &&\n                            app->status == app_status::AS_AVAILABLE &&\n                            _meta_svc->get_bulk_load_service()) {\n                            bool is_bulk_loading = app->is_bulk_loading;\n                            _meta_svc->get_bulk_load_service()->check_app_bulk_load_states(\n                                std::move(app), is_bulk_loading);\n                        }\n                    }\n                } else if (ec == ERR_OBJECT_NOT_FOUND) {\n                    auto init_partition_count = app->init_partition_count > 0\n                                                    ? 
app->init_partition_count\n                                                    : app->partition_count;\n                    if (partition_id < init_partition_count) {\n                        dwarn_f(\n                            \"partition node {} not exist on remote storage, may half create before\",\n                            partition_path);\n                        init_app_partition_node(app, partition_id, nullptr);\n                    } else if (partition_id >= app->partition_count / 2) {\n                        dwarn_f(\n                            \"partition node {} not exist on remote storage, may half split before\",\n                            partition_path);\n                        zauto_write_lock l(_lock);\n                        app->helpers->split_states.status[partition_id - app->partition_count / 2] =\n                            split_status::SPLITTING;\n                        app->helpers->split_states.splitting_count++;\n                        app->partitions[partition_id].ballot = invalid_ballot;\n                        app->partitions[partition_id].pid = gpid(app->app_id, partition_id);\n                        process_one_partition(app);\n                    }\n\n                } else {\n                    derror(\"get partition node failed, reason(%s)\", ec.to_string());\n                    err = ec;\n                }\n            },\n            &tracker);\n    };\n\n    auto sync_app = [&](const std::string &app_path) {\n        storage->get_data(\n            app_path,\n            LPC_META_CALLBACK,\n            [this, app_path, &err, &sync_partition](error_code ec, const blob &value) {\n                if (ec == ERR_OK) {\n                    app_info info;\n                    dassert(dsn::json::json_forwarder<app_info>::decode(value, info),\n                            \"invalid json data\");\n                    std::shared_ptr<app_state> app = app_state::create(info);\n                    {\n                       
 zauto_write_lock l(_lock);\n                        _all_apps.emplace(app->app_id, app);\n                        if (app->status == app_status::AS_AVAILABLE) {\n                            app->status = app_status::AS_CREATING;\n                            _exist_apps.emplace(app->app_name, app);\n                        } else if (app->status == app_status::AS_DROPPED) {\n                            app->status = app_status::AS_DROPPING;\n                        } else {\n                            dassert(false,\n                                    \"invalid status(%s) for app(%s) in remote storage\",\n                                    enum_to_string(app->status),\n                                    app->get_logname());\n                        }\n                    }\n                    app->helpers->split_states.splitting_count = 0;\n                    for (int i = 0; i < app->partition_count; i++) {\n                        std::string partition_path =\n                            app_path + \"/\" + boost::lexical_cast<std::string>(i);\n                        sync_partition(app, i, partition_path);\n                    }\n                } else {\n                    derror(\"get app info from meta state service failed, path = %s, err = %s\",\n                           app_path.c_str(),\n                           ec.to_string());\n                    err = ec;\n                }\n            },\n            &tracker);\n    };\n\n    _all_apps.clear();\n    _exist_apps.clear();\n\n    std::string transaction_state;\n    storage\n        ->get_data(_apps_root,\n                   LPC_META_CALLBACK,\n                   [&err, &transaction_state](error_code ec, const blob &value) {\n                       err = ec;\n                       if (ec == dsn::ERR_OK) {\n                           transaction_state.assign(value.data(), value.length());\n                       }\n                   })\n        ->wait();\n\n    if (ERR_OBJECT_NOT_FOUND == 
err)\n        return err;\n    dassert(ERR_OK == err, \"can't handle this error (%s)\", err.to_string());\n    dassert(transaction_state == std::string(unlock_state) || transaction_state.empty(),\n            \"invalid transaction state(%s)\",\n            transaction_state.c_str());\n\n    storage->get_children(\n        _apps_root,\n        LPC_META_CALLBACK,\n        [&](error_code ec, const std::vector<std::string> &apps) {\n            if (ec == ERR_OK) {\n                for (const auto &appid_str : apps) {\n                    sync_app(_apps_root + \"/\" + appid_str);\n                }\n            } else {\n                derror(\"get app list from meta state service failed, path = %s, err = %s\",\n                       _apps_root.c_str(),\n                       ec.to_string());\n                err = ec;\n            }\n        },\n        &tracker);\n    tracker.wait_outstanding_tasks();\n    if (err == ERR_OK) {\n        return _all_apps.empty() ? ERR_OBJECT_NOT_FOUND : ERR_OK;\n    }\n    return err;\n}\n\nvoid server_state::initialize_node_state()\n{\n    zauto_write_lock l(_lock);\n    for (auto &app_pair : _all_apps) {\n        app_state &app = *(app_pair.second);\n        for (partition_configuration &pc : app.partitions) {\n            if (!pc.primary.is_invalid()) {\n                node_state *ns = get_node_state(_nodes, pc.primary, true);\n                ns->put_partition(pc.pid, true);\n            }\n            for (auto &ep : pc.secondaries) {\n                dassert(!ep.is_invalid(), \"invalid secondary address, addr = %s\", ep.to_string());\n                node_state *ns = get_node_state(_nodes, ep, true);\n                ns->put_partition(pc.pid, false);\n            }\n        }\n    }\n    for (auto &node : _nodes) {\n        node.second.set_alive(true);\n    }\n    for (auto &app_pair : _all_apps) {\n        app_state &app = *(app_pair.second);\n        for (const partition_configuration &pc : app.partitions) {\n            
check_consistency(pc.pid);\n        }\n    }\n}\n\nerror_code server_state::initialize_data_structure()\n{\n    error_code err = sync_apps_from_remote_storage();\n    if (err == ERR_OBJECT_NOT_FOUND) {\n        if (_meta_svc->get_meta_options().recover_from_replica_server) {\n            return ERR_OBJECT_NOT_FOUND;\n        } else {\n            ddebug(\"can't find apps from remote storage, start to initialize default apps\");\n            err = initialize_default_apps();\n        }\n    } else if (err == ERR_OK) {\n        if (_meta_svc->get_meta_options().recover_from_replica_server) {\n            dassert(false,\n                    \"find apps from remote storage, but \"\n                    \"[meta_server].recover_from_replica_server = true\");\n        } else {\n            ddebug(\"sync apps from remote storage ok, get %d apps, init the node state accordingly\",\n                   _all_apps.size());\n            initialize_node_state();\n        }\n    }\n    return err;\n}\n\nvoid server_state::set_config_change_subscriber_for_test(config_change_subscriber subscriber)\n{\n    _config_change_subscriber = subscriber;\n}\n\nvoid server_state::set_replica_migration_subscriber_for_test(\n    replica_migration_subscriber subscriber)\n{\n    _replica_migration_subscriber = subscriber;\n}\n\n// partition server => meta server\n// this is done in meta_state_thread_pool\nvoid server_state::on_config_sync(configuration_query_by_node_rpc rpc)\n{\n    configuration_query_by_node_response &response = rpc.response();\n    const configuration_query_by_node_request &request = rpc.request();\n\n    bool reject_this_request = false;\n    response.__isset.gc_replicas = false;\n    ddebug(\"got config sync request from %s, stored_replicas_count(%d)\",\n           request.node.to_string(),\n           (int)request.stored_replicas.size());\n\n    {\n        zauto_read_lock l(_lock);\n\n        // sync the partitions to the replica server\n        node_state *ns = 
get_node_state(_nodes, request.node, false);\n        if (ns == nullptr) {\n            ddebug(\"node(%s) not found in meta server\", request.node.to_string());\n            response.err = ERR_OBJECT_NOT_FOUND;\n        } else {\n            response.err = ERR_OK;\n            unsigned int i = 0;\n            response.partitions.resize(ns->partition_count());\n            ns->for_each_partition([&, this](const gpid &pid) {\n                std::shared_ptr<app_state> app = get_app(pid.get_app_id());\n                dassert(app != nullptr, \"invalid app_id, app_id = %d\", pid.get_app_id());\n                config_context &cc = app->helpers->contexts[pid.get_partition_index()];\n\n                // config sync need the newest data to keep the perfect FD,\n                // so if the syncing config is related to the node, we may need to reject this\n                // request\n                if (cc.stage == config_status::pending_remote_sync) {\n                    configuration_update_request *req = cc.pending_sync_request.get();\n                    // when register child partition, stage is config_status::pending_remote_sync,\n                    // but cc.pending_sync_request is not set, see more in function\n                    // 'register_child_on_meta'\n                    if (req == nullptr || req->node == request.node)\n                        return false;\n                }\n\n                response.partitions[i].info = *app;\n                response.partitions[i].config = app->partitions[pid.get_partition_index()];\n                response.partitions[i].host_node = request.node;\n                // set meta_split_status\n                const split_state &app_split_states = app->helpers->split_states;\n                if (app->splitting()) {\n                    auto iter = app_split_states.status.find(pid.get_partition_index());\n                    if (iter != app_split_states.status.end()) {\n                        
response.partitions[i].__set_meta_split_status(iter->second);\n                    }\n                }\n                ++i;\n                return true;\n            });\n            if (i < response.partitions.size()) {\n                reject_this_request = true;\n            }\n        }\n\n        // handle the stored replicas & the gc replicas\n        if (!reject_this_request && request.__isset.stored_replicas) {\n            if (ns != nullptr)\n                ns->set_replicas_collect_flag(true);\n            const std::vector<replica_info> &replicas = request.stored_replicas;\n            meta_function_level::type level = _meta_svc->get_function_level();\n            // if the node serve the replica on the meta server, then we ignore it\n            // if the dropped servers on the meta servers are enough, we need to gc it\n            // there are not enough dropped servers, we need to add it to dropped\n            // the app is deleted but not expired, we need to ignore it\n            // if the app is deleted and expired, we need to gc it\n            for (const replica_info &rep : replicas) {\n                dinfo(\"receive stored replica from %s, pid(%d.%d)\",\n                      request.node.to_string(),\n                      rep.pid.get_app_id(),\n                      rep.pid.get_partition_index());\n                std::shared_ptr<app_state> app = get_app(rep.pid.get_app_id());\n                if (app == nullptr || rep.pid.get_partition_index() >= app->partition_count) {\n                    // This app has garbage partition after cancel split, the canceled child\n                    // partition should be gc\n                    if (app != nullptr &&\n                        rep.pid.get_partition_index() < app->partition_count * 2 &&\n                        rep.status == partition_status::PS_ERROR) {\n                        response.gc_replicas.push_back(rep);\n                        dwarn_f(\"notify node({}) to gc replica({}) because 
it is useless partition \"\n                                \"which is caused by cancel split\",\n                                request.node.to_string(),\n                                rep.pid);\n                    } else {\n                        // app is not recognized or partition is not recognized\n                        dassert(false,\n                                \"gpid({}) on node({}) is not exist on meta server, administrator \"\n                                \"should check consistency of meta data\",\n                                rep.pid,\n                                request.node.to_string());\n                    }\n                } else if (app->status == app_status::AS_DROPPED) {\n                    if (app->expire_second == 0) {\n                        ddebug(\"gpid(%d.%d) on node(%s) is of dropped table, but expire second is \"\n                               \"not specified, do not delete it for safety reason\",\n                               rep.pid.get_app_id(),\n                               rep.pid.get_partition_index(),\n                               request.node.to_string());\n                    } else if (has_seconds_expired(app->expire_second)) {\n                        // can delete replica only when expire second is explicitely specified and\n                        // expired.\n                        if (level <= meta_function_level::fl_steady) {\n                            ddebug(\"gpid(%d.%d) on node(%s) is of dropped and expired table, but \"\n                                   \"current function level is %s, do not delete it for safety \"\n                                   \"reason\",\n                                   rep.pid.get_app_id(),\n                                   rep.pid.get_partition_index(),\n                                   request.node.to_string(),\n                                   _meta_function_level_VALUES_TO_NAMES.find(level)->second);\n                        } else {\n          
                  response.gc_replicas.push_back(rep);\n                            dwarn(\"notify node(%s) to gc replica(%d.%d) coz the app is dropped and \"\n                                  \"expired\",\n                                  request.node.to_string(),\n                                  rep.pid.get_app_id(),\n                                  rep.pid.get_partition_index());\n                        }\n                    }\n                } else if (app->status == app_status::AS_AVAILABLE) {\n                    bool is_useful_replica =\n                        collect_replica({&_all_apps, &_nodes}, request.node, rep);\n                    if (!is_useful_replica) {\n                        if (level <= meta_function_level::fl_steady) {\n                            ddebug(\"gpid(%d.%d) on node(%s) is useless, but current function level \"\n                                   \"is %s, do not delete it for safety reason\",\n                                   rep.pid.get_app_id(),\n                                   rep.pid.get_partition_index(),\n                                   request.node.to_string(),\n                                   _meta_function_level_VALUES_TO_NAMES.find(level)->second);\n                        } else {\n                            response.gc_replicas.push_back(rep);\n                            dwarn(\"notify node(%s) to gc replica(%d.%d) coz it is useless\",\n                                  request.node.to_string(),\n                                  rep.pid.get_app_id(),\n                                  rep.pid.get_partition_index());\n                        }\n                    }\n                }\n            }\n\n            if (!response.gc_replicas.empty()) {\n                response.__isset.gc_replicas = true;\n            }\n        }\n    }\n\n    if (reject_this_request) {\n        response.err = ERR_BUSY;\n        response.partitions.clear();\n    }\n    ddebug_f(\"send config sync response to {}, 
err({}), partitions_count({}), \"\n             \"gc_replicas_count({})\",\n             request.node.to_string(),\n             response.err,\n             response.partitions.size(),\n             response.gc_replicas.size());\n}\n\nbool server_state::query_configuration_by_gpid(dsn::gpid id,\n                                               /*out*/ partition_configuration &config)\n{\n    zauto_read_lock l(_lock);\n    const partition_configuration *pc = get_config(_all_apps, id);\n    if (pc != nullptr) {\n        config = *pc;\n        return true;\n    }\n    return false;\n}\n\nvoid server_state::query_configuration_by_index(\n    const configuration_query_by_index_request &request,\n    /*out*/ configuration_query_by_index_response &response)\n{\n    zauto_read_lock l(_lock);\n    auto iter = _exist_apps.find(request.app_name.c_str());\n    if (iter == _exist_apps.end()) {\n        response.err = ERR_OBJECT_NOT_FOUND;\n        return;\n    }\n\n    std::shared_ptr<app_state> &app = iter->second;\n    if (app->status != app_status::AS_AVAILABLE) {\n        derror(\"invalid status(%s) in exist app(%s), app_id(%d)\",\n               enum_to_string(app->status),\n               (app->app_name).c_str(),\n               app->app_id);\n\n        switch (app->status) {\n        case app_status::AS_CREATING:\n        case app_status::AS_RECALLING:\n            response.err = ERR_BUSY_CREATING;\n            break;\n        case app_status::AS_DROPPING:\n            response.err = ERR_BUSY_DROPPING;\n            break;\n        default:\n            response.err = ERR_UNKNOWN;\n        }\n        return;\n    }\n\n    response.err = ERR_OK;\n    response.app_id = app->app_id;\n    response.partition_count = app->partition_count;\n    response.is_stateful = app->is_stateful;\n\n    for (const int32_t &index : request.partition_indices) {\n        if (index >= 0 && index < app->partitions.size())\n            response.partitions.push_back(app->partitions[index]);\n    }\n 
   if (response.partitions.empty())\n        response.partitions = app->partitions;\n}\n\nvoid server_state::init_app_partition_node(std::shared_ptr<app_state> &app,\n                                           int pidx,\n                                           task_ptr callback)\n{\n    auto on_create_app_partition = [this, pidx, app, callback](error_code ec) mutable {\n        dinfo(\"create partition node: gpid(%d.%d), result: %s\", app->app_id, pidx, ec.to_string());\n        if (ERR_OK == ec || ERR_NODE_ALREADY_EXIST == ec) {\n            {\n                zauto_write_lock l(_lock);\n                process_one_partition(app);\n            }\n            if (callback) {\n                callback->enqueue();\n            }\n        } else if (ERR_TIMEOUT == ec) {\n            dwarn(\"create partition node failed, gpid(%d.%d), retry later\", app->app_id, pidx);\n            // TODO: add parameter of the retry time interval in config file\n            tasking::enqueue(\n                LPC_META_STATE_HIGH,\n                tracker(),\n                std::bind(&server_state::init_app_partition_node, this, app, pidx, callback),\n                0,\n                std::chrono::milliseconds(1000));\n        } else {\n            dassert(false,\n                    \"we can't handle this error in init app partition nodes err(%s), gpid(%d.%d)\",\n                    ec.to_string(),\n                    app->app_id,\n                    pidx);\n        }\n    };\n\n    std::string app_partition_path = get_partition_path(*app, pidx);\n    dsn::blob value =\n        dsn::json::json_forwarder<partition_configuration>::encode(app->partitions[pidx]);\n    _meta_svc->get_remote_storage()->create_node(\n        app_partition_path, LPC_META_STATE_HIGH, on_create_app_partition, value);\n}\n\nvoid server_state::do_app_create(std::shared_ptr<app_state> &app)\n{\n    auto on_create_app_root = [this, app](error_code ec) mutable {\n        if (ERR_OK == ec || 
ERR_NODE_ALREADY_EXIST == ec) {\n            dinfo(\"create app(%s) on storage service ok\", app->get_logname());\n            for (unsigned int i = 0; i != app->partition_count; ++i) {\n                init_app_partition_node(app, i, nullptr);\n            }\n        } else if (ERR_TIMEOUT == ec) {\n            dwarn(\"the storage service is not available currently, continue to create later\");\n            tasking::enqueue(LPC_META_STATE_HIGH,\n                             tracker(),\n                             std::bind(&server_state::do_app_create, this, app),\n                             0,\n                             std::chrono::seconds(1));\n        } else {\n            dassert(false, \"we can't handle this right now, err(%s)\", ec.to_string());\n        }\n    };\n\n    std::string app_dir = get_app_path(*app);\n    blob value = app->to_json(app_status::AS_AVAILABLE);\n    _meta_svc->get_remote_storage()->create_node(\n        app_dir, LPC_META_STATE_HIGH, on_create_app_root, value);\n}\n\nvoid server_state::create_app(dsn::message_ex *msg)\n{\n    configuration_create_app_request request;\n    configuration_create_app_response response;\n    std::shared_ptr<app_state> app;\n    bool will_create_app = false;\n    dsn::unmarshall(msg, request);\n\n    const auto &duplication_env_iterator =\n        request.options.envs.find(duplication_constants::kDuplicationEnvMasterClusterKey);\n    ddebug_f(\"create app request, name({}), type({}), partition_count({}), replica_count({}), \"\n             \"duplication({})\",\n             request.app_name,\n             request.options.app_type,\n             request.options.partition_count,\n             request.options.replica_count,\n             duplication_env_iterator == request.options.envs.end()\n                 ? 
\"false\"\n                 : fmt::format(\n                       \"{}.{}\",\n                       request.options.envs[duplication_constants::kDuplicationEnvMasterClusterKey],\n                       request.app_name));\n\n    auto option_match_check = [](const create_app_options &opt, const app_state &exist_app) {\n        return opt.partition_count == exist_app.partition_count &&\n               opt.app_type == exist_app.app_type && opt.envs == exist_app.envs &&\n               opt.is_stateful == exist_app.is_stateful &&\n               opt.replica_count == exist_app.max_replica_count;\n    };\n\n    auto level = _meta_svc->get_function_level();\n    if (level <= meta_function_level::fl_freezed) {\n        derror(\"current meta function level is freezed, since there are too few alive nodes\");\n        response.err = ERR_STATE_FREEZED;\n        will_create_app = false;\n    } else if (request.options.partition_count <= 0 ||\n               !validate_target_max_replica_count(request.options.replica_count)) {\n        response.err = ERR_INVALID_PARAMETERS;\n        will_create_app = false;\n    } else {\n        zauto_write_lock l(_lock);\n        app = get_app(request.app_name);\n        if (nullptr != app) {\n            switch (app->status) {\n            case app_status::AS_AVAILABLE:\n                if (!request.options.success_if_exist) {\n                    response.err = ERR_APP_EXIST;\n                } else if (!option_match_check(request.options, *app)) {\n                    response.err = ERR_INVALID_PARAMETERS;\n                } else {\n                    response.err = ERR_OK;\n                    response.appid = app->app_id;\n                }\n                break;\n            case app_status::AS_CREATING:\n            case app_status::AS_RECALLING:\n                response.err = ERR_BUSY_CREATING;\n                break;\n            case app_status::AS_DROPPING:\n                response.err = ERR_BUSY_DROPPING;\n                
break;\n            default:\n                break;\n            }\n        } else {\n            will_create_app = true;\n\n            app_info info;\n            info.app_id = next_app_id();\n            info.app_name = request.app_name;\n            info.app_type = request.options.app_type;\n            info.envs = std::move(request.options.envs);\n            info.is_stateful = request.options.is_stateful;\n            info.max_replica_count = request.options.replica_count;\n            info.partition_count = request.options.partition_count;\n            info.status = app_status::AS_CREATING;\n            info.create_second = dsn_now_ms() / 1000;\n            info.init_partition_count = request.options.partition_count;\n\n            app = app_state::create(info);\n            app->helpers->pending_response = msg;\n            app->helpers->partitions_in_progress.store(info.partition_count);\n\n            _all_apps.emplace(app->app_id, app);\n            _exist_apps.emplace(request.app_name, app);\n        }\n    }\n\n    if (will_create_app) {\n        do_app_create(app);\n    } else {\n        _meta_svc->reply_data(msg, response);\n        msg->release_ref();\n    }\n}\n\nvoid server_state::do_app_drop(std::shared_ptr<app_state> &app)\n{\n    auto after_mark_app_dropped = [this, app](error_code ec) mutable {\n        if (ERR_OK == ec) {\n            zauto_write_lock l(_lock);\n            _exist_apps.erase(app->app_name);\n            for (int i = 0; i < app->partition_count; ++i) {\n                drop_partition(app, i);\n            }\n        } else if (ERR_TIMEOUT == ec) {\n            dinfo(\"drop app(%s) prepare timeout, continue to drop later\", app->get_logname());\n            tasking::enqueue(LPC_META_STATE_HIGH,\n                             tracker(),\n                             std::bind(&server_state::do_app_drop, this, app),\n                             0,\n                             std::chrono::seconds(1));\n        } else {\n        
    dassert(false, \"we can't handle this, error(%s)\", ec.to_string());\n        }\n    };\n\n    blob json_app = app->to_json(app_status::AS_DROPPED);\n    std::string app_path = get_app_path(*app);\n    _meta_svc->get_remote_storage()->set_data(\n        app_path, json_app, LPC_META_STATE_HIGH, after_mark_app_dropped);\n}\n\nvoid server_state::drop_app(dsn::message_ex *msg)\n{\n    configuration_drop_app_request request;\n    configuration_drop_app_response response;\n\n    bool do_dropping = false;\n    std::shared_ptr<app_state> app;\n    dsn::unmarshall(msg, request);\n    ddebug(\"drop app request, name(%s)\", request.app_name.c_str());\n    {\n        zauto_write_lock l(_lock);\n        app = get_app(request.app_name);\n        if (nullptr == app) {\n            response.err = request.options.success_if_not_exist ? ERR_OK : ERR_APP_NOT_EXIST;\n        } else {\n            switch (app->status) {\n            case app_status::AS_AVAILABLE:\n                if (app->splitting()) {\n                    // not drop splitting app\n                    response.err = ERR_SPLITTING;\n                    break;\n                }\n                do_dropping = true;\n                app->status = app_status::AS_DROPPING;\n                app->drop_second = dsn_now_ms() / 1000;\n                if (request.options.__isset.reserve_seconds &&\n                    request.options.reserve_seconds > 0) {\n                    app->expire_second = app->drop_second + request.options.reserve_seconds;\n                } else {\n                    app->expire_second = app->drop_second +\n                                         _meta_svc->get_meta_options().hold_seconds_for_dropped_app;\n                }\n                app->helpers->pending_response = msg;\n                dassert(app->helpers->partitions_in_progress.load() == 0,\n                        \"partition_in_progress_cnt = %d\",\n                        app->helpers->partitions_in_progress.load());\n              
  app->helpers->partitions_in_progress.store(app->partition_count);\n\n                break;\n            case app_status::AS_CREATING:\n            case app_status::AS_RECALLING:\n                response.err = ERR_BUSY_CREATING;\n                break;\n            case app_status::AS_DROPPING:\n                response.err = ERR_BUSY_DROPPING;\n                break;\n            default:\n                dassert(\n                    false, \"invalid app status, status = %s\", ::dsn::enum_to_string(app->status));\n                break;\n            }\n        }\n    }\n    if (do_dropping) {\n        do_app_drop(app);\n    } else {\n        _meta_svc->reply_data(msg, response);\n        msg->release_ref();\n    }\n}\n\nvoid server_state::do_app_recall(std::shared_ptr<app_state> &app)\n{\n    auto after_recall_app = [this, app](dsn::error_code ec) mutable {\n        zauto_write_lock l(_lock);\n        for (int i = 0; i < app->partition_count; ++i) {\n            recall_partition(app, i);\n        }\n    };\n\n    std::string app_path = get_app_path(*app);\n    blob value = app->to_json(app_status::AS_AVAILABLE);\n    _meta_svc->get_remote_storage()->set_data(\n        app_path, value, LPC_META_STATE_HIGH, after_recall_app);\n}\n\nvoid server_state::recall_app(dsn::message_ex *msg)\n{\n    configuration_recall_app_request request;\n    configuration_recall_app_response response;\n    std::shared_ptr<app_state> target_app;\n\n    dsn::unmarshall(msg, request);\n    ddebug(\"recall app request, app_id(%d)\", request.app_id);\n\n    bool do_recalling = false;\n    {\n        zauto_write_lock l(_lock);\n        target_app = get_app(request.app_id);\n        if (target_app == nullptr) {\n            response.err = ERR_APP_NOT_EXIST;\n        } else if (target_app->status != app_status::AS_DROPPED) {\n            if (target_app->status == app_status::AS_CREATING ||\n                target_app->status == app_status::AS_RECALLING)\n                response.err = 
ERR_BUSY_CREATING;\n            else if (target_app->status == app_status::AS_DROPPING)\n                response.err = ERR_BUSY_DROPPING;\n            else\n                response.err = ERR_APP_EXIST;\n        } else {\n            if (has_seconds_expired(target_app->expire_second)) {\n                response.err = ERR_APP_NOT_EXIST;\n            } else {\n                std::string &new_app_name =\n                    (request.new_app_name == \"\") ? target_app->app_name : request.new_app_name;\n                if (_exist_apps.find(new_app_name) != _exist_apps.end()) {\n                    response.err = ERR_INVALID_PARAMETERS;\n                } else {\n                    do_recalling = true;\n                    target_app->app_name = new_app_name;\n                    target_app->status = app_status::AS_RECALLING;\n                    dassert(target_app->helpers->partitions_in_progress.load() == 0,\n                            \"partition_in_progress_cnt = %d\",\n                            target_app->helpers->partitions_in_progress.load());\n                    target_app->helpers->partitions_in_progress.store(target_app->partition_count);\n                    target_app->helpers->pending_response = msg;\n\n                    _exist_apps.emplace(target_app->app_name, target_app);\n                }\n            }\n        }\n    }\n\n    if (!do_recalling) {\n        _meta_svc->reply_data(msg, response);\n        msg->release_ref();\n        return;\n    }\n    do_app_recall(target_app);\n}\n\nvoid server_state::list_apps(const configuration_list_apps_request &request,\n                             configuration_list_apps_response &response)\n{\n    dinfo(\"list app request, status(%d)\", request.status);\n    zauto_read_lock l(_lock);\n    for (auto &kv : _all_apps) {\n        app_state &app = *(kv.second);\n        if (request.status == app_status::AS_INVALID || request.status == app.status) {\n            response.infos.push_back(app);\n        }\n  
  }\n    response.err = dsn::ERR_OK;\n}\n\nvoid server_state::send_proposal(rpc_address target, const configuration_update_request &proposal)\n{\n    ddebug(\"send proposal %s for gpid(%d.%d), ballot = %\" PRId64 \", target = %s, node = %s\",\n           ::dsn::enum_to_string(proposal.type),\n           proposal.config.pid.get_app_id(),\n           proposal.config.pid.get_partition_index(),\n           proposal.config.ballot,\n           target.to_string(),\n           proposal.node.to_string());\n    dsn::message_ex *msg =\n        dsn::message_ex::create_request(RPC_CONFIG_PROPOSAL, 0, proposal.config.pid.thread_hash());\n    ::marshall(msg, proposal);\n    _meta_svc->send_message(target, msg);\n}\n\nvoid server_state::send_proposal(const configuration_proposal_action &action,\n                                 const partition_configuration &pc,\n                                 const app_state &app)\n{\n    configuration_update_request request;\n    request.info = app;\n    request.type = action.type;\n    request.node = action.node;\n    request.config = pc;\n    send_proposal(action.target, request);\n}\n\nvoid server_state::request_check(const partition_configuration &old,\n                                 const configuration_update_request &request)\n{\n    const partition_configuration &new_config = request.config;\n\n    switch (request.type) {\n    case config_type::CT_ASSIGN_PRIMARY:\n        dassert(old.primary != request.node,\n                \"%s VS %s\",\n                old.primary.to_string(),\n                request.node.to_string());\n        dassert(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) ==\n                    old.secondaries.end(),\n                \"\");\n        break;\n    case config_type::CT_UPGRADE_TO_PRIMARY:\n        dassert(old.primary != request.node,\n                \"%s VS %s\",\n                old.primary.to_string(),\n                request.node.to_string());\n        
dassert(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) !=\n                    old.secondaries.end(),\n                \"\");\n        break;\n    case config_type::CT_DOWNGRADE_TO_SECONDARY:\n        dassert(old.primary == request.node,\n                \"%s VS %s\",\n                old.primary.to_string(),\n                request.node.to_string());\n        dassert(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) ==\n                    old.secondaries.end(),\n                \"\");\n        break;\n    case config_type::CT_DOWNGRADE_TO_INACTIVE:\n    case config_type::CT_REMOVE:\n        dassert(old.primary == request.node ||\n                    std::find(old.secondaries.begin(), old.secondaries.end(), request.node) !=\n                        old.secondaries.end(),\n                \"\");\n        break;\n    case config_type::CT_UPGRADE_TO_SECONDARY:\n        dassert(old.primary != request.node,\n                \" %s VS %s\",\n                old.primary.to_string(),\n                request.node.to_string());\n        dassert(std::find(old.secondaries.begin(), old.secondaries.end(), request.node) ==\n                    old.secondaries.end(),\n                \"\");\n        break;\n    case config_type::CT_PRIMARY_FORCE_UPDATE_BALLOT:\n        dassert(old.primary == new_config.primary,\n                \"%s VS %s\",\n                old.primary.to_string(),\n                new_config.primary.to_string());\n        dassert(old.secondaries == new_config.secondaries, \"\");\n        break;\n    default:\n        break;\n    }\n}\n\nvoid server_state::update_configuration_locally(\n    app_state &app, std::shared_ptr<configuration_update_request> &config_request)\n{\n    dsn::gpid &gpid = config_request->config.pid;\n    partition_configuration &old_cfg = app.partitions[gpid.get_partition_index()];\n    partition_configuration &new_cfg = config_request->config;\n\n    int min_2pc_count = 
_meta_svc->get_options().mutation_2pc_min_replica_count;\n    health_status old_health_status = partition_health_status(old_cfg, min_2pc_count);\n    health_status new_health_status = partition_health_status(new_cfg, min_2pc_count);\n\n    if (app.is_stateful) {\n        dassert(old_cfg.ballot == invalid_ballot || old_cfg.ballot + 1 == new_cfg.ballot,\n                \"invalid configuration update request, old ballot %\" PRId64 \", new ballot %\" PRId64\n                \"\",\n                old_cfg.ballot,\n                new_cfg.ballot);\n\n        node_state *ns = nullptr;\n        if (config_request->type != config_type::CT_DROP_PARTITION) {\n            ns = get_node_state(_nodes, config_request->node, false);\n            dassert(ns != nullptr,\n                    \"invalid node address, address = %s\",\n                    config_request->node.to_string());\n        }\n#ifndef NDEBUG\n        request_check(old_cfg, *config_request);\n#endif\n        switch (config_request->type) {\n        case config_type::CT_ASSIGN_PRIMARY:\n        case config_type::CT_UPGRADE_TO_PRIMARY:\n            ns->put_partition(gpid, true);\n            break;\n\n        case config_type::CT_UPGRADE_TO_SECONDARY:\n            ns->put_partition(gpid, false);\n            break;\n\n        case config_type::CT_DOWNGRADE_TO_SECONDARY:\n            ns->remove_partition(gpid, true);\n            break;\n\n        case config_type::CT_DOWNGRADE_TO_INACTIVE:\n        case config_type::CT_REMOVE:\n            ns->remove_partition(gpid, false);\n            break;\n        // nothing to handle, the ballot will updated in below\n        case config_type::CT_PRIMARY_FORCE_UPDATE_BALLOT:\n            break;\n\n        case config_type::CT_DROP_PARTITION:\n            for (const rpc_address &node : new_cfg.last_drops) {\n                ns = get_node_state(_nodes, node, false);\n                if (ns != nullptr)\n                    ns->remove_partition(gpid, false);\n            }\n      
      break;\n\n        case config_type::CT_ADD_SECONDARY:\n        case config_type::CT_ADD_SECONDARY_FOR_LB:\n            dassert(false, \"invalid execution work flow\");\n            break;\n        case config_type::CT_REGISTER_CHILD: {\n            ns->put_partition(gpid, true);\n            for (auto &secondary : config_request->config.secondaries) {\n                auto secondary_node = get_node_state(_nodes, secondary, false);\n                secondary_node->put_partition(gpid, false);\n            }\n            break;\n        }\n        default:\n            dassert(false, \"\");\n            break;\n        }\n    } else {\n        dassert(old_cfg.ballot == new_cfg.ballot,\n                \"invalid ballot, %\" PRId64 \" VS %\" PRId64 \"\",\n                old_cfg.ballot,\n                new_cfg.ballot);\n\n        new_cfg = old_cfg;\n        partition_configuration_stateless pcs(new_cfg);\n        if (config_request->type == config_type::type::CT_ADD_SECONDARY) {\n            pcs.hosts().emplace_back(config_request->host_node);\n            pcs.workers().emplace_back(config_request->node);\n        } else {\n            auto it =\n                std::remove(pcs.hosts().begin(), pcs.hosts().end(), config_request->host_node);\n            pcs.hosts().erase(it);\n\n            it = std::remove(pcs.workers().begin(), pcs.workers().end(), config_request->node);\n            pcs.workers().erase(it);\n        }\n\n        auto it = _nodes.find(config_request->host_node);\n        dassert(it != _nodes.end(),\n                \"invalid node address, address = %s\",\n                config_request->host_node.to_string());\n        if (config_type::CT_REMOVE == config_request->type) {\n            it->second.remove_partition(gpid, false);\n        } else {\n            it->second.put_partition(gpid, false);\n        }\n    }\n\n    // we assume config in config_request stores the proper new config\n    // as we sync to remote storage according to it\n    
std::string old_config_str = boost::lexical_cast<std::string>(old_cfg);\n    old_cfg = config_request->config;\n    auto find_name = _config_type_VALUES_TO_NAMES.find(config_request->type);\n    if (find_name != _config_type_VALUES_TO_NAMES.end()) {\n        ddebug(\"meta update config ok: type(%s), old_config=%s, %s\",\n               find_name->second,\n               old_config_str.c_str(),\n               boost::lexical_cast<std::string>(*config_request).c_str());\n    } else {\n        ddebug(\"meta update config ok: type(%d), old_config=%s, %s\",\n               config_request->type,\n               old_config_str.c_str(),\n               boost::lexical_cast<std::string>(*config_request).c_str());\n    }\n\n#ifndef NDEBUG\n    check_consistency(gpid);\n#endif\n    if (_config_change_subscriber) {\n        _config_change_subscriber(_all_apps);\n    }\n\n    _recent_update_config_count->increment();\n    if (old_health_status >= HS_WRITABLE_ILL && new_health_status < HS_WRITABLE_ILL) {\n        _recent_partition_change_unwritable_count->increment();\n    }\n    if (old_health_status < HS_WRITABLE_ILL && new_health_status >= HS_WRITABLE_ILL) {\n        _recent_partition_change_writable_count->increment();\n    }\n}\n\ntask_ptr server_state::update_configuration_on_remote(\n    std::shared_ptr<configuration_update_request> &config_request)\n{\n    meta_function_level::type l = _meta_svc->get_function_level();\n    if (l <= meta_function_level::fl_blind) {\n        ddebug(\"ignore update configuration on remote due to level is %s\",\n               _meta_function_level_VALUES_TO_NAMES.find(l)->second);\n        // NOTICE: pending_sync_task need to be reassigned\n        return tasking::enqueue(\n            LPC_META_STATE_HIGH,\n            tracker(),\n            [this, config_request]() mutable {\n                std::shared_ptr<app_state> app = get_app(config_request->config.pid.get_app_id());\n                config_context &cc =\n                    
app->helpers->contexts[config_request->config.pid.get_partition_index()];\n                cc.pending_sync_task = update_configuration_on_remote(config_request);\n            },\n            0,\n            std::chrono::seconds(1));\n    }\n\n    partition_configuration &pc = config_request->config;\n    std::string storage_path = get_partition_path(pc.pid);\n\n    blob json_config = dsn::json::json_forwarder<partition_configuration>::encode(pc);\n    return _meta_svc->get_remote_storage()->set_data(\n        storage_path,\n        json_config,\n        LPC_META_STATE_HIGH,\n        std::bind(&server_state::on_update_configuration_on_remote_reply,\n                  this,\n                  std::placeholders::_1,\n                  config_request),\n        tracker());\n}\n\nvoid server_state::on_update_configuration_on_remote_reply(\n    error_code ec, std::shared_ptr<configuration_update_request> &config_request)\n{\n    zauto_write_lock l(_lock);\n    dsn::gpid &gpid = config_request->config.pid;\n    std::shared_ptr<app_state> app = get_app(gpid.get_app_id());\n    config_context &cc = app->helpers->contexts[gpid.get_partition_index()];\n\n    // if multiple threads exist in the thread pool, the check may be failed\n    dassert(app->status == app_status::AS_AVAILABLE || app->status == app_status::AS_DROPPING,\n            \"if app removed, this task should be cancelled\");\n    if (ec == ERR_TIMEOUT) {\n        cc.pending_sync_task =\n            tasking::enqueue(LPC_META_STATE_HIGH,\n                             tracker(),\n                             [this, config_request, &cc]() mutable {\n                                 cc.pending_sync_task =\n                                     update_configuration_on_remote(config_request);\n                             },\n                             0,\n                             std::chrono::seconds(1));\n    } else if (ec == ERR_OK) {\n        update_configuration_locally(*app, config_request);\n        
cc.pending_sync_task = nullptr;\n        cc.pending_sync_request.reset();\n        cc.stage = config_status::not_pending;\n        if (cc.msg) {\n            configuration_update_response resp;\n            resp.err = ERR_OK;\n            resp.config = config_request->config;\n            _meta_svc->reply_data(cc.msg, resp);\n            cc.msg->release_ref();\n            cc.msg = nullptr;\n        }\n\n        _meta_svc->get_partition_guardian()->reconfig({&_all_apps, &_nodes}, *config_request);\n        if (config_request->type == config_type::CT_DROP_PARTITION) {\n            process_one_partition(app);\n        } else {\n            configuration_proposal_action action;\n            _meta_svc->get_partition_guardian()->cure({&_all_apps, &_nodes}, gpid, action);\n            if (action.type != config_type::CT_INVALID) {\n                if (_add_secondary_enable_flow_control &&\n                    (action.type == config_type::CT_ADD_SECONDARY ||\n                     action.type == config_type::CT_ADD_SECONDARY_FOR_LB)) {\n                    // ignore adding secondary if add_secondary_enable_flow_control = true\n                } else {\n                    config_request->type = action.type;\n                    config_request->node = action.node;\n                    config_request->info = *app;\n                    send_proposal(action.target, *config_request);\n                }\n            }\n        }\n    } else {\n        dassert(false, \"we can't handle this right now, err = %s\", ec.to_string());\n    }\n}\n\nvoid server_state::recall_partition(std::shared_ptr<app_state> &app, int pidx)\n{\n    auto on_recall_partition = [this, app, pidx](dsn::error_code error) mutable {\n        if (error == dsn::ERR_OK) {\n            zauto_write_lock l(_lock);\n            app->partitions[pidx].partition_flags &= (~pc_flags::dropped);\n            process_one_partition(app);\n        } else if (error == dsn::ERR_TIMEOUT) {\n            
tasking::enqueue(LPC_META_STATE_HIGH,\n                             tracker(),\n                             std::bind(&server_state::recall_partition, this, app, pidx),\n                             server_state::sStateHash,\n                             std::chrono::seconds(1));\n        } else {\n            dassert(false, \"unable to handle this(%s) right now\", error.to_string());\n        }\n    };\n\n    partition_configuration &pc = app->partitions[pidx];\n    dassert((pc.partition_flags & pc_flags::dropped), \"\");\n\n    pc.partition_flags = 0;\n    blob json_partition = dsn::json::json_forwarder<partition_configuration>::encode(pc);\n    std::string partition_path = get_partition_path(pc.pid);\n    _meta_svc->get_remote_storage()->set_data(\n        partition_path, json_partition, LPC_META_STATE_HIGH, on_recall_partition);\n}\n\nvoid server_state::drop_partition(std::shared_ptr<app_state> &app, int pidx)\n{\n    partition_configuration &pc = app->partitions[pidx];\n    config_context &cc = app->helpers->contexts[pidx];\n\n    std::shared_ptr<configuration_update_request> req =\n        std::make_shared<configuration_update_request>();\n    configuration_update_request &request = *req;\n\n    request.info = *app;\n    request.type = config_type::CT_DROP_PARTITION;\n    request.node = pc.primary;\n\n    request.config = pc;\n    for (auto &node : pc.secondaries) {\n        maintain_drops(request.config.last_drops, node, request.type);\n    }\n    if (!pc.primary.is_invalid()) {\n        maintain_drops(request.config.last_drops, pc.primary, request.type);\n    }\n    request.config.primary.set_invalid();\n    request.config.secondaries.clear();\n\n    dassert((pc.partition_flags & pc_flags::dropped) == 0, \"\");\n    request.config.partition_flags |= pc_flags::dropped;\n\n    // NOTICE this mis-understanding: if a old state is DDD, we may not need to udpate the ballot.\n    // Actually it is necessary. 
Coz we may send a proposal due to the old DDD state\n    // and laterly a update_config may arrive.\n    // An updated ballot annouces a previous state is INVALID and all actions taken\n    // due to the old one should be staled\n    request.config.ballot++;\n\n    if (config_status::pending_remote_sync == cc.stage) {\n        dwarn(\"gpid(%d.%d) is syncing another request with remote, cancel it due to partition is \"\n              \"dropped\",\n              app->app_id,\n              pidx);\n        cc.cancel_sync();\n    }\n    cc.stage = config_status::pending_remote_sync;\n    cc.pending_sync_request = req;\n    cc.msg = nullptr;\n\n    cc.pending_sync_task = update_configuration_on_remote(req);\n}\n\nvoid server_state::downgrade_primary_to_inactive(std::shared_ptr<app_state> &app, int pidx)\n{\n    partition_configuration &pc = app->partitions[pidx];\n    config_context &cc = app->helpers->contexts[pidx];\n\n    if (config_status::pending_remote_sync == cc.stage) {\n        if (cc.pending_sync_request->type == config_type::CT_DROP_PARTITION) {\n            dassert(app->status == app_status::AS_DROPPING,\n                    \"app(%s) not in dropping state (%s)\",\n                    app->get_logname(),\n                    enum_to_string(app->status));\n            dwarn(\"stop downgrade primary as the partitions(%d.%d) is dropping\", app->app_id, pidx);\n            return;\n        } else {\n            dwarn(\"gpid(%d.%d) is syncing another request with remote, cancel it due to the \"\n                  \"primary(%s) is down\",\n                  pc.pid.get_app_id(),\n                  pc.pid.get_partition_index(),\n                  pc.primary.to_string());\n            cc.cancel_sync();\n        }\n    }\n\n    std::shared_ptr<configuration_update_request> req =\n        std::make_shared<configuration_update_request>();\n    configuration_update_request &request = *req;\n    request.info = *app;\n    request.config = pc;\n    request.type = 
config_type::CT_DOWNGRADE_TO_INACTIVE;\n    request.node = pc.primary;\n    request.config.ballot++;\n    request.config.primary.set_invalid();\n    maintain_drops(request.config.last_drops, pc.primary, request.type);\n\n    cc.stage = config_status::pending_remote_sync;\n    cc.pending_sync_request = req;\n    cc.msg = nullptr;\n\n    cc.pending_sync_task = update_configuration_on_remote(req);\n}\n\nvoid server_state::downgrade_secondary_to_inactive(std::shared_ptr<app_state> &app,\n                                                   int pidx,\n                                                   const rpc_address &node)\n{\n    partition_configuration &pc = app->partitions[pidx];\n    config_context &cc = app->helpers->contexts[pidx];\n\n    dassert(!pc.primary.is_invalid(), \"this shouldn't be called if the primary is invalid\");\n    if (config_status::pending_remote_sync != cc.stage) {\n        configuration_update_request request;\n        request.info = *app;\n        request.config = pc;\n        request.type = config_type::CT_DOWNGRADE_TO_INACTIVE;\n        request.node = node;\n        send_proposal(pc.primary, request);\n    } else {\n        ddebug(\"gpid(%d.%d) is syncing with remote storage, ignore the remove seconary(%s)\",\n               app->app_id,\n               pidx,\n               node.to_string());\n    }\n}\n\nvoid server_state::downgrade_stateless_nodes(std::shared_ptr<app_state> &app,\n                                             int pidx,\n                                             const rpc_address &address)\n{\n    std::shared_ptr<configuration_update_request> req =\n        std::make_shared<configuration_update_request>();\n    req->info = *app;\n    req->type = config_type::CT_REMOVE;\n    req->host_node = address;\n    req->node.set_invalid();\n    req->config = app->partitions[pidx];\n\n    config_context &cc = app->helpers->contexts[pidx];\n    partition_configuration &pc = req->config;\n\n    unsigned i = 0;\n    for (; i < 
pc.secondaries.size(); ++i) {\n        if (pc.secondaries[i] == address) {\n            req->node = pc.last_drops[i];\n            break;\n        }\n    }\n    dassert(!req->node.is_invalid(), \"invalid node address, address = %s\", req->node.to_string());\n    // remove host_node & node from secondaries/last_drops, as it will be sync to remote storage\n    for (++i; i < pc.secondaries.size(); ++i) {\n        pc.secondaries[i - 1] = pc.secondaries[i];\n        pc.last_drops[i - 1] = pc.last_drops[i];\n    }\n    pc.secondaries.pop_back();\n    pc.last_drops.pop_back();\n\n    if (config_status::pending_remote_sync == cc.stage) {\n        dwarn(\"gpid(%d.%d) is syncing another request with remote, cancel it due to meta is \"\n              \"removing host(%s) worker(%s)\",\n              pc.pid.get_app_id(),\n              pc.pid.get_partition_index(),\n              req->host_node.to_string(),\n              req->node.to_string());\n        cc.cancel_sync();\n    }\n    cc.stage = config_status::pending_remote_sync;\n    cc.pending_sync_request = req;\n    cc.msg = nullptr;\n\n    cc.pending_sync_task = update_configuration_on_remote(req);\n}\n\nvoid server_state::on_update_configuration(\n    std::shared_ptr<configuration_update_request> &cfg_request, dsn::message_ex *msg)\n{\n    zauto_write_lock l(_lock);\n    dsn::gpid &gpid = cfg_request->config.pid;\n    std::shared_ptr<app_state> app = get_app(gpid.get_app_id());\n    partition_configuration &pc = app->partitions[gpid.get_partition_index()];\n    config_context &cc = app->helpers->contexts[gpid.get_partition_index()];\n    configuration_update_response response;\n    response.err = ERR_IO_PENDING;\n\n    dassert(app != nullptr, \"get get app for app id(%d)\", gpid.get_app_id());\n    dassert(app->is_stateful, \"don't support stateless apps currently, id(%d)\", gpid.get_app_id());\n    auto find_name = _config_type_VALUES_TO_NAMES.find(cfg_request->type);\n    if (find_name != 
_config_type_VALUES_TO_NAMES.end()) {\n        ddebug(\"recv update config request: type(%s), %s\",\n               find_name->second,\n               boost::lexical_cast<std::string>(*cfg_request).c_str());\n    } else {\n        ddebug(\"recv update config request: type(%d), %s\",\n               cfg_request->type,\n               boost::lexical_cast<std::string>(*cfg_request).c_str());\n    }\n\n    if (is_partition_config_equal(pc, cfg_request->config)) {\n        ddebug(\"duplicated update request for gpid(%d.%d), ballot: %\" PRId64 \"\",\n               gpid.get_app_id(),\n               gpid.get_partition_index(),\n               pc.ballot);\n        response.err = ERR_OK;\n        //\n        // NOTICE:\n        //    if a replica server resend a update-request,\n        //    the meta has update the last_drops, and we should reply with new last_drops\n        //\n        response.config = pc;\n    } else if (pc.ballot + 1 != cfg_request->config.ballot) {\n        ddebug(\"update configuration for gpid(%d.%d) reject coz ballot not match, request ballot: \"\n               \"%\" PRId64 \", meta ballot: %\" PRId64 \"\",\n               gpid.get_app_id(),\n               gpid.get_partition_index(),\n               cfg_request->config.ballot,\n               pc.ballot);\n        response.err = ERR_INVALID_VERSION;\n        response.config = pc;\n    } else if (config_status::pending_remote_sync == cc.stage) {\n        ddebug(\"another request is syncing with remote storage, ignore current request, \"\n               \"gpid(%d.%d), request ballot(%\" PRId64 \")\",\n               gpid.get_app_id(),\n               gpid.get_partition_index(),\n               cfg_request->config.ballot);\n        // we don't reply the replica server, expect it to retry\n        msg->release_ref();\n        return;\n    } else {\n        maintain_drops(cfg_request->config.last_drops, cfg_request->node, cfg_request->type);\n    }\n\n    if (response.err != ERR_IO_PENDING) {\n        
_meta_svc->reply_data(msg, response);\n        msg->release_ref();\n    } else {\n        dassert(config_status::not_pending == cc.stage,\n                \"invalid config status, cc.stage = %s\",\n                enum_to_string(cc.stage));\n        cc.stage = config_status::pending_remote_sync;\n        cc.pending_sync_request = cfg_request;\n        cc.msg = msg;\n        cc.pending_sync_task = update_configuration_on_remote(cfg_request);\n    }\n}\n\nvoid server_state::on_partition_node_dead(std::shared_ptr<app_state> &app,\n                                          int pidx,\n                                          const dsn::rpc_address &address)\n{\n    partition_configuration &pc = app->partitions[pidx];\n    if (app->is_stateful) {\n        if (is_primary(pc, address))\n            downgrade_primary_to_inactive(app, pidx);\n        else if (is_secondary(pc, address)) {\n            if (!pc.primary.is_invalid())\n                downgrade_secondary_to_inactive(app, pidx, address);\n            else if (is_secondary(pc, address)) {\n                ddebug(\"gpid(%d.%d): secondary(%s) is down, ignored it due to no primary for this \"\n                       \"partition available\",\n                       pc.pid.get_app_id(),\n                       pc.pid.get_partition_index(),\n                       address.to_string());\n            } else {\n                dassert(false,\n                        \"no primary/secondary on this node, node address = %s\",\n                        address.to_string());\n            }\n        }\n    } else {\n        downgrade_stateless_nodes(app, pidx, address);\n    }\n}\n\nvoid server_state::on_change_node_state(rpc_address node, bool is_alive)\n{\n    dinfo(\"change node(%s) state to %s\", node.to_string(), is_alive ? 
\"alive\" : \"dead\");\n    zauto_write_lock l(_lock);\n    if (!is_alive) {\n        auto iter = _nodes.find(node);\n        if (iter == _nodes.end()) {\n            ddebug(\"node(%s) doesn't exist in the node state, just ignore\", node.to_string());\n        } else {\n            node_state &ns = iter->second;\n            ns.set_alive(false);\n            ns.set_replicas_collect_flag(false);\n            ns.for_each_partition([&, this](const dsn::gpid &pid) {\n                std::shared_ptr<app_state> app = get_app(pid.get_app_id());\n                dassert(app != nullptr && app->status != app_status::AS_DROPPED,\n                        \"invalid app, app_id = %d\",\n                        pid.get_app_id());\n                on_partition_node_dead(app, pid.get_partition_index(), node);\n                return true;\n            });\n        }\n    } else {\n        get_node_state(_nodes, node, true)->set_alive(true);\n    }\n}\n\nvoid server_state::on_propose_balancer(const configuration_balancer_request &request,\n                                       configuration_balancer_response &response)\n{\n    zauto_write_lock l(_lock);\n    std::shared_ptr<app_state> app = get_app(request.gpid.get_app_id());\n    if (app == nullptr || app->status != app_status::AS_AVAILABLE ||\n        request.gpid.get_partition_index() < 0 ||\n        request.gpid.get_partition_index() >= app->partition_count)\n        response.err = ERR_INVALID_PARAMETERS;\n    else {\n        if (request.force) {\n            partition_configuration &pc = *get_config(_all_apps, request.gpid);\n            for (const configuration_proposal_action &act : request.action_list) {\n                send_proposal(act, pc, *app);\n            }\n            response.err = ERR_OK;\n        } else {\n            _meta_svc->get_balancer()->register_proposals({&_all_apps, &_nodes}, request, response);\n        }\n    }\n}\n\nerror_code\nserver_state::construct_apps(const std::vector<query_app_info_response> 
&query_app_responses,\n                             const std::vector<dsn::rpc_address> &replica_nodes,\n                             std::string &hint_message)\n{\n    int max_app_id = 0;\n    for (unsigned int i = 0; i < query_app_responses.size(); ++i) {\n        query_app_info_response query_resp = query_app_responses[i];\n        if (query_resp.err != dsn::ERR_OK)\n            continue;\n\n        for (const app_info &info : query_resp.apps) {\n            dassert(info.app_id >= 1, \"invalid app_id, app_id = %d\", info.app_id);\n            auto iter = _all_apps.find(info.app_id);\n            if (iter == _all_apps.end()) {\n                std::shared_ptr<app_state> app = app_state::create(info);\n                ddebug(\"create app info from (%s) for id(%d): %s\",\n                       replica_nodes[i].to_string(),\n                       info.app_id,\n                       boost::lexical_cast<std::string>(info).c_str());\n                _all_apps.emplace(app->app_id, app);\n                max_app_id = std::max(app->app_id, max_app_id);\n            } else {\n                app_info *old_info = iter->second.get();\n                // all info in all replica servers should be the same\n                // coz the app info is only initialized when the replica is\n                // created, and it will NEVER change even if the app is dropped/recalled...\n                if (info != *old_info) // app_info::operator !=\n                {\n                    dassert(false,\n                            \"conflict app info from (%s) for id(%d): new_info(%s), old_info(%s)\",\n                            replica_nodes[i].to_string(),\n                            info.app_id,\n                            boost::lexical_cast<std::string>(info).c_str(),\n                            boost::lexical_cast<std::string>(*old_info).c_str());\n                }\n            }\n        }\n    }\n\n    // create placeholder for dropped table\n    for (int app_id = 1; app_id 
<= max_app_id; ++app_id) {\n        auto iter = _all_apps.find(app_id);\n        if (iter == _all_apps.end()) {\n            app_info dropped_holder;\n            dropped_holder.app_id = app_id;\n            dropped_holder.app_name = \"__drop_holder__\" + boost::lexical_cast<std::string>(app_id);\n            dropped_holder.app_type = \"pegasus\";\n            dropped_holder.is_stateful = true;\n            dropped_holder.max_replica_count = 3;\n            // in remote-storage-interaction module,\n            // we assume there is at least one partition\n            dropped_holder.partition_count = 1;\n            dropped_holder.status = app_status::AS_DROPPING;\n            dropped_holder.expire_second = dsn_now_ms() / 1000;\n\n            _all_apps.emplace(app_id, app_state::create(dropped_holder));\n        } else {\n            app_info *app_info = iter->second.get();\n            app_info->status = (app_status::AS_AVAILABLE == app_info->status)\n                                   ? 
app_status::AS_CREATING\n                                   : app_status::AS_DROPPING;\n        }\n    }\n\n    // check conflict table name\n    std::map<std::string, int32_t> checked_names;\n    for (int app_id = max_app_id; app_id >= 1; --app_id) {\n        dassert(_all_apps.find(app_id) != _all_apps.end(), \"invalid app_id, app_id = %d\", app_id);\n        std::shared_ptr<app_state> &app = _all_apps[app_id];\n        std::string old_name = app->app_name;\n        while (checked_names.find(app->app_name) != checked_names.end()) {\n            app->app_name = app->app_name + \"__\" + boost::lexical_cast<std::string>(app_id);\n        }\n        if (app->app_name != old_name) {\n            dwarn(\"app(%d)'s old name(%s) is conflict with others, rename it to (%s)\",\n                  app_id,\n                  old_name.c_str(),\n                  app->app_name.c_str());\n            std::ostringstream oss;\n            oss << \"WARNING: app(\" << app_id << \")'s old name(\" << old_name\n                << \") is conflict with others, rename it to (\" << app->app_name << \")\" << std::endl;\n            hint_message += oss.str();\n        }\n        checked_names.emplace(app->app_name, app_id);\n    }\n\n    ddebug(\"construct apps done, max_app_id = %d\", max_app_id);\n\n    return dsn::ERR_OK;\n}\n\nerror_code server_state::construct_partitions(\n    const std::vector<query_replica_info_response> &query_replica_responses,\n    const std::vector<dsn::rpc_address> &replica_nodes,\n    bool skip_lost_partitions,\n    std::string &hint_message)\n{\n    for (unsigned int i = 0; i < query_replica_responses.size(); ++i) {\n        query_replica_info_response query_resp = query_replica_responses[i];\n        if (query_resp.err != dsn::ERR_OK)\n            continue;\n\n        for (replica_info &r : query_resp.replicas) {\n            dassert(_all_apps.find(r.pid.get_app_id()) != _all_apps.end(), \"\");\n            bool is_accepted = collect_replica({&_all_apps, 
&_nodes}, replica_nodes[i], r);\n            if (is_accepted) {\n                ddebug(\"accept replica(%s) from node(%s)\",\n                       boost::lexical_cast<std::string>(r).c_str(),\n                       replica_nodes[i].to_string());\n            } else {\n                ddebug(\"ignore replica(%s) from node(%s)\",\n                       boost::lexical_cast<std::string>(r).c_str(),\n                       replica_nodes[i].to_string());\n            }\n        }\n    }\n\n    int succeed_count = 0;\n    int failed_count = 0;\n    for (auto &app_kv : _all_apps) {\n        std::shared_ptr<app_state> &app = app_kv.second;\n        dassert(app->status == app_status::AS_CREATING || app->status == app_status::AS_DROPPING,\n                \"invalid app status, status = %s\",\n                enum_to_string(app->status));\n        if (app->status == app_status::AS_DROPPING) {\n            ddebug(\"ignore constructing partitions for dropping app(%d)\", app->app_id);\n        } else {\n            for (partition_configuration &pc : app->partitions) {\n                bool is_succeed =\n                    construct_replica({&_all_apps, &_nodes}, pc.pid, app->max_replica_count);\n                if (is_succeed) {\n                    ddebug(\"construct partition(%d.%d) succeed: %s\",\n                           app->app_id,\n                           pc.pid.get_partition_index(),\n                           boost::lexical_cast<std::string>(pc).c_str());\n                    if (pc.last_drops.size() + 1 < pc.max_replica_count) {\n                        std::ostringstream oss;\n                        oss << \"WARNING: partition(\" << app->app_id << \".\"\n                            << pc.pid.get_partition_index() << \") only collects \"\n                            << (pc.last_drops.size() + 1) << \"/\" << pc.max_replica_count\n                            << \" of replicas, may lost data\" << std::endl;\n                        hint_message += oss.str();\n 
                   }\n                    succeed_count++;\n                } else {\n                    dwarn(\"construct partition(%d.%d) failed\",\n                          app->app_id,\n                          pc.pid.get_partition_index());\n                    std::ostringstream oss;\n                    if (skip_lost_partitions) {\n                        oss << \"WARNING: partition(\" << app->app_id << \".\"\n                            << pc.pid.get_partition_index() << \") has no replica collected, force \"\n                                                               \"recover the lost partition to empty\"\n                            << std::endl;\n                    } else {\n                        oss << \"ERROR: partition(\" << app->app_id << \".\"\n                            << pc.pid.get_partition_index()\n                            << \") has no replica collected, you can force recover it by set \"\n                               \"skip_lost_partitions option\"\n                            << std::endl;\n                    }\n                    hint_message += oss.str();\n                    failed_count++;\n                }\n            }\n        }\n    }\n\n    ddebug(\"construct partition done, succeed_count = %d, failed_count = %d, skip_lost_partitions \"\n           \"= %s\",\n           succeed_count,\n           failed_count,\n           (skip_lost_partitions ? 
\"true\" : \"false\"));\n\n    if (failed_count > 0 && !skip_lost_partitions) {\n        return dsn::ERR_TRY_AGAIN;\n    } else {\n        return dsn::ERR_OK;\n    }\n}\n\ndsn::error_code\nserver_state::sync_apps_from_replica_nodes(const std::vector<dsn::rpc_address> &replica_nodes,\n                                           bool skip_bad_nodes,\n                                           bool skip_lost_partitions,\n                                           std::string &hint_message)\n{\n    int n_replicas = replica_nodes.size();\n    std::vector<query_app_info_response> query_app_responses(n_replicas);\n    std::vector<query_replica_info_response> query_replica_responses(n_replicas);\n    std::vector<dsn::error_code> query_app_errors(n_replicas);\n    std::vector<dsn::error_code> query_replica_errors(n_replicas);\n\n    dsn::task_tracker tracker;\n    for (int i = 0; i < n_replicas; ++i) {\n        ddebug(\"send query app and replica request to node(%s)\", replica_nodes[i].to_string());\n\n        query_app_info_request app_query;\n        app_query.meta_server = dsn_primary_address();\n\n        rpc::call(replica_nodes[i],\n                  RPC_QUERY_APP_INFO,\n                  app_query,\n                  &tracker,\n                  [i, &replica_nodes, &query_app_errors, &query_app_responses](\n                      dsn::error_code err, query_app_info_response &&resp) mutable {\n                      ddebug(\"received query app response from node(%s), err(%s), apps_count(%d)\",\n                             replica_nodes[i].to_string(),\n                             err.to_string(),\n                             (int)resp.apps.size());\n                      query_app_errors[i] = err;\n                      if (err == dsn::ERR_OK) {\n                          query_app_responses[i] = std::move(resp);\n                      }\n                  });\n\n        query_replica_info_request replica_query;\n        replica_query.node = replica_nodes[i];\n        
rpc::call(\n            replica_nodes[i],\n            RPC_QUERY_REPLICA_INFO,\n            replica_query,\n            &tracker,\n            [i, &replica_nodes, &query_replica_errors, &query_replica_responses](\n                dsn::error_code err, query_replica_info_response &&resp) mutable {\n                ddebug(\"received query replica response from node(%s), err(%s), replicas_count(%d)\",\n                       replica_nodes[i].to_string(),\n                       err.to_string(),\n                       (int)resp.replicas.size());\n                query_replica_errors[i] = err;\n                if (err == dsn::ERR_OK) {\n                    query_replica_responses[i] = std::move(resp);\n                }\n            });\n    }\n\n    tracker.wait_outstanding_tasks();\n    int failed_count = 0;\n    int succeed_count = 0;\n    for (int i = 0; i < n_replicas; ++i) {\n        error_code err = dsn::ERR_OK;\n        if (query_app_errors[i] != dsn::ERR_OK) {\n            dwarn(\"query app info from node(%s) failed, reason: %s\",\n                  replica_nodes[i].to_string(),\n                  query_app_errors[i].to_string());\n            err = query_app_errors[i];\n        }\n        if (query_replica_errors[i] != dsn::ERR_OK) {\n            dwarn(\"query replica info from node(%s) failed, reason: %s\",\n                  replica_nodes[i].to_string(),\n                  query_replica_errors[i].to_string());\n            err = query_replica_errors[i];\n        }\n        if (err != dsn::ERR_OK) {\n            failed_count++;\n            query_app_errors[i] = err;\n            query_replica_errors[i] = err;\n            std::ostringstream oss;\n            if (skip_bad_nodes) {\n                oss << \"WARNING: collect app and replica info from node(\"\n                    << replica_nodes[i].to_string() << \") failed with err(\" << err.to_string()\n                    << \"), skip the bad node\" << std::endl;\n            } else {\n                oss << 
\"ERROR: collect app and replica info from node(\"\n                    << replica_nodes[i].to_string() << \") failed with err(\" << err.to_string()\n                    << \"), you can skip it by set skip_bad_nodes option\" << std::endl;\n            }\n            hint_message += oss.str();\n        } else {\n            succeed_count++;\n        }\n    }\n\n    ddebug(\"sync apps and replicas from replica nodes done, succeed_count = %d, failed_count = %d, \"\n           \"skip_bad_nodes = %s\",\n           succeed_count,\n           failed_count,\n           (skip_bad_nodes ? \"true\" : \"false\"));\n\n    if (failed_count > 0 && !skip_bad_nodes) {\n        return dsn::ERR_TRY_AGAIN;\n    }\n\n    zauto_write_lock l(_lock);\n\n    dsn::error_code err = construct_apps(query_app_responses, replica_nodes, hint_message);\n    if (err != dsn::ERR_OK) {\n        derror(\"construct apps failed, err = %s\", err.to_string());\n        return err;\n    }\n\n    err = construct_partitions(\n        query_replica_responses, replica_nodes, skip_lost_partitions, hint_message);\n    if (err != dsn::ERR_OK) {\n        derror(\"construct partitions failed, err = %s\", err.to_string());\n        return err;\n    }\n\n    return dsn::ERR_OK;\n}\n\nvoid server_state::on_start_recovery(const configuration_recovery_request &req,\n                                     configuration_recovery_response &resp)\n{\n    ddebug(\"start recovery, node_count = %d, skip_bad_nodes = %s, skip_lost_partitions = %s\",\n           (int)req.recovery_set.size(),\n           req.skip_bad_nodes ? \"true\" : \"false\",\n           req.skip_lost_partitions ? 
\"true\" : \"false\");\n\n    resp.err = sync_apps_from_replica_nodes(\n        req.recovery_set, req.skip_bad_nodes, req.skip_lost_partitions, resp.hint_message);\n    if (resp.err != dsn::ERR_OK) {\n        derror(\"sync apps from replica nodes failed when do recovery, err = %s\",\n               resp.err.to_string());\n        _all_apps.clear();\n        return;\n    }\n\n    resp.err = sync_apps_to_remote_storage();\n    if (resp.err != dsn::ERR_OK) {\n        dassert(false,\n                \"sync apps to remote storage failed when do recovery, err = %s, \"\n                \"need to manually clear things from remote storage and restart the service\",\n                resp.err.to_string());\n    }\n\n    initialize_node_state();\n}\n\nvoid server_state::clear_proposals()\n{\n    ddebug(\"clear all exist proposals\");\n    zauto_write_lock l(_lock);\n    for (auto &kv : _exist_apps) {\n        std::shared_ptr<app_state> &app = kv.second;\n        app->helpers->clear_proposals();\n    }\n}\n\nbool server_state::can_run_balancer()\n{\n    // dead nodes check\n    for (auto iter = _nodes.begin(); iter != _nodes.end();) {\n        if (!iter->second.alive()) {\n            if (iter->second.partition_count() != 0) {\n                ddebug(\"don't do replica migration coz dead node(%s) has %d partitions not removed\",\n                       iter->second.addr().to_string(),\n                       iter->second.partition_count());\n                return false;\n            }\n            _nodes.erase(iter++);\n        } else\n            ++iter;\n    }\n\n    // table stability check\n    int c = count_staging_app();\n    if (c != 0) {\n        ddebug(\"don't do replica migration coz %d table(s) is(are) in staging state\", c);\n        return false;\n    }\n    return true;\n}\n\nvoid server_state::update_partition_perf_counter()\n{\n    int counters[HS_MAX_VALUE];\n    ::memset(counters, 0, sizeof(counters));\n    int min_2pc_count = 
_meta_svc->get_options().mutation_2pc_min_replica_count;\n    auto func = [&](const std::shared_ptr<app_state> &app) {\n        for (unsigned int i = 0; i != app->partition_count; ++i) {\n            health_status st = partition_health_status(app->partitions[i], min_2pc_count);\n            counters[st]++;\n        }\n        return true;\n    };\n    for_each_available_app(_all_apps, func);\n    _dead_partition_count->set(counters[HS_DEAD]);\n    _unreadable_partition_count->set(counters[HS_UNREADABLE]);\n    _unwritable_partition_count->set(counters[HS_UNWRITABLE]);\n    _writable_ill_partition_count->set(counters[HS_WRITABLE_ILL]);\n    _healthy_partition_count->set(counters[HS_HEALTHY]);\n}\n\nbool server_state::check_all_partitions()\n{\n    int healthy_partitions = 0;\n    int total_partitions = 0;\n    meta_function_level::type level = _meta_svc->get_function_level();\n\n    zauto_write_lock l(_lock);\n\n    update_partition_perf_counter();\n\n    // first the cure stage\n    if (level <= meta_function_level::fl_freezed) {\n        ddebug(\"service is in level(%s), don't do any cure or balancer actions\",\n               _meta_function_level_VALUES_TO_NAMES.find(level)->second);\n        return false;\n    }\n    ddebug(\"start to check all partitions, add_secondary_enable_flow_control = %s, \"\n           \"add_secondary_max_count_for_one_node = %d\",\n           _add_secondary_enable_flow_control ? 
\"true\" : \"false\",\n           _add_secondary_max_count_for_one_node);\n    _meta_svc->get_partition_guardian()->clear_ddd_partitions();\n    int send_proposal_count = 0;\n    std::vector<configuration_proposal_action> add_secondary_actions;\n    std::vector<gpid> add_secondary_gpids;\n    std::vector<bool> add_secondary_proposed;\n    std::map<rpc_address, int> add_secondary_running_nodes; // node --> running_count\n    for (auto &app_pair : _exist_apps) {\n        std::shared_ptr<app_state> &app = app_pair.second;\n        if (app->status == app_status::AS_CREATING || app->status == app_status::AS_DROPPING) {\n            ddebug(\"ignore app(%s)(%d) because it's status is %s\",\n                   app->app_name.c_str(),\n                   app->app_id,\n                   ::dsn::enum_to_string(app->status));\n            continue;\n        }\n        for (unsigned int i = 0; i != app->partition_count; ++i) {\n            partition_configuration &pc = app->partitions[i];\n            config_context &cc = app->helpers->contexts[i];\n            // partition is under re-configuration or is child partition\n            if (cc.stage != config_status::pending_remote_sync && pc.ballot != invalid_ballot) {\n                configuration_proposal_action action;\n                pc_status s = _meta_svc->get_partition_guardian()->cure(\n                    {&_all_apps, &_nodes}, pc.pid, action);\n                dinfo(\"gpid(%d.%d) is in status(%s)\",\n                      pc.pid.get_app_id(),\n                      pc.pid.get_partition_index(),\n                      enum_to_string(s));\n                if (pc_status::healthy != s) {\n                    if (action.type != config_type::CT_INVALID) {\n                        if (action.type == config_type::CT_ADD_SECONDARY ||\n                            action.type == config_type::CT_ADD_SECONDARY_FOR_LB) {\n                            add_secondary_actions.push_back(std::move(action));\n                            
add_secondary_gpids.push_back(pc.pid);\n                            add_secondary_proposed.push_back(false);\n                        } else {\n                            send_proposal(action, pc, *app);\n                            send_proposal_count++;\n                        }\n                    }\n                } else {\n                    healthy_partitions++;\n                }\n            } else {\n                ddebug(\"ignore gpid(%d.%d) as it's stage is pending_remote_sync\",\n                       pc.pid.get_app_id(),\n                       pc.pid.get_partition_index());\n            }\n        }\n        total_partitions += app->partition_count;\n    }\n\n    // assign secondary for urgent\n    for (int i = 0; i < add_secondary_actions.size(); ++i) {\n        gpid &pid = add_secondary_gpids[i];\n        partition_configuration &pc = *get_config(_all_apps, pid);\n        if (!add_secondary_proposed[i] && pc.secondaries.empty()) {\n            configuration_proposal_action &action = add_secondary_actions[i];\n            if (_add_secondary_enable_flow_control &&\n                add_secondary_running_nodes[action.node] >= _add_secondary_max_count_for_one_node) {\n                // ignore\n                continue;\n            }\n            std::shared_ptr<app_state> app = get_app(pid.get_app_id());\n            send_proposal(action, pc, *app);\n            send_proposal_count++;\n            add_secondary_proposed[i] = true;\n            add_secondary_running_nodes[action.node]++;\n        }\n    }\n\n    // assign secondary for all\n    for (int i = 0; i < add_secondary_actions.size(); ++i) {\n        if (!add_secondary_proposed[i]) {\n            configuration_proposal_action &action = add_secondary_actions[i];\n            gpid pid = add_secondary_gpids[i];\n            partition_configuration &pc = *get_config(_all_apps, pid);\n            if (_add_secondary_enable_flow_control &&\n                
add_secondary_running_nodes[action.node] >= _add_secondary_max_count_for_one_node) {\n                ddebug(\"do not send %s proposal for gpid(%d.%d) for flow control reason, target = \"\n                       \"%s, node = %s\",\n                       ::dsn::enum_to_string(action.type),\n                       pc.pid.get_app_id(),\n                       pc.pid.get_partition_index(),\n                       action.target.to_string(),\n                       action.node.to_string());\n                continue;\n            }\n            std::shared_ptr<app_state> app = get_app(pid.get_app_id());\n            send_proposal(action, pc, *app);\n            send_proposal_count++;\n            add_secondary_proposed[i] = true;\n            add_secondary_running_nodes[action.node]++;\n        }\n    }\n\n    int ignored_add_secondary_count = 0;\n    int add_secondary_count = 0;\n    for (int i = 0; i < add_secondary_actions.size(); ++i) {\n        if (!add_secondary_proposed[i]) {\n            ignored_add_secondary_count++;\n        } else {\n            add_secondary_count++;\n        }\n    }\n\n    ddebug(\"check all partitions done, send_proposal_count = %d, add_secondary_count = %d, \"\n           \"ignored_add_secondary_count = %d\",\n           send_proposal_count,\n           add_secondary_count,\n           ignored_add_secondary_count);\n\n    // then the balancer stage\n    if (level < meta_function_level::fl_steady) {\n        ddebug(\"don't do replica migration coz meta server is in level(%s)\",\n               _meta_function_level_VALUES_TO_NAMES.find(level)->second);\n        return false;\n    }\n\n    if (healthy_partitions != total_partitions) {\n        ddebug(\"don't do replica migration coz %d of %d partitions aren't healthy\",\n               total_partitions - healthy_partitions,\n               total_partitions);\n        return false;\n    }\n\n    if (!can_run_balancer()) {\n        ddebug(\"don't do replica migration coz can_run_balancer() 
returns false\");\n        return false;\n    }\n\n    if (level == meta_function_level::fl_steady) {\n        ddebug(\"check if any replica migration can be done when meta server is in level(%s)\",\n               _meta_function_level_VALUES_TO_NAMES.find(level)->second);\n        _meta_svc->get_balancer()->check({&_all_apps, &_nodes}, _temporary_list);\n        ddebug(\"balance checker operation count = %d\", _temporary_list.size());\n        // update balance checker operation count\n        _meta_svc->get_balancer()->report(_temporary_list, true);\n        return false;\n    }\n\n    if (_meta_svc->get_balancer()->balance({&_all_apps, &_nodes}, _temporary_list)) {\n        ddebug(\"try to do replica migration\");\n        _meta_svc->get_balancer()->apply_balancer({&_all_apps, &_nodes}, _temporary_list);\n        // update balancer action details\n        _meta_svc->get_balancer()->report(_temporary_list, false);\n        if (_replica_migration_subscriber)\n            _replica_migration_subscriber(_temporary_list);\n        tasking::enqueue(LPC_META_STATE_NORMAL,\n                         _meta_svc->tracker(),\n                         std::bind(&meta_service::balancer_run, _meta_svc));\n        return false;\n    }\n\n    ddebug(\"check if any replica migration left\");\n    _meta_svc->get_balancer()->check({&_all_apps, &_nodes}, _temporary_list);\n    ddebug(\"balance checker operation count = %d\", _temporary_list.size());\n    // update balance checker operation count\n    _meta_svc->get_balancer()->report(_temporary_list, true);\n\n    return true;\n}\n\nvoid server_state::get_cluster_balance_score(double &primary_stddev, double &total_stddev)\n{\n    zauto_read_lock l(_lock);\n    _meta_svc->get_balancer()->score({&_all_apps, &_nodes}, primary_stddev, total_stddev);\n}\n\nvoid server_state::check_consistency(const dsn::gpid &gpid)\n{\n    auto iter = _all_apps.find(gpid.get_app_id());\n    dassert(iter != _all_apps.end(),\n            \"invalid 
gpid(%d.%d)\",\n            gpid.get_app_id(),\n            gpid.get_partition_index());\n\n    app_state &app = *(iter->second);\n    partition_configuration &config = app.partitions[gpid.get_partition_index()];\n\n    if (app.is_stateful) {\n        if (config.primary.is_invalid() == false) {\n            auto it = _nodes.find(config.primary);\n            dassert(it != _nodes.end(),\n                    \"invalid primary address, address = %s\",\n                    config.primary.to_string());\n            dassert(it->second.served_as(gpid) == partition_status::PS_PRIMARY,\n                    \"node should serve as PS_PRIMARY, but status = %s\",\n                    dsn::enum_to_string(it->second.served_as(gpid)));\n\n            auto it2 =\n                std::find(config.last_drops.begin(), config.last_drops.end(), config.primary);\n            dassert(it2 == config.last_drops.end(),\n                    \"primary shouldn't appear in last_drops, address = %s\",\n                    config.primary.to_string());\n        }\n\n        for (auto &ep : config.secondaries) {\n            auto it = _nodes.find(ep);\n            dassert(it != _nodes.end(), \"invalid secondary address, address = %s\", ep.to_string());\n            dassert(it->second.served_as(gpid) == partition_status::PS_SECONDARY,\n                    \"node should serve as PS_SECONDARY, but status = %s\",\n                    dsn::enum_to_string(it->second.served_as(gpid)));\n\n            auto it2 = std::find(config.last_drops.begin(), config.last_drops.end(), ep);\n            dassert(it2 == config.last_drops.end(),\n                    \"secondary shouldn't appear in last_drops, address = %s\",\n                    ep.to_string());\n        }\n    } else {\n        partition_configuration_stateless pcs(config);\n        dassert(pcs.hosts().size() == pcs.workers().size(),\n                \"%d VS %d\",\n                pcs.hosts().size(),\n                pcs.workers().size());\n        for 
(auto &ep : pcs.hosts()) {\n            auto it = _nodes.find(ep);\n            dassert(it != _nodes.end(), \"invalid host, address = %s\", ep.to_string());\n            dassert(it->second.served_as(gpid) == partition_status::PS_SECONDARY,\n                    \"node should serve as PS_SECONDARY, but status = %s\",\n                    dsn::enum_to_string(it->second.served_as(gpid)));\n        }\n    }\n}\n\nvoid server_state::lock_read(zauto_read_lock &other)\n{\n    zauto_read_lock l(_lock);\n    l.swap(other);\n}\n\nvoid server_state::lock_write(zauto_write_lock &other)\n{\n    zauto_write_lock l(_lock);\n    l.swap(other);\n}\n\nvoid server_state::do_update_app_info(const std::string &app_path,\n                                      const app_info &info,\n                                      const std::function<void(error_code ec)> &cb)\n{\n    // persistent envs to zookeeper\n    blob value = dsn::json::json_forwarder<app_info>::encode(info);\n    auto new_cb = [ this, app_path, info, user_cb = std::move(cb) ](error_code ec)\n    {\n        if (ec == ERR_OK) {\n            user_cb(ec);\n        } else if (ec == ERR_TIMEOUT) {\n            dwarn(\"update app_info(app = %s) to remote storage timeout, continue to update later\",\n                  info.app_name.c_str());\n            tasking::enqueue(\n                LPC_META_STATE_NORMAL,\n                tracker(),\n                std::bind(\n                    &server_state::do_update_app_info, this, app_path, info, std::move(user_cb)),\n                0,\n                std::chrono::seconds(1));\n        } else {\n            dassert(false, \"we can't handle this, error(%s)\", ec.to_string());\n        }\n    };\n    // TODO(cailiuyang): callback scheduling order may be undefined if multiple requests are\n    // sending to the remote storage concurrently.\n    _meta_svc->get_remote_storage()->set_data(\n        app_path, value, LPC_META_STATE_NORMAL, std::move(new_cb), tracker());\n}\n\nvoid 
server_state::set_app_envs(const app_env_rpc &env_rpc)\n{\n    const configuration_update_app_env_request &request = env_rpc.request();\n    if (!request.__isset.keys || !request.__isset.values ||\n        request.keys.size() != request.values.size() || request.keys.size() <= 0) {\n        env_rpc.response().err = ERR_INVALID_PARAMETERS;\n        dwarn(\"set app envs failed with invalid request\");\n        return;\n    }\n    const std::vector<std::string> &keys = request.keys;\n    const std::vector<std::string> &values = request.values;\n    const std::string &app_name = request.app_name;\n\n    std::ostringstream os;\n    for (int i = 0; i < keys.size(); i++) {\n        if (i != 0)\n            os << \", \";\n\n        if (!validate_app_env(keys[i], values[i], env_rpc.response().hint_message)) {\n            env_rpc.response().err = ERR_INVALID_PARAMETERS;\n            return;\n        }\n\n        os << keys[i] << \"=\" << values[i];\n    }\n    ddebug(\"set app envs for app(%s) from remote(%s): kvs = {%s}\",\n           app_name.c_str(),\n           env_rpc.remote_address().to_string(),\n           os.str().c_str());\n\n    app_info ainfo;\n    std::string app_path;\n    {\n        zauto_read_lock l(_lock);\n        std::shared_ptr<app_state> app = get_app(app_name);\n        if (app == nullptr) {\n            dwarn(\"set app envs failed with invalid app_name(%s)\", app_name.c_str());\n            env_rpc.response().err = ERR_INVALID_PARAMETERS;\n            env_rpc.response().hint_message = \"invalid app name\";\n            return;\n        } else {\n            ainfo = *(reinterpret_cast<app_info *>(app.get()));\n            app_path = get_app_path(*app);\n        }\n    }\n    for (int idx = 0; idx < keys.size(); idx++) {\n        ainfo.envs[keys[idx]] = values[idx];\n    }\n    do_update_app_info(app_path, ainfo, [this, app_name, keys, values, env_rpc](error_code ec) {\n        dassert(\n            ec == ERR_OK, \"update app_info to remote storage 
failed with err = %s\", ec.to_string());\n\n        zauto_write_lock l(_lock);\n        std::shared_ptr<app_state> app = get_app(app_name);\n        std::string old_envs = dsn::utils::kv_map_to_string(app->envs, ',', '=');\n        for (int idx = 0; idx < keys.size(); idx++) {\n            app->envs[keys[idx]] = values[idx];\n        }\n        std::string new_envs = dsn::utils::kv_map_to_string(app->envs, ',', '=');\n        ddebug(\"app envs changed: old_envs = {%s}, new_envs = {%s}\",\n               old_envs.c_str(),\n               new_envs.c_str());\n    });\n}\n\nvoid server_state::del_app_envs(const app_env_rpc &env_rpc)\n{\n    const configuration_update_app_env_request &request = env_rpc.request();\n    if (!request.__isset.keys || request.keys.size() <= 0) {\n        env_rpc.response().err = ERR_INVALID_PARAMETERS;\n        dwarn(\"del app envs failed with invalid request\");\n        return;\n    }\n    const std::vector<std::string> &keys = request.keys;\n    const std::string &app_name = request.app_name;\n\n    std::ostringstream os;\n    for (int i = 0; i < keys.size(); i++) {\n        if (i != 0)\n            os << \",\";\n        os << keys[i];\n    }\n    ddebug(\"del app envs for app(%s) from remote(%s): keys = {%s}\",\n           app_name.c_str(),\n           env_rpc.remote_address().to_string(),\n           os.str().c_str());\n\n    app_info ainfo;\n    std::string app_path;\n    {\n        zauto_read_lock l(_lock);\n        std::shared_ptr<app_state> app = get_app(app_name);\n        if (app == nullptr) {\n            dwarn(\"del app envs failed with invalid app_name(%s)\", app_name.c_str());\n            env_rpc.response().err = ERR_INVALID_PARAMETERS;\n            env_rpc.response().hint_message = \"invalid app name\";\n            return;\n        } else {\n            ainfo = *(reinterpret_cast<app_info *>(app.get()));\n            app_path = get_app_path(*app);\n        }\n    }\n\n    std::ostringstream oss;\n    oss << \"deleted 
keys:\";\n    int deleted = 0;\n    for (const auto &key : keys) {\n        if (ainfo.envs.erase(key) > 0) {\n            oss << std::endl << \"    \" << key;\n            deleted++;\n        }\n    }\n\n    if (deleted == 0) {\n        ddebug(\"no key need to delete\");\n        env_rpc.response().hint_message = \"no key need to delete\";\n        return;\n    } else {\n        env_rpc.response().hint_message = oss.str();\n    }\n\n    do_update_app_info(app_path, ainfo, [this, app_name, keys, env_rpc](error_code ec) {\n        dassert(\n            ec == ERR_OK, \"update app_info to remote storage failed with err = %s\", ec.to_string());\n\n        zauto_write_lock l(_lock);\n        std::shared_ptr<app_state> app = get_app(app_name);\n        std::string old_envs = dsn::utils::kv_map_to_string(app->envs, ',', '=');\n        for (const auto &key : keys) {\n            app->envs.erase(key);\n        }\n        std::string new_envs = dsn::utils::kv_map_to_string(app->envs, ',', '=');\n        ddebug(\"app envs changed: old_envs = {%s}, new_envs = {%s}\",\n               old_envs.c_str(),\n               new_envs.c_str());\n    });\n}\n\nvoid server_state::clear_app_envs(const app_env_rpc &env_rpc)\n{\n    const configuration_update_app_env_request &request = env_rpc.request();\n    if (!request.__isset.clear_prefix) {\n        env_rpc.response().err = ERR_INVALID_PARAMETERS;\n        dwarn(\"clear app envs failed with invalid request\");\n        return;\n    }\n\n    const std::string &prefix = request.clear_prefix;\n    const std::string &app_name = request.app_name;\n    ddebug(\"clear app envs for app(%s) from remote(%s): prefix = {%s}\",\n           app_name.c_str(),\n           env_rpc.remote_address().to_string(),\n           prefix.c_str());\n\n    app_info ainfo;\n    std::string app_path;\n    {\n        zauto_read_lock l(_lock);\n        std::shared_ptr<app_state> app = get_app(app_name);\n        if (app == nullptr) {\n            dwarn(\"clear app envs 
failed with invalid app_name(%s)\", app_name.c_str());\n            env_rpc.response().err = ERR_INVALID_PARAMETERS;\n            env_rpc.response().hint_message = \"invalid app name\";\n            return;\n        } else {\n            ainfo = *(reinterpret_cast<app_info *>(app.get()));\n            app_path = get_app_path(*app);\n        }\n    }\n\n    if (ainfo.envs.empty()) {\n        ddebug(\"no key need to delete\");\n        env_rpc.response().hint_message = \"no key need to delete\";\n        return;\n    }\n\n    std::set<std::string> erase_keys;\n    std::ostringstream oss;\n    oss << \"deleted keys:\";\n\n    if (prefix.empty()) {\n        // ignore prefix\n        for (auto &kv : ainfo.envs) {\n            oss << std::endl << \"    \" << kv.first;\n        }\n        ainfo.envs.clear();\n    } else {\n        // acquire key\n        for (const auto &pair : ainfo.envs) {\n            const std::string &key = pair.first;\n            // normal : key = prefix.xxx\n            if (key.size() > prefix.size() + 1) {\n                if (key.substr(0, prefix.size()) == prefix && key.at(prefix.size()) == '.') {\n                    erase_keys.emplace(key);\n                }\n            }\n        }\n        // erase\n        for (const auto &key : erase_keys) {\n            oss << std::endl << \"    \" << key;\n            ainfo.envs.erase(key);\n        }\n    }\n\n    if (!prefix.empty() && erase_keys.empty()) {\n        // no need update app_info\n        ddebug(\"no key need to delete\");\n        env_rpc.response().hint_message = \"no key need to delete\";\n        return;\n    } else {\n        env_rpc.response().hint_message = oss.str();\n    }\n\n    do_update_app_info(\n        app_path, ainfo, [this, app_name, prefix, erase_keys, env_rpc](error_code ec) {\n            dassert(ec == ERR_OK,\n                    \"update app_info to remote storage failed with err = %s\",\n                    ec.to_string());\n\n            zauto_write_lock 
l(_lock);\n            std::shared_ptr<app_state> app = get_app(app_name);\n            std::string old_envs = dsn::utils::kv_map_to_string(app->envs, ',', '=');\n            if (prefix.empty()) {\n                app->envs.clear();\n            } else {\n                for (const auto &key : erase_keys) {\n                    app->envs.erase(key);\n                }\n            }\n            std::string new_envs = dsn::utils::kv_map_to_string(app->envs, ',', '=');\n            ddebug(\"app envs changed: old_envs = {%s}, new_envs = {%s}\",\n                   old_envs.c_str(),\n                   new_envs.c_str());\n        });\n}\n\nnamespace {\n\nbool validate_target_max_replica_count_internal(int32_t max_replica_count,\n                                                int32_t alive_node_count,\n                                                std::string &hint_message)\n{\n    if (max_replica_count > FLAGS_max_allowed_replica_count ||\n        max_replica_count < FLAGS_min_allowed_replica_count) {\n        hint_message = fmt::format(\"requested replica count({}) must be \"\n                                   \"within the range of [min={}, max={}]\",\n                                   max_replica_count,\n                                   FLAGS_min_allowed_replica_count,\n                                   FLAGS_max_allowed_replica_count);\n        return false;\n    }\n\n    if (max_replica_count > alive_node_count) {\n        hint_message = fmt::format(\"there are not enough alive replica servers({}) \"\n                                   \"for the requested replica count({})\",\n                                   alive_node_count,\n                                   max_replica_count);\n        return false;\n    }\n\n    return true;\n}\n\n} // anonymous namespace\n\nbool server_state::validate_target_max_replica_count(int32_t max_replica_count,\n                                                     std::string &hint_message) const\n{\n    const auto 
alive_node_count = static_cast<int32_t>(_meta_svc->get_alive_node_count());\n\n    return validate_target_max_replica_count_internal(\n        max_replica_count, alive_node_count, hint_message);\n}\n\nbool server_state::validate_target_max_replica_count(int32_t max_replica_count) const\n{\n    std::string hint_message;\n    const auto valid = validate_target_max_replica_count(max_replica_count, hint_message);\n    if (!valid) {\n        derror_f(\"target max replica count is invalid: message={}\", hint_message);\n    }\n\n    return valid;\n}\n\nvoid server_state::on_start_manual_compact(start_manual_compact_rpc rpc)\n{\n    const std::string &app_name = rpc.request().app_name;\n    auto &response = rpc.response();\n\n    std::map<std::string, std::string> envs;\n    {\n        zauto_read_lock l(_lock);\n        auto app = get_app(app_name);\n        if (app == nullptr || app->status != app_status::AS_AVAILABLE) {\n            response.err = app == nullptr ? ERR_APP_NOT_EXIST : ERR_APP_DROPPED;\n            response.hint_msg =\n                fmt::format(\"app {} is {}\",\n                            app_name,\n                            response.err == ERR_APP_NOT_EXIST ? 
\"not existed\" : \"not available\");\n            derror_f(\"{}\", response.hint_msg);\n            return;\n        }\n        envs = app->envs;\n    }\n\n    auto iter = envs.find(replica_envs::MANUAL_COMPACT_DISABLED);\n    if (iter != envs.end() && iter->second == \"true\") {\n        response.err = ERR_OPERATION_DISABLED;\n        response.hint_msg = fmt::format(\"app {} disable manual compaction\", app_name);\n        derror_f(\"{}\", response.hint_msg);\n        return;\n    }\n\n    std::vector<std::string> keys;\n    std::vector<std::string> values;\n    if (!parse_compaction_envs(rpc, keys, values)) {\n        return;\n    }\n\n    update_compaction_envs_on_remote_storage(rpc, keys, values);\n\n    // update local manual compaction status\n    {\n        zauto_write_lock l(_lock);\n        auto app = get_app(app_name);\n        app->helpers->reset_manual_compact_status();\n    }\n}\n\nbool server_state::parse_compaction_envs(start_manual_compact_rpc rpc,\n                                         std::vector<std::string> &keys,\n                                         std::vector<std::string> &values)\n{\n    const auto &request = rpc.request();\n    auto &response = rpc.response();\n\n    int32_t target_level = -1;\n    if (request.__isset.target_level) {\n        target_level = request.target_level;\n        if (target_level < -1) {\n            response.err = ERR_INVALID_PARAMETERS;\n            response.hint_msg = fmt::format(\n                \"invalid target_level({}), should in range of [-1, num_levels]\", target_level);\n            derror_f(\"{}\", response.hint_msg);\n            return false;\n        }\n    }\n    keys.emplace_back(replica_envs::MANUAL_COMPACT_ONCE_TARGET_LEVEL);\n    values.emplace_back(std::to_string(target_level));\n\n    if (request.__isset.max_running_count) {\n        if (request.max_running_count < 0) {\n            response.err = ERR_INVALID_PARAMETERS;\n            response.hint_msg =\n                
fmt::format(\"invalid max_running_count({}), should be greater than 0\",\n                            request.max_running_count);\n            derror_f(\"{}\", response.hint_msg);\n            return false;\n        }\n        if (request.max_running_count > 0) {\n            keys.emplace_back(replica_envs::MANUAL_COMPACT_MAX_CONCURRENT_RUNNING_COUNT);\n            values.emplace_back(std::to_string(request.max_running_count));\n        }\n    }\n\n    std::string bottommost = \"skip\";\n    if (request.__isset.bottommost && request.bottommost) {\n        bottommost = \"force\";\n    }\n    keys.emplace_back(replica_envs::MANUAL_COMPACT_ONCE_BOTTOMMOST_LEVEL_COMPACTION);\n    values.emplace_back(bottommost);\n\n    int64_t trigger_time = dsn_now_s();\n    if (request.__isset.trigger_time) {\n        trigger_time = request.trigger_time;\n    }\n    keys.emplace_back(replica_envs::MANUAL_COMPACT_ONCE_TRIGGER_TIME);\n    values.emplace_back(std::to_string(trigger_time));\n\n    return true;\n}\n\nvoid server_state::update_compaction_envs_on_remote_storage(start_manual_compact_rpc rpc,\n                                                            const std::vector<std::string> &keys,\n                                                            const std::vector<std::string> &values)\n{\n    const std::string &app_name = rpc.request().app_name;\n    std::string app_path = \"\";\n    app_info ainfo;\n    {\n        zauto_read_lock l(_lock);\n        auto app = get_app(app_name);\n        ainfo = *(reinterpret_cast<app_info *>(app.get()));\n        app_path = get_app_path(*app);\n    }\n    for (auto idx = 0; idx < keys.size(); idx++) {\n        ainfo.envs[keys[idx]] = values[idx];\n    }\n    do_update_app_info(app_path, ainfo, [this, app_name, keys, values, rpc](error_code ec) {\n        dassert_f(ec == ERR_OK, \"update app_info to remote storage failed with err = {}\", ec);\n\n        zauto_write_lock l(_lock);\n        auto app = get_app(app_name);\n        std::string 
old_envs = dsn::utils::kv_map_to_string(app->envs, ',', '=');\n        for (int idx = 0; idx < keys.size(); idx++) {\n            app->envs[keys[idx]] = values[idx];\n        }\n        std::string new_envs = dsn::utils::kv_map_to_string(app->envs, ',', '=');\n        ddebug_f(\"update manual compaction envs succeed: old_envs = {}, new_envs = {}\",\n                 old_envs,\n                 new_envs);\n\n        rpc.response().err = ERR_OK;\n        rpc.response().hint_msg = \"succeed\";\n    });\n}\n\nvoid server_state::on_query_manual_compact_status(query_manual_compact_rpc rpc)\n{\n    const std::string &app_name = rpc.request().app_name;\n    auto &response = rpc.response();\n\n    std::shared_ptr<app_state> app;\n    {\n        zauto_read_lock l(_lock);\n        app = get_app(app_name);\n    }\n\n    if (app == nullptr || app->status != app_status::AS_AVAILABLE) {\n        response.err = app == nullptr ? ERR_APP_NOT_EXIST : ERR_APP_DROPPED;\n        response.hint_msg =\n            fmt::format(\"app {} is {}\",\n                        app_name,\n                        response.err == ERR_APP_NOT_EXIST ? 
\"not existed\" : \"not available\");\n        derror_f(\"{}\", response.hint_msg);\n        return;\n    }\n\n    int32_t total_progress = 0;\n    if (!app->helpers->get_manual_compact_progress(total_progress)) {\n        response.err = ERR_INVALID_STATE;\n        response.hint_msg = fmt::format(\"app {} is not manual compaction\", app_name);\n        dwarn_f(\"{}\", response.hint_msg);\n        return;\n    }\n\n    ddebug_f(\"query app {} manual compact succeed, total_progress = {}\", app_name, total_progress);\n    response.err = ERR_OK;\n    response.hint_msg = \"succeed\";\n    response.__set_progress(total_progress);\n}\n\ntemplate <typename Response>\nstd::shared_ptr<app_state> server_state::get_app_and_check_exist(const std::string &app_name,\n                                                                 Response &response) const\n{\n    auto app = get_app(app_name);\n    if (app == nullptr) {\n        response.err = ERR_APP_NOT_EXIST;\n        response.hint_message = fmt::format(\"app({}) does not exist\", app_name);\n    }\n\n    return app;\n}\n\ntemplate <typename Response>\nbool server_state::check_max_replica_count_consistent(const std::shared_ptr<app_state> &app,\n                                                      Response &response) const\n{\n    for (int i = 0; i < static_cast<int>(app->partitions.size()); ++i) {\n        const auto &partition_config = app->partitions[i];\n        if (partition_config.max_replica_count == app->max_replica_count) {\n            continue;\n        }\n\n        response.err = ERR_INCONSISTENT_STATE;\n        response.hint_message = fmt::format(\"partition_max_replica_count({}) != \"\n                                            \"app_max_replica_count({}) for partition {}\",\n                                            partition_config.max_replica_count,\n                                            app->max_replica_count,\n                                            i);\n        return false;\n    }\n\n    
return true;\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid server_state::get_max_replica_count(configuration_get_max_replica_count_rpc rpc) const\n{\n    const auto &app_name = rpc.request().app_name;\n    auto &response = rpc.response();\n\n    zauto_read_lock l(_lock);\n\n    auto app = get_app_and_check_exist(app_name, response);\n    if (app == nullptr) {\n        response.max_replica_count = 0;\n        dwarn_f(\"failed to get max_replica_count: app_name={}, error_code={}, hint_message={}\",\n                app_name,\n                response.err.to_string(),\n                response.hint_message);\n        return;\n    }\n\n    if (!check_max_replica_count_consistent(app, response)) {\n        response.max_replica_count = 0;\n        derror_f(\"failed to get max_replica_count: app_name={}, app_id={}, error_code={}, \"\n                 \"hint_message={}\",\n                 app_name,\n                 app->app_id,\n                 response.err.to_string(),\n                 response.hint_message);\n        return;\n    }\n\n    response.err = ERR_OK;\n    response.max_replica_count = app->max_replica_count;\n\n    ddebug_f(\"get max_replica_count successfully: app_name={}, app_id={}, \"\n             \"max_replica_count={}\",\n             app_name,\n             app->app_id,\n             response.max_replica_count);\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid server_state::set_max_replica_count(configuration_set_max_replica_count_rpc rpc)\n{\n    const auto &app_name = rpc.request().app_name;\n    const auto new_max_replica_count = rpc.request().max_replica_count;\n    auto &response = rpc.response();\n\n    int32_t app_id = 0;\n    std::shared_ptr<app_state> app;\n\n    {\n        zauto_read_lock l(_lock);\n\n        app = get_app_and_check_exist(app_name, response);\n        if (app == nullptr) {\n            response.old_max_replica_count = 0;\n            dwarn_f(\"failed to set max_replica_count: app_name={}, error_code={}, 
hint_message={}\",\n                    app_name,\n                    response.err.to_string(),\n                    response.hint_message);\n            return;\n        }\n\n        app_id = app->app_id;\n\n        if (!check_max_replica_count_consistent(app, response)) {\n            response.old_max_replica_count = 0;\n            derror_f(\"failed to set max_replica_count: app_name={}, app_id={}, error_code={}, \"\n                     \"hint_message={}\",\n                     app_name,\n                     app_id,\n                     response.err.to_string(),\n                     response.hint_message);\n            return;\n        }\n\n        response.old_max_replica_count = app->max_replica_count;\n\n        if (app->status != app_status::AS_AVAILABLE) {\n            response.err = ERR_INVALID_PARAMETERS;\n            response.hint_message = fmt::format(\"app({}) is not in available status\", app_name);\n            derror_f(\"failed to set max_replica_count: app_name={}, app_id={}, error_code={}, \"\n                     \"hint_message={}\",\n                     app_name,\n                     app_id,\n                     response.err.to_string(),\n                     response.hint_message);\n            return;\n        }\n    }\n\n    auto level = _meta_svc->get_function_level();\n    if (level <= meta_function_level::fl_freezed) {\n        response.err = ERR_STATE_FREEZED;\n        response.hint_message =\n            \"current meta function level is freezed, since there are too few alive nodes\";\n        derror_f(\n            \"failed to set max_replica_count: app_name={}, app_id={}, error_code={}, message={}\",\n            app_name,\n            app_id,\n            response.err.to_string(),\n            response.hint_message);\n        return;\n    }\n\n    if (!validate_target_max_replica_count(new_max_replica_count, response.hint_message)) {\n        response.err = ERR_INVALID_PARAMETERS;\n        dwarn_f(\n            \"failed to set 
max_replica_count: app_name={}, app_id={}, error_code={}, message={}\",\n            app_name,\n            app_id,\n            response.err.to_string(),\n            response.hint_message);\n        return;\n    }\n\n    if (new_max_replica_count == response.old_max_replica_count) {\n        response.err = ERR_OK;\n        response.hint_message = \"no need to update max_replica_count since it's not changed\";\n        dwarn_f(\"{}: app_name={}, app_id={}\", response.hint_message, app_name, app_id);\n        return;\n    }\n\n    ddebug_f(\"request for {} max_replica_count: app_name={}, app_id={}, \"\n             \"old_max_replica_count={}, new_max_replica_count={}\",\n             new_max_replica_count > response.old_max_replica_count ? \"increasing\" : \"decreasing\",\n             app_name,\n             app_id,\n             response.old_max_replica_count,\n             new_max_replica_count);\n\n    set_max_replica_count_env_updating(app, rpc);\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid server_state::set_max_replica_count_env_updating(std::shared_ptr<app_state> &app,\n                                                      configuration_set_max_replica_count_rpc rpc)\n{\n    zauto_write_lock l(_lock);\n\n    auto iter = app->envs.find(replica_envs::UPDATE_MAX_REPLICA_COUNT);\n    if (iter != app->envs.end()) {\n        std::vector<std::string> args;\n        utils::split_args(iter->second.c_str(), args, ';');\n        if (args[0] == \"updating\") {\n            auto &response = rpc.response();\n            response.err = ERR_OPERATION_DISABLED;\n            response.hint_message = fmt::format(\"max_replica_count of app({}) is being updated, \"\n                                                \"thus this request would be rejected\",\n                                                app->app_name);\n            derror_f(\"failed to set max_replica_count: app_name={}, app_id={}, error_code={}, \"\n                     \"hint_message={}\",\n                  
   app->app_name,\n                     app->app_id,\n                     response.err.to_string(),\n                     response.hint_message);\n            return;\n        }\n    }\n\n    const auto new_max_replica_count = rpc.request().max_replica_count;\n    const auto old_max_replica_count = rpc.response().old_max_replica_count;\n\n    ddebug_f(\"ready to update remote env of max_replica_count: app_name={}, app_id={}, \"\n             \"old_max_replica_count={}, new_max_replica_count={}, {}={}\",\n             app->app_name,\n             app->app_id,\n             old_max_replica_count,\n             new_max_replica_count,\n             replica_envs::UPDATE_MAX_REPLICA_COUNT,\n             app->envs[replica_envs::UPDATE_MAX_REPLICA_COUNT]);\n\n    auto ainfo = *(reinterpret_cast<app_info *>(app.get()));\n    ainfo.envs[replica_envs::UPDATE_MAX_REPLICA_COUNT] =\n        fmt::format(\"updating;{}\", new_max_replica_count);\n    auto app_path = get_app_path(*app);\n    do_update_app_info(app_path, ainfo, [this, app, rpc](error_code ec) mutable {\n        {\n            const auto new_max_replica_count = rpc.request().max_replica_count;\n\n            zauto_write_lock l(_lock);\n\n            dassert_f(ec == ERR_OK,\n                      \"An error that can't be handled occurs while updating remote env of \"\n                      \"max_replica_count: error_code={}, app_name={}, app_id={}, \"\n                      \"new_max_replica_count={}, {}={}\",\n                      ec.to_string(),\n                      app->app_name,\n                      app->app_id,\n                      new_max_replica_count,\n                      replica_envs::UPDATE_MAX_REPLICA_COUNT,\n                      app->envs[replica_envs::UPDATE_MAX_REPLICA_COUNT]);\n\n            app->envs[replica_envs::UPDATE_MAX_REPLICA_COUNT] =\n                fmt::format(\"updating;{}\", new_max_replica_count);\n            ddebug_f(\"both remote and local env of max_replica_count have been 
updated \"\n                     \"successfully: app_name={}, app_id={}, new_max_replica_count={}, {}={}\",\n                     app->app_name,\n                     app->app_id,\n                     new_max_replica_count,\n                     replica_envs::UPDATE_MAX_REPLICA_COUNT,\n                     app->envs[replica_envs::UPDATE_MAX_REPLICA_COUNT]);\n        }\n\n        do_update_max_replica_count(app, rpc);\n    });\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid server_state::do_update_max_replica_count(std::shared_ptr<app_state> &app,\n                                               configuration_set_max_replica_count_rpc rpc)\n{\n    std::shared_ptr<std::vector<error_code>> results;\n    {\n        zauto_write_lock l(_lock);\n\n        results.reset(new std::vector<error_code>(app->partition_count));\n        app->helpers->partitions_in_progress.store(app->partition_count);\n    }\n\n    auto on_partition_updated = [this, app, rpc, results](error_code ec,\n                                                          int32_t partition_index) mutable {\n        const auto &app_name = rpc.request().app_name;\n        const auto new_max_replica_count = rpc.request().max_replica_count;\n\n        results->at(partition_index) = ec;\n\n        auto uncompleted = --app->helpers->partitions_in_progress;\n        dassert_f(uncompleted >= 0,\n                  \"the uncompleted number should be >= 0 while updating partition-level \"\n                  \"max_replica_count: uncompleted={}, app_name={}, app_id={}, \"\n                  \"partition_index={}, partition_count={}, new_max_replica_count={}\",\n                  uncompleted,\n                  app_name,\n                  app->app_id,\n                  partition_index,\n                  app->partition_count,\n                  new_max_replica_count);\n\n        if (uncompleted > 0) {\n            return;\n        }\n\n        for (int32_t i = 0; i < app->partition_count; ++i) {\n            if 
(results->at(i) == ERR_OK) {\n                continue;\n            }\n\n            dassert_f(false,\n                      \"An error that can't be handled occurs while updating partition-level \"\n                      \"max_replica_count: error_code={}, app_name={}, app_id={}, \"\n                      \"partition_index={}, partition_count={}, new_max_replica_count={}\",\n                      ec.to_string(),\n                      app_name,\n                      app->app_id,\n                      i,\n                      app->partition_count,\n                      new_max_replica_count);\n        }\n\n        ddebug_f(\"all partitions have been changed to the new max_replica_count, ready to update \"\n                 \"the app-level max_replica_count: app_name={}, app_id={}, partition_count={}, \"\n                 \"new_max_replica_count={}\",\n                 app_name,\n                 app->app_id,\n                 app->partition_count,\n                 new_max_replica_count);\n\n        update_app_max_replica_count(app, rpc);\n    };\n\n    {\n        const auto new_max_replica_count = rpc.request().max_replica_count;\n\n        zauto_write_lock l(_lock);\n        for (int32_t i = 0; i < app->partition_count; ++i) {\n            update_partition_max_replica_count(app, i, new_max_replica_count, on_partition_updated);\n        }\n    }\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid server_state::update_app_max_replica_count(std::shared_ptr<app_state> &app,\n                                                configuration_set_max_replica_count_rpc rpc)\n{\n    const auto new_max_replica_count = rpc.request().max_replica_count;\n    const auto old_max_replica_count = rpc.response().old_max_replica_count;\n\n    ddebug_f(\"ready to update remote app-level max_replica_count: app_name={}, app_id={}, \"\n             \"old_max_replica_count={}, new_max_replica_count={}, {}={}\",\n             app->app_name,\n             app->app_id,\n             
old_max_replica_count,\n             new_max_replica_count,\n             replica_envs::UPDATE_MAX_REPLICA_COUNT,\n             app->envs[replica_envs::UPDATE_MAX_REPLICA_COUNT]);\n\n    auto ainfo = *(reinterpret_cast<app_info *>(app.get()));\n    ainfo.max_replica_count = new_max_replica_count;\n    ainfo.envs.erase(replica_envs::UPDATE_MAX_REPLICA_COUNT);\n    auto app_path = get_app_path(*app);\n    do_update_app_info(app_path, ainfo, [this, app, rpc](error_code ec) mutable {\n        const auto new_max_replica_count = rpc.request().max_replica_count;\n        const auto old_max_replica_count = rpc.response().old_max_replica_count;\n\n        zauto_write_lock l(_lock);\n\n        dassert_f(ec == ERR_OK,\n                  \"An error that can't be handled occurs while updating remote app-level \"\n                  \"max_replica_count: error_code={}, app_name={}, app_id={}, \"\n                  \"old_max_replica_count={}, new_max_replica_count={}, {}={}\",\n                  ec.to_string(),\n                  app->app_name,\n                  app->app_id,\n                  old_max_replica_count,\n                  new_max_replica_count,\n                  replica_envs::UPDATE_MAX_REPLICA_COUNT,\n                  app->envs[replica_envs::UPDATE_MAX_REPLICA_COUNT]);\n\n        dassert_f(old_max_replica_count == app->max_replica_count,\n                  \"app-level max_replica_count has been updated to remote storage, however \"\n                  \"old_max_replica_count from response is not consistent with current local \"\n                  \"max_replica_count: app_name={}, app_id={}, old_max_replica_count={}, \"\n                  \"local_max_replica_count={}, new_max_replica_count={}\",\n                  app->app_name,\n                  app->app_id,\n                  old_max_replica_count,\n                  app->max_replica_count,\n                  new_max_replica_count);\n\n        app->max_replica_count = new_max_replica_count;\n        
app->envs.erase(replica_envs::UPDATE_MAX_REPLICA_COUNT);\n        ddebug_f(\"both remote and local app-level max_replica_count have been updated \"\n                 \"successfully: app_name={}, app_id={}, old_max_replica_count={}, \"\n                 \"new_max_replica_count={}\",\n                 app->app_name,\n                 app->app_id,\n                 old_max_replica_count,\n                 new_max_replica_count);\n\n        auto &response = rpc.response();\n        response.err = ERR_OK;\n    });\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid server_state::update_partition_max_replica_count(std::shared_ptr<app_state> &app,\n                                                      int32_t partition_index,\n                                                      int32_t new_max_replica_count,\n                                                      partition_callback on_partition_updated)\n{\n    dassert_f(partition_index < app->partition_count,\n              \"partition_index should be < partition_count: app_name={}, app_id={}, \"\n              \"partition_index={}, partition_count={}, new_max_replica_count={}\",\n              app->app_name,\n              app->app_id,\n              partition_index,\n              app->partition_count,\n              new_max_replica_count);\n\n    const auto &old_partition_config = app->partitions[partition_index];\n    const auto old_max_replica_count = old_partition_config.max_replica_count;\n\n    if (new_max_replica_count == old_max_replica_count) {\n        dwarn_f(\"partition-level max_replica_count has been updated: app_name={}, \"\n                \"app_id={}, partition_index={}, new_max_replica_count={}\",\n                app->app_name,\n                app->app_id,\n                partition_index,\n                new_max_replica_count);\n        return;\n    }\n\n    auto &context = app->helpers->contexts[partition_index];\n    if (context.stage == config_status::pending_remote_sync) {\n        
ddebug_f(\"have to wait until another request which is syncing with remote storage \"\n                 \"is finished, then process the current request of updating max_replica_count: \"\n                 \"app_name={}, app_id={}, partition_index={}, new_max_replica_count={}\",\n                 app->app_name,\n                 app->app_id,\n                 partition_index,\n                 new_max_replica_count);\n\n        tasking::enqueue(\n            LPC_META_STATE_HIGH,\n            tracker(),\n            [this, app, partition_index, new_max_replica_count, on_partition_updated]() mutable {\n                update_partition_max_replica_count(\n                    app, partition_index, new_max_replica_count, on_partition_updated);\n            },\n            server_state::sStateHash,\n            std::chrono::milliseconds(100));\n        return;\n    }\n\n    dassert_f(context.stage == config_status::not_pending,\n              \"invalid config status while updating max_replica_count: context.stage={}, \"\n              \"app_name={}, app_id={}, partition_index={}, new_max_replica_count={}\",\n              enum_to_string(context.stage),\n              app->app_name,\n              app->app_id,\n              partition_index,\n              new_max_replica_count);\n\n    context.stage = config_status::pending_remote_sync;\n    context.pending_sync_request.reset();\n    context.msg = nullptr;\n\n    auto new_partition_config = old_partition_config;\n    new_partition_config.max_replica_count = new_max_replica_count;\n    ++(new_partition_config.ballot);\n    context.pending_sync_task = update_partition_max_replica_count_on_remote(\n        app, new_partition_config, on_partition_updated);\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\ntask_ptr server_state::update_partition_max_replica_count_on_remote(\n    std::shared_ptr<app_state> &app,\n    const partition_configuration &new_partition_config,\n    partition_callback on_partition_updated)\n{\n    const auto 
&gpid = new_partition_config.pid;\n    const auto partition_index = gpid.get_partition_index();\n    const auto new_max_replica_count = new_partition_config.max_replica_count;\n    const auto new_ballot = new_partition_config.ballot;\n\n    const auto level = _meta_svc->get_function_level();\n    if (level <= meta_function_level::fl_blind) {\n        dwarn_f(\"have to wait until meta level becomes more than fl_blind, then process the \"\n                \"current request of updating max_replica_count: current_meta_level={}, \"\n                \"app_name={}, app_id={}, partition_index={}, new_max_replica_count={}, \"\n                \"new_ballot={}\",\n                _meta_function_level_VALUES_TO_NAMES.find(level)->second,\n                app->app_name,\n                app->app_id,\n                partition_index,\n                new_max_replica_count,\n                new_ballot);\n\n        // NOTICE: pending_sync_task should be reassigned\n        return tasking::enqueue(LPC_META_STATE_HIGH,\n                                tracker(),\n                                [this, app, new_partition_config, on_partition_updated]() mutable {\n                                    const auto &gpid = new_partition_config.pid;\n                                    const auto partition_index = gpid.get_partition_index();\n\n                                    zauto_write_lock l(_lock);\n\n                                    auto &context = app->helpers->contexts[partition_index];\n                                    context.pending_sync_task =\n                                        update_partition_max_replica_count_on_remote(\n                                            app, new_partition_config, on_partition_updated);\n                                },\n                                server_state::sStateHash,\n                                std::chrono::seconds(1));\n    }\n\n    ddebug_f(\"request for updating partition-level max_replica_count on remote storage: 
\"\n             \"app_name={}, app_id={}, partition_id={}, new_max_replica_count={}, new_ballot={}\",\n             app->app_name,\n             app->app_id,\n             partition_index,\n             new_max_replica_count,\n             new_ballot);\n\n    auto partition_path = get_partition_path(gpid);\n    auto json_config =\n        dsn::json::json_forwarder<partition_configuration>::encode(new_partition_config);\n    return _meta_svc->get_remote_storage()->set_data(\n        partition_path,\n        json_config,\n        LPC_META_STATE_HIGH,\n        std::bind(&server_state::on_update_partition_max_replica_count_on_remote_reply,\n                  this,\n                  std::placeholders::_1,\n                  app,\n                  new_partition_config,\n                  on_partition_updated),\n        tracker());\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid server_state::on_update_partition_max_replica_count_on_remote_reply(\n    error_code ec,\n    std::shared_ptr<app_state> &app,\n    const partition_configuration &new_partition_config,\n    partition_callback on_partition_updated)\n{\n    const auto &gpid = new_partition_config.pid;\n    const auto partition_index = gpid.get_partition_index();\n    const auto new_max_replica_count = new_partition_config.max_replica_count;\n    const auto new_ballot = new_partition_config.ballot;\n\n    zauto_write_lock l(_lock);\n\n    ddebug_f(\"reply for updating partition-level max_replica_count on remote storage: \"\n             \"error_code={}, app_name={}, app_id={}, partition_id={}, new_max_replica_count={}, \"\n             \"new_ballot={}\",\n             ec.to_string(),\n             app->app_name,\n             app->app_id,\n             partition_index,\n             new_max_replica_count,\n             new_ballot);\n\n    auto &context = app->helpers->contexts[partition_index];\n    if (ec == ERR_TIMEOUT) {\n        // NOTICE: pending_sync_task need to be reassigned\n        
context.pending_sync_task =\n            tasking::enqueue(LPC_META_STATE_HIGH,\n                             tracker(),\n                             [this, app, new_partition_config, on_partition_updated]() mutable {\n                                 const auto &gpid = new_partition_config.pid;\n                                 const auto partition_index = gpid.get_partition_index();\n\n                                 zauto_write_lock l(_lock);\n\n                                 auto &context = app->helpers->contexts[partition_index];\n                                 context.pending_sync_task =\n                                     update_partition_max_replica_count_on_remote(\n                                         app, new_partition_config, on_partition_updated);\n                             },\n                             server_state::sStateHash,\n                             std::chrono::seconds(1));\n        return;\n    }\n\n    if (ec != ERR_OK) {\n        on_partition_updated(ec, partition_index);\n        return;\n    }\n\n    update_partition_max_replica_count_locally(app, new_partition_config);\n\n    context.pending_sync_task = nullptr;\n    context.pending_sync_request.reset();\n    context.stage = config_status::not_pending;\n    context.msg = nullptr;\n\n    on_partition_updated(ec, partition_index);\n}\n\n// ThreadPool: THREAD_POOL_META_STATE\nvoid server_state::update_partition_max_replica_count_locally(\n    std::shared_ptr<app_state> &app, const partition_configuration &new_partition_config)\n{\n    const auto &gpid = new_partition_config.pid;\n    const auto partition_index = gpid.get_partition_index();\n    const auto new_max_replica_count = new_partition_config.max_replica_count;\n    const auto new_ballot = new_partition_config.ballot;\n\n    auto &old_partition_config = app->partitions[gpid.get_partition_index()];\n    const auto old_max_replica_count = old_partition_config.max_replica_count;\n    const auto old_ballot = 
old_partition_config.ballot;\n\n    dassert_f(old_ballot + 1 == new_ballot,\n              \"invalid ballot while updating local max_replica_count: app_name={}, app_id={}, \"\n              \"partition_id={}, old_max_replica_count={}, new_max_replica_count={}, \"\n              \"old_ballot={}, new_ballot={}\",\n              app->app_name,\n              app->app_id,\n              partition_index,\n              old_max_replica_count,\n              new_max_replica_count,\n              old_ballot,\n              new_ballot);\n\n    std::string old_config_str(boost::lexical_cast<std::string>(old_partition_config));\n    std::string new_config_str(boost::lexical_cast<std::string>(new_partition_config));\n\n    old_partition_config = new_partition_config;\n\n    ddebug_f(\"local partition-level max_replica_count has been changed successfully: \"\n             \"app_name={}, app_id={}, partition_id={}, old_partition_config={}, \"\n             \"new_partition_config={}\",\n             app->app_name,\n             app->app_id,\n             partition_index,\n             old_config_str,\n             new_config_str);\n}\n\n// ThreadPool: THREAD_POOL_META_SERVER\nvoid server_state::recover_from_max_replica_count_env()\n{\n    std::vector<std::pair<std::shared_ptr<app_state>, int32_t>> tasks;\n    {\n        zauto_read_lock l(_lock);\n        for (auto &e : _exist_apps) {\n            auto &app = e.second;\n            if (app->status != app_status::AS_AVAILABLE) {\n                continue;\n            }\n\n            auto iter = app->envs.find(replica_envs::UPDATE_MAX_REPLICA_COUNT);\n            if (iter == app->envs.end()) {\n                continue;\n            }\n\n            std::vector<std::string> args;\n            utils::split_args(iter->second.c_str(), args, ';');\n            if (args.empty() || args[0] != \"updating\") {\n                continue;\n            }\n\n            int32_t max_replica_count = 0;\n            if (args.size() < 2 || 
!dsn::buf2int32(args[1], max_replica_count) ||\n                max_replica_count <= 0) {\n                dassert_f(false,\n                          \"invalid max_replica_count_env: app_name={}, app_id={}, \"\n                          \"max_replica_count={}, {}={}\",\n                          app->app_name,\n                          app->app_id,\n                          app->max_replica_count,\n                          replica_envs::UPDATE_MAX_REPLICA_COUNT,\n                          iter->second);\n            }\n\n            tasks.emplace_back(app, max_replica_count);\n        }\n    }\n\n    dsn::task_tracker tracker;\n\n    for (auto &task : tasks) {\n        recover_all_partitions_max_replica_count(task.first, task.second, tracker);\n    }\n    tracker.wait_outstanding_tasks();\n\n    for (auto &task : tasks) {\n        recover_app_max_replica_count(task.first, task.second, tracker);\n    }\n    tracker.wait_outstanding_tasks();\n}\n\n// ThreadPool: THREAD_POOL_META_SERVER\nvoid server_state::recover_all_partitions_max_replica_count(std::shared_ptr<app_state> &app,\n                                                            int32_t new_max_replica_count,\n                                                            dsn::task_tracker &tracker)\n{\n    for (int i = 0; i < app->partition_count; ++i) {\n        zauto_read_lock l(_lock);\n\n        auto new_pc = app->partitions[i];\n        if (new_pc.max_replica_count == new_max_replica_count) {\n            dwarn_f(\"no need to recover partition-level max_replica_count since it has been \"\n                    \"updated before: app_name={}, app_id={}, partition_index={}, \"\n                    \"partition_count={}, new_max_replica_count={}\",\n                    app->app_name,\n                    app->app_id,\n                    i,\n                    app->partition_count,\n                    new_max_replica_count);\n            continue;\n        }\n\n        ddebug_f(\"ready to recover 
partition-level max_replica_count: app_name={}, app_id={}, \"\n                 \"partition_index={}, partition_count={}, old_max_replica_count={}, \"\n                 \"new_max_replica_count={}\",\n                 app->app_name,\n                 app->app_id,\n                 i,\n                 app->partition_count,\n                 app->max_replica_count,\n                 new_max_replica_count);\n\n        new_pc.max_replica_count = new_max_replica_count;\n        ++(new_pc.ballot);\n        auto partition_path = get_partition_path(new_pc.pid);\n        auto value = dsn::json::json_forwarder<partition_configuration>::encode(new_pc);\n        _meta_svc->get_remote_storage()->set_data(\n            partition_path,\n            value,\n            LPC_META_CALLBACK,\n            [this, app, i, new_pc](error_code ec) mutable {\n                zauto_write_lock l(_lock);\n\n                auto &old_pc = app->partitions[i];\n                std::string old_pc_str(boost::lexical_cast<std::string>(old_pc));\n                std::string new_pc_str(boost::lexical_cast<std::string>(new_pc));\n\n                dassert_f(ec == ERR_OK,\n                          \"An error that can't be handled occurs while recovering remote \"\n                          \"partition-level max_replica_count: error_code={}, app_name={}, \"\n                          \"app_id={}, partition_index={}, partition_count={}, \"\n                          \"old_partition_config={}, new_partition_config={}\",\n                          ec.to_string(),\n                          app->app_name,\n                          app->app_id,\n                          i,\n                          app->partition_count,\n                          old_pc_str,\n                          new_pc_str);\n\n                dassert_f(old_pc.ballot + 1 == new_pc.ballot,\n                          \"invalid ballot while recovering max_replica_count: app_name={}, \"\n                          \"app_id={}, 
partition_index={}, partition_count={}, \"\n                          \"old_partition_config={}, new_partition_config={}\",\n                          app->app_name,\n                          app->app_id,\n                          i,\n                          app->partition_count,\n                          old_pc_str,\n                          new_pc_str);\n\n                old_pc = new_pc;\n\n                ddebug_f(\"partition-level max_replica_count has been recovered successfully: \"\n                         \"app_name={}, app_id={}, partition_index={}, partition_count={}, \"\n                         \"old_partition_config={}, new_partition_config={}\",\n                         app->app_name,\n                         app->app_id,\n                         i,\n                         app->partition_count,\n                         old_pc_str,\n                         new_pc_str);\n            },\n            &tracker);\n    }\n}\n\n// ThreadPool: THREAD_POOL_META_SERVER\nvoid server_state::recover_app_max_replica_count(std::shared_ptr<app_state> &app,\n                                                 int32_t new_max_replica_count,\n                                                 dsn::task_tracker &tracker)\n{\n    zauto_read_lock l(_lock);\n\n    ddebug_f(\"ready to recover app-level max_replica_count: app_name={}, app_id={}, \"\n             \"old_max_replica_count={}, new_max_replica_count={}, {}={}\",\n             app->app_name,\n             app->app_id,\n             app->max_replica_count,\n             new_max_replica_count,\n             replica_envs::UPDATE_MAX_REPLICA_COUNT,\n             app->envs[replica_envs::UPDATE_MAX_REPLICA_COUNT]);\n\n    auto ainfo = *(reinterpret_cast<app_info *>(app.get()));\n    ainfo.max_replica_count = new_max_replica_count;\n    ainfo.envs.erase(replica_envs::UPDATE_MAX_REPLICA_COUNT);\n    auto app_path = get_app_path(*app);\n    auto value = dsn::json::json_forwarder<app_info>::encode(ainfo);\n    
_meta_svc->get_remote_storage()->set_data(\n        app_path,\n        value,\n        LPC_META_CALLBACK,\n        [this, app, new_max_replica_count](error_code ec) mutable {\n            zauto_write_lock l(_lock);\n\n            auto old_max_replica_count = app->max_replica_count;\n            dassert_f(ec == ERR_OK,\n                      \"An error that can't be handled occurs while recovering remote \"\n                      \"app-level max_replica_count: error_code={}, app_name={}, app_id={}, \"\n                      \"old_max_replica_count={}, new_max_replica_count={}\",\n                      ec.to_string(),\n                      app->app_name,\n                      app->app_id,\n                      old_max_replica_count,\n                      new_max_replica_count);\n\n            app->max_replica_count = new_max_replica_count;\n            app->envs.erase(replica_envs::UPDATE_MAX_REPLICA_COUNT);\n\n            ddebug_f(\"app-level max_replica_count has been recovered successfully: \"\n                     \"app_name={}, app_id={}, old_max_replica_count={}, \"\n                     \"new_max_replica_count={}\",\n                     app->app_name,\n                     app->app_id,\n                     old_max_replica_count,\n                     app->max_replica_count);\n        },\n        &tracker);\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/server_state.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     the meta server's server_state, definition file\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     2016-04-25, Weijie Sun(sunweijie at xiaomi.com), refactor\n */\n\n#pragma once\n\n#include <boost/lexical_cast.hpp>\n#include <dsn/dist/replication/replication_other_types.h>\n#include <dsn/dist/block_service.h>\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n#include <dsn/tool-api/task_tracker.h>\n#include <gtest/gtest_prod.h>\n#include <unordered_map>\n\n#include \"common/replication_common.h\"\n#include \"meta_data.h\"\n#include \"meta_service.h\"\n\nnamespace dsn {\nnamespace replication {\n\nnamespace test {\nclass test_checker;\n}\n\ntypedef std::function<void(const 
app_mapper & /*new_config*/)> config_change_subscriber;\ntypedef std::function<void(const migration_list &)> replica_migration_subscriber;\n\nclass meta_service;\n\n//\n// Notes for server_state\n//\n// A. structure of remote storage\n//\n// the tree structure on remote storage is like this:\n// _apps_root/<app-id1>/0\n// _apps_root/<app-id1>/1\n// ....\n// _apps_root/<app-id1>/n\n// _apps_root/<app-id2>/0\n// ...\n// the content in _apps_root/<app-id> is a json string for class \"app-info\"\n// the content in _apps_root/<app-id>/<partition-id> is a json string for class\n// \"partition-configuration\"\n//\n// B. app management\n//\n// When receiving create-app request from the DDL client(let's say, NEW-APP-NAME with NEW-APP-ID),\n// we first create the root-node for this app, i.e: _apps_root/<NEW-APP-ID>,\n// then all the partition nodes of NEW-APP-NAME are created asynchronously with multiple tasks.\n// Reasons for this are:\n//    1. an app may have thousands/millions of replicas, creating all these things in a task is\n//    time-consuming,\n//       which may block the update-thread for a long time.\n//    2. there are so many requests to the remote storage that it is very likely to timeout if all\n//    requests are in a task\n// For the same reason, dropping and recalling for an app also have similar implementations.\n// Generally, an app may have several statuses of two kinds:\n//    a. staging-status: creating, dropping, recalling\n//    b. stable-status: available, dropped\n//\n// Notice: in remote storage, only stable status are stored. We can tell if an app-state is in\n// staging by checking if\n// all of its partitions are ready:\n//    1. for available app, it should have as many partitions as app_info.partition_count AND\n//       ALL of the partitions shouldn't have the flags DROPPED\n//    2. 
for dropped app, All of the partitions should have the flags DROPPED\n// If these constraints are not satisfied, it means that some work is not finished.\n// Meta-server will check these constraints and continue the creating/dropping/recalling work if\n// necessary.\n//\n// C. persistence of meta data\n// D. thread-model of meta server\n// E. load balancer\n\nclass server_state\n{\npublic:\n    static const int sStateHash = 0;\n\npublic:\n    server_state();\n    ~server_state();\n\n    void initialize(meta_service *meta_svc, const std::string &apps_root);\n    error_code initialize_data_structure();\n    void register_cli_commands();\n\n    void lock_read(zauto_read_lock &other);\n    void lock_write(zauto_write_lock &other);\n    const meta_view get_meta_view() { return {&_all_apps, &_nodes}; }\n    std::shared_ptr<app_state> get_app(const std::string &name) const\n    {\n        auto iter = _exist_apps.find(name);\n        if (iter == _exist_apps.end())\n            return nullptr;\n        return iter->second;\n    }\n    std::shared_ptr<app_state> get_app(int32_t app_id) const\n    {\n        auto iter = _all_apps.find(app_id);\n        if (iter == _all_apps.end())\n            return nullptr;\n        return iter->second;\n    }\n\n    void query_configuration_by_index(const configuration_query_by_index_request &request,\n                                      /*out*/ configuration_query_by_index_response &response);\n    bool query_configuration_by_gpid(const dsn::gpid id, /*out*/ partition_configuration &config);\n\n    // app options\n    void create_app(dsn::message_ex *msg);\n    void drop_app(dsn::message_ex *msg);\n    void recall_app(dsn::message_ex *msg);\n    void list_apps(const configuration_list_apps_request &request,\n                   configuration_list_apps_response &response);\n    void restore_app(dsn::message_ex *msg);\n\n    // app env operations\n    void set_app_envs(const app_env_rpc &env_rpc);\n    void del_app_envs(const app_env_rpc 
&env_rpc);\n    void clear_app_envs(const app_env_rpc &env_rpc);\n\n    // update configuration\n    void on_config_sync(configuration_query_by_node_rpc rpc);\n    void on_update_configuration(std::shared_ptr<configuration_update_request> &request,\n                                 dsn::message_ex *msg);\n\n    // dump & restore\n    error_code dump_from_remote_storage(const char *local_path, bool sync_immediately);\n    error_code restore_from_local_storage(const char *local_path);\n\n    void on_change_node_state(rpc_address node, bool is_alive);\n    void on_propose_balancer(const configuration_balancer_request &request,\n                             configuration_balancer_response &response);\n    void on_start_recovery(const configuration_recovery_request &request,\n                           configuration_recovery_response &response);\n    void on_recv_restore_report(configuration_report_restore_status_rpc rpc);\n\n    void on_query_restore_status(configuration_query_restore_rpc rpc);\n\n    // manual compaction\n    void on_start_manual_compact(start_manual_compact_rpc rpc);\n    void on_query_manual_compact_status(query_manual_compact_rpc rpc);\n\n    // get/set max_replica_count of an app\n    void get_max_replica_count(configuration_get_max_replica_count_rpc rpc) const;\n    void set_max_replica_count(configuration_set_max_replica_count_rpc rpc);\n    void recover_from_max_replica_count_env();\n\n    // return true if no need to do any actions\n    bool check_all_partitions();\n    void get_cluster_balance_score(double &primary_stddev /*out*/, double &total_stddev /*out*/);\n    void clear_proposals();\n\n    int count_staging_app();\n    // for test\n    void set_config_change_subscriber_for_test(config_change_subscriber subscriber);\n    void set_replica_migration_subscriber_for_test(replica_migration_subscriber subscriber);\n\n    task_tracker *tracker() { return &_tracker; }\n    void wait_all_task() { _tracker.wait_outstanding_tasks(); 
}\n\nprivate:\n    FRIEND_TEST(backup_service_test, test_invalid_backup_request);\n\n    //-1 means waiting forever\n    bool spin_wait_staging(int timeout_seconds = -1);\n    bool can_run_balancer();\n\n    // user should lock it first\n    void update_partition_perf_counter();\n\n    error_code dump_app_states(const char *local_path,\n                               const std::function<app_state *()> &iterator);\n    error_code sync_apps_from_remote_storage();\n    // sync local state to remote storage,\n    // if return OK, all states are synced correctly, and all apps are in stable state\n    // else indicate error that remote storage responses\n    error_code sync_apps_to_remote_storage();\n\n    error_code sync_apps_from_replica_nodes(const std::vector<dsn::rpc_address> &node_list,\n                                            bool skip_bad_nodes,\n                                            bool skip_lost_partitions,\n                                            std::string &hint_message);\n    void\n    sync_app_from_backup_media(const configuration_restore_request &request,\n                               std::function<void(dsn::error_code, const dsn::blob &)> &&callback);\n    std::pair<dsn::error_code, std::shared_ptr<app_state>> restore_app_info(\n        dsn::message_ex *msg, const configuration_restore_request &req, const dsn::blob &app_info);\n\n    error_code initialize_default_apps();\n    void initialize_node_state();\n\n    void check_consistency(const dsn::gpid &gpid);\n\n    error_code construct_apps(const std::vector<query_app_info_response> &query_app_responses,\n                              const std::vector<dsn::rpc_address> &replica_nodes,\n                              std::string &hint_message);\n    error_code construct_partitions(\n        const std::vector<query_replica_info_response> &query_replica_info_responses,\n        const std::vector<dsn::rpc_address> &replica_nodes,\n        bool skip_lost_partitions,\n        std::string 
&hint_message);\n\n    void do_app_create(std::shared_ptr<app_state> &app);\n    void do_app_drop(std::shared_ptr<app_state> &app);\n    void do_app_recall(std::shared_ptr<app_state> &app);\n    void init_app_partition_node(std::shared_ptr<app_state> &app, int pidx, task_ptr callback);\n    // do_update_app_info()\n    //  -- ensure updating app_info to remote storage succeeds; on timeout it retries automatically\n    void do_update_app_info(const std::string &app_path,\n                            const app_info &info,\n                            const std::function<void(error_code)> &cb);\n\n    task_ptr\n    update_configuration_on_remote(std::shared_ptr<configuration_update_request> &config_request);\n    void\n    on_update_configuration_on_remote_reply(error_code ec,\n                                            std::shared_ptr<configuration_update_request> &request);\n    void\n    update_configuration_locally(app_state &app,\n                                 std::shared_ptr<configuration_update_request> &config_request);\n    void request_check(const partition_configuration &old,\n                       const configuration_update_request &request);\n    void recall_partition(std::shared_ptr<app_state> &app, int pidx);\n    void drop_partition(std::shared_ptr<app_state> &app, int pidx);\n    void downgrade_primary_to_inactive(std::shared_ptr<app_state> &app, int pidx);\n    void downgrade_secondary_to_inactive(std::shared_ptr<app_state> &app,\n                                         int pidx,\n                                         const rpc_address &node);\n    void downgrade_stateless_nodes(std::shared_ptr<app_state> &app,\n                                   int pidx,\n                                   const rpc_address &address);\n\n    void on_partition_node_dead(std::shared_ptr<app_state> &app,\n                                int pidx,\n                                const dsn::rpc_address &address);\n    void send_proposal(rpc_address target, const 
configuration_update_request &proposal);\n    void send_proposal(const configuration_proposal_action &action,\n                       const partition_configuration &pc,\n                       const app_state &app);\n\n    // util function\n    int32_t next_app_id() const\n    {\n        if (_all_apps.empty())\n            return 1;\n        // return the max_id + 1\n        return ((--_all_apps.end())->first) + 1;\n    }\n    std::string get_app_path(const app_state &app) const\n    {\n        return _apps_root + \"/\" + boost::lexical_cast<std::string>(app.app_id);\n    }\n    std::string get_partition_path(const dsn::gpid &gpid) const\n    {\n        std::stringstream oss;\n        oss << _apps_root << \"/\" << gpid.get_app_id() << \"/\" << gpid.get_partition_index();\n        return oss.str();\n    }\n    std::string get_partition_path(const app_state &app, int partition_id) const\n    {\n        std::stringstream oss;\n        oss << _apps_root << \"/\" << app.app_id << \"/\" << partition_id;\n        return oss.str();\n    }\n\n    void process_one_partition(std::shared_ptr<app_state> &app);\n    void transition_staging_state(std::shared_ptr<app_state> &app);\n\n    // check whether a max replica count is valid especially for a new app\n    bool validate_target_max_replica_count(int32_t max_replica_count,\n                                           std::string &hint_message) const;\n    bool validate_target_max_replica_count(int32_t max_replica_count) const;\n\n    template <typename Response>\n    std::shared_ptr<app_state> get_app_and_check_exist(const std::string &app_name,\n                                                       Response &response) const;\n\n    template <typename Response>\n    bool check_max_replica_count_consistent(const std::shared_ptr<app_state> &app,\n                                            Response &response) const;\n\n    void set_max_replica_count_env_updating(std::shared_ptr<app_state> &app,\n                                  
          configuration_set_max_replica_count_rpc rpc);\n    using partition_callback = std::function<void(error_code, int32_t)>;\n    void do_update_max_replica_count(std::shared_ptr<app_state> &app,\n                                     configuration_set_max_replica_count_rpc rpc);\n    void update_app_max_replica_count(std::shared_ptr<app_state> &app,\n                                      configuration_set_max_replica_count_rpc rpc);\n    void update_partition_max_replica_count(std::shared_ptr<app_state> &app,\n                                            int32_t partition_index,\n                                            int32_t new_max_replica_count,\n                                            partition_callback on_partition_updated);\n    task_ptr update_partition_max_replica_count_on_remote(\n        std::shared_ptr<app_state> &app,\n        const partition_configuration &new_partition_config,\n        partition_callback on_partition_updated);\n    void on_update_partition_max_replica_count_on_remote_reply(\n        error_code ec,\n        std::shared_ptr<app_state> &app,\n        const partition_configuration &new_partition_config,\n        partition_callback on_partition_updated);\n    void\n    update_partition_max_replica_count_locally(std::shared_ptr<app_state> &app,\n                                               const partition_configuration &new_partition_config);\n\n    void recover_all_partitions_max_replica_count(std::shared_ptr<app_state> &app,\n                                                  int32_t max_replica_count,\n                                                  dsn::task_tracker &tracker);\n    void recover_app_max_replica_count(std::shared_ptr<app_state> &app,\n                                       int32_t max_replica_count,\n                                       dsn::task_tracker &tracker);\n\n    // Used for `on_start_manual_compaction`\n    bool parse_compaction_envs(start_manual_compact_rpc rpc,\n                               
std::vector<std::string> &keys,\n                               std::vector<std::string> &values);\n    void update_compaction_envs_on_remote_storage(start_manual_compact_rpc rpc,\n                                                  const std::vector<std::string> &keys,\n                                                  const std::vector<std::string> &values);\n\nprivate:\n    friend class bulk_load_service;\n    friend class bulk_load_service_test;\n    friend class meta_app_operation_test;\n    friend class meta_duplication_service;\n    friend class meta_duplication_service_test;\n    friend class meta_partition_guardian_test;\n    friend class meta_split_service;\n    friend class meta_split_service_test;\n    friend class meta_service_test_app;\n    friend class meta_test_base;\n    friend class test::test_checker;\n    friend class server_state_restore_test;\n    friend class meta_app_compaction_test;\n\n    FRIEND_TEST(meta_backup_service_test, test_add_backup_policy);\n    FRIEND_TEST(policy_context_test, test_app_dropped_during_backup);\n    FRIEND_TEST(policy_context_test, test_backup_failed);\n\n    dsn::task_tracker _tracker;\n\n    meta_service *_meta_svc;\n    std::string _apps_root;\n\n    mutable zrwlock_nr _lock;\n    node_mapper _nodes;\n\n    // available apps, dropping apps, creating apps: name -> app_state\n    std::map<std::string, std::shared_ptr<app_state>> _exist_apps;\n    //_exist_apps + dropped apps: app_id -> app_state\n    app_mapper _all_apps;\n\n    // for load balancer\n    migration_list _temporary_list;\n\n    // for test\n    config_change_subscriber _config_change_subscriber;\n    replica_migration_subscriber _replica_migration_subscriber;\n\n    bool _add_secondary_enable_flow_control;\n    int32_t _add_secondary_max_count_for_one_node;\n    dsn_handle_t _cli_dump_handle;\n    dsn_handle_t _ctrl_add_secondary_enable_flow_control;\n    dsn_handle_t _ctrl_add_secondary_max_count_for_one_node;\n\n    perf_counter_wrapper 
_dead_partition_count;\n    perf_counter_wrapper _unreadable_partition_count;\n    perf_counter_wrapper _unwritable_partition_count;\n    perf_counter_wrapper _writable_ill_partition_count;\n    perf_counter_wrapper _healthy_partition_count;\n    perf_counter_wrapper _recent_update_config_count;\n    perf_counter_wrapper _recent_partition_change_unwritable_count;\n    perf_counter_wrapper _recent_partition_change_writable_count;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/server_state_restore.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <boost/lexical_cast.hpp>\n#include <dsn/dist/block_service.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/filesystem.h>\n\n#include \"block_service/block_service_manager.h\"\n#include \"common/backup_common.h\"\n#include \"meta_service.h\"\n#include \"server_state.h\"\n\nusing namespace dsn::dist::block_service;\n\nnamespace dsn {\nnamespace replication {\n\nvoid server_state::sync_app_from_backup_media(\n    const configuration_restore_request &request,\n    std::function<void(error_code, const blob &)> &&callback)\n{\n    dsn::ref_ptr<dsn::future_task<dsn::error_code, dsn::blob>> callback_tsk(\n        new dsn::future_task<dsn::error_code, dsn::blob>(\n            LPC_RESTORE_BACKGROUND, std::move(callback), 0));\n\n    block_filesystem *blk_fs =\n        _meta_svc->get_block_service_manager().get_or_create_block_filesystem(\n            request.backup_provider_name);\n    if (blk_fs == nullptr) {\n        derror(\"acquire block_filesystem(%s) failed\", request.backup_provider_name.c_str());\n        callback_tsk->enqueue_with(ERR_INVALID_PARAMETERS, dsn::blob());\n        return;\n    }\n\n    std::string backup_root 
= request.cluster_name;\n    if (request.__isset.restore_path) {\n        backup_root = dsn::utils::filesystem::path_combine(request.restore_path, backup_root);\n    }\n    if (!request.policy_name.empty()) {\n        backup_root = dsn::utils::filesystem::path_combine(backup_root, request.policy_name);\n    }\n    std::string app_metadata = cold_backup::get_app_metadata_file(\n        backup_root, request.app_name, request.app_id, request.time_stamp);\n\n    error_code err = ERR_OK;\n    block_file_ptr file_handle = nullptr;\n    ddebug_f(\"start to create metadata file {}\", app_metadata);\n    blk_fs\n        ->create_file(create_file_request{app_metadata, true},\n                      TASK_CODE_EXEC_INLINED,\n                      [&err, &file_handle](const create_file_response &resp) {\n                          err = resp.err;\n                          file_handle = resp.file_handle;\n                      })\n        ->wait();\n\n    if (err != ERR_OK) {\n        derror_f(\"create metadata file {} failed.\", app_metadata);\n        callback_tsk->enqueue_with(err, dsn::blob());\n        return;\n    }\n    dassert(file_handle != nullptr, \"create file from backup media ecounter error\");\n    file_handle->read(\n        read_request{0, -1}, TASK_CODE_EXEC_INLINED, [callback_tsk](const read_response &resp) {\n            callback_tsk->enqueue_with(resp.err, resp.buffer);\n        });\n}\n\nstd::pair<dsn::error_code, std::shared_ptr<app_state>> server_state::restore_app_info(\n    dsn::message_ex *msg, const configuration_restore_request &req, const dsn::blob &app_info)\n{\n    std::pair<dsn::error_code, std::shared_ptr<app_state>> res = std::make_pair(ERR_OK, nullptr);\n\n    dsn::app_info info;\n    if (!::dsn::json::json_forwarder<dsn::app_info>::decode(app_info, info)) {\n        std::string b_str(app_info.data(), app_info.length());\n        derror_f(\"decode app_info '{}' failed\", b_str);\n        // NOTICE : maybe find a better error_code to replace 
err_corruption\n        res.first = ERR_CORRUPTION;\n        return res;\n    }\n    int32_t old_app_id = info.app_id;\n    std::string old_app_name = info.app_name;\n    dassert(old_app_id == req.app_id, \"invalid app_id, %d VS %d\", old_app_id, req.app_id);\n    dassert(old_app_name == req.app_name,\n            \"invalid app_name, %s VS %s\",\n            old_app_name.c_str(),\n            req.app_name.c_str());\n    std::shared_ptr<app_state> app = nullptr;\n\n    if (!req.new_app_name.empty()) {\n        info.app_name = req.new_app_name;\n    }\n\n    {\n        // check whether appid and app_name/new_app_name is valid\n        zauto_write_lock l(_lock);\n        app = get_app(info.app_name);\n        if (app != nullptr) {\n            res.first = ERR_INVALID_PARAMETERS;\n            return res;\n        } else {\n            info.app_id = next_app_id();\n            app = app_state::create(info);\n            app->status = app_status::AS_CREATING;\n            app->helpers->pending_response = msg;\n            app->helpers->partitions_in_progress.store(info.partition_count);\n\n            _all_apps.emplace(app->app_id, app);\n            _exist_apps.emplace(info.app_name, app);\n        }\n    }\n    // TODO: using one single env to replace\n    app->envs[backup_restore_constant::BLOCK_SERVICE_PROVIDER] = req.backup_provider_name;\n    app->envs[backup_restore_constant::CLUSTER_NAME] = req.cluster_name;\n    app->envs[backup_restore_constant::POLICY_NAME] = req.policy_name;\n    app->envs[backup_restore_constant::APP_NAME] = old_app_name;\n    app->envs[backup_restore_constant::APP_ID] = std::to_string(old_app_id);\n    app->envs[backup_restore_constant::BACKUP_ID] = std::to_string(req.time_stamp);\n    if (req.skip_bad_partition) {\n        app->envs[backup_restore_constant::SKIP_BAD_PARTITION] = std::string(\"true\");\n    }\n    if (req.__isset.restore_path) {\n        app->envs[backup_restore_constant::RESTORE_PATH] = req.restore_path;\n    }\n    
res.second.swap(app);\n    return res;\n}\n\nvoid server_state::restore_app(dsn::message_ex *msg)\n{\n    configuration_restore_request request;\n    dsn::unmarshall(msg, request);\n    sync_app_from_backup_media(\n        request, [this, msg, request](dsn::error_code err, const dsn::blob &app_info_data) {\n            dsn::error_code ec = ERR_OK;\n            // if err != ERR_OK, then sync_app_from_backup_media ecounter some error\n            if (err != ERR_OK) {\n                derror(\"sync app_info_data from backup media failed with err(%s)\", err.to_string());\n                ec = err;\n            } else {\n                auto pair = restore_app_info(msg, request, app_info_data);\n                if (pair.first != ERR_OK) {\n                    ec = pair.first;\n                } else {\n                    dassert(pair.second != nullptr, \"app info shouldn't be empty\");\n                    // the same with create_app\n                    do_app_create(pair.second);\n                    return;\n                }\n            }\n            if (ec != ERR_OK) {\n                configuration_create_app_response response;\n                response.err = ec;\n                response.appid = -1;\n                _meta_svc->reply_data(msg, response);\n                msg->release_ref();\n            }\n        });\n}\n\nvoid server_state::on_recv_restore_report(configuration_report_restore_status_rpc rpc)\n{\n    zauto_write_lock l(_lock);\n\n    const configuration_report_restore_status_request &request = rpc.request();\n    configuration_report_restore_status_response &response = rpc.response();\n    response.err = ERR_OK;\n\n    std::shared_ptr<app_state> app = get_app(request.pid.get_app_id());\n    if (app == nullptr) {\n        response.err = ERR_OBJECT_NOT_FOUND;\n    } else {\n        restore_state &r_state = app->helpers->restore_states[request.pid.get_partition_index()];\n        if (r_state.restore_status != request.restore_status) {\n            
r_state.restore_status = request.restore_status;\n        }\n        // TODO: for simply we don't allow progress to rollback;\n        // when restore-app, if meta crash, meta may assign primary to different server, so\n        // progress-rollback will happen, wait to process this situation\n        if (r_state.progress < request.progress) {\n            r_state.progress = request.progress;\n        }\n        if (request.__isset.reason) {\n            r_state.reason = request.reason;\n        }\n        ddebug(\"%d.%d restore report: restore_status(%s), progress(%d)\",\n               request.pid.get_app_id(),\n               request.pid.get_partition_index(),\n               request.restore_status.to_string(),\n               request.progress);\n    }\n}\n\nvoid server_state::on_query_restore_status(configuration_query_restore_rpc rpc)\n{\n    zauto_read_lock l(_lock);\n\n    const configuration_query_restore_request &request = rpc.request();\n    configuration_query_restore_response &response = rpc.response();\n    response.err = ERR_OK;\n\n    std::shared_ptr<app_state> app = get_app(request.restore_app_id);\n    if (app == nullptr) {\n        response.err = ERR_APP_NOT_EXIST;\n    } else {\n        if (app->status == app_status::AS_DROPPED) {\n            response.err = ERR_APP_DROPPED;\n        } else {\n            response.restore_progress.resize(app->partition_count,\n                                             cold_backup_constant::PROGRESS_FINISHED);\n            response.restore_status.resize(app->partition_count, ERR_OK);\n            for (int32_t i = 0; i < app->partition_count; i++) {\n                const auto &r_state = app->helpers->restore_states[i];\n                const auto &p = app->partitions[i];\n                if (!p.primary.is_invalid() || !p.secondaries.empty()) {\n                    // already have primary, restore succeed\n                    continue;\n                } else {\n                    if (r_state.progress < 
response.restore_progress[i]) {\n                        response.restore_progress[i] = r_state.progress;\n                    }\n                }\n                response.restore_status[i] = r_state.restore_status;\n            }\n        }\n    }\n}\n}\n}\n"
  },
  {
    "path": "src/meta/test/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn.meta.test)\n\n# Source files under CURRENT project directory will be automatically included.\n# You can manually set MY_PROJ_SRC to include source files under other directories.\nfile(GLOB MY_PROJ_SRC\n        ${PROJECT_SOURCE_DIR}/src/meta/*.cpp\n        ${PROJECT_SOURCE_DIR}/src/meta/duplication/*.cpp\n        )\nset(MY_PROJ_SRC ${MY_PROJ_SRC} misc/misc.cpp)\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS\n        dsn.replication.zookeeper_provider\n        dsn_replication_common\n        dsn.block_service\n        dsn.block_service.local\n        dsn.block_service.fds\n        dsn.block_service.hdfs\n        dsn.failure_detector\n        dsn_dist_cmd\n        dsn_http\n        dsn_runtime\n        dsn_aio\n        zookeeper\n        hashtable\n        galaxy-fds-sdk-cpp\n        PocoNet\n        PocoFoundation\n        PocoNetSSL\n        PocoJSON\n        crypto\n        gtest\n        ssl\n        hdfs)\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\n# Extra files that will be installed\nset(MY_BINPLACES clear.sh run.sh config-test.ini suite1 suite2)\n\nadd_definitions(-DDSN_MOCK_TEST)\ndsn_add_test()\n\nadd_subdirectory(balancer_simulator)\nadd_subdirectory(meta_state)\n"
  },
  {
    "path": "src/meta/test/backup_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/service_api_cpp.h>\n#include <dsn/utility/fail_point.h>\n#include <dsn/utils/time_utils.h>\n#include <gtest/gtest.h>\n\n#include \"meta/meta_backup_service.h\"\n#include \"meta/meta_service.h\"\n#include \"meta/test/misc/misc.h\"\n#include \"meta_service_test_app.h\"\n#include \"meta_test_base.h\"\n\nnamespace dsn {\nnamespace replication {\n\nstruct method_record\n{\n    dsn::utils::notify_event event;\n    int count;\n    int max_call_count;\n    // whether the event will be triggered when count==max_call_count\n    bool trigger_beyond;\n\n    method_record() : event(), count(0), max_call_count(1000000), trigger_beyond(true) {}\n};\n\nclass mock_base\n{\npublic:\n    void reset_records() { _records.clear(); }\n\nprotected:\n    std::map<std::string, method_record> _records;\n};\n\n#define MOCK_ADD_RECORD(records, method_name) records[#method_name] = method_record()\n#define MOCK_HELPER_FUNCS(method_name)                                                             \\\n    int &counter_##method_name() { return _records[#method_name].count; }                          \\\n    dsn::utils::notify_event &notifier_##method_name() { 
return _records[#method_name].event; }    \\\n    int maxcall_##method_name() { return _records[#method_name].max_call_count; }                  \\\n    void set_maxcall_##method_name(int callcount)                                                  \\\n    {                                                                                              \\\n        _records[#method_name].max_call_count = callcount;                                         \\\n    }                                                                                              \\\n    bool &trigger_beyond_##method_name() { return _records[#method_name].trigger_beyond; }\n\n#define DEFINE_MOCK0(base_class, method_name)                                                      \\\n    MOCK_HELPER_FUNCS(method_name)                                                                 \\\n    void method_name()                                                                             \\\n    {                                                                                              \\\n        ddebug(\"%s is called\", #method_name);                                                      \\\n        int &c = counter_##method_name();                                                          \\\n        ++c;                                                                                       \\\n        int max_call = maxcall_##method_name();                                                    \\\n        if (c <= max_call) {                                                                       \\\n            base_class::method_name();                                                             \\\n        }                                                                                          \\\n        if (c > max_call || (c == max_call && !trigger_beyond_##method_name())) {                  \\\n            notifier_##method_name().notify();                                                     \\\n    
    }                                                                                          \\\n    }\n\n#define DEFINE_MOCK1(base_class, method_name, type1)                                               \\\n    MOCK_HELPER_FUNCS(method_name)                                                                 \\\n    void method_name(type1 arg1)                                                                   \\\n    {                                                                                              \\\n        ddebug(\"%s is called\", #method_name);                                                      \\\n        int &c = counter_##method_name();                                                          \\\n        ++c;                                                                                       \\\n        int max_call = maxcall_##method_name();                                                    \\\n        if (c <= max_call) {                                                                       \\\n            base_class::method_name(arg1);                                                         \\\n        }                                                                                          \\\n        if (c > max_call || (c == max_call && !trigger_beyond_##method_name())) {                  \\\n            notifier_##method_name().notify();                                                     \\\n        }                                                                                          \\\n    }\n\n#define DEFINE_MOCK2(base_class, method_name, type1, type2)                                        \\\n    MOCK_HELPER_FUNCS(method_name)                                                                 \\\n    void method_name(type1 arg1, type2 arg2)                                                       \\\n    {                                                                                              \\\n        ddebug(\"%s is 
called\", #method_name);                                                      \\\n        int &c = counter_##method_name();                                                          \\\n        ++c;                                                                                       \\\n        int max_call = maxcall_##method_name();                                                    \\\n        if (c <= max_call) {                                                                       \\\n            base_class::method_name(arg1, arg2);                                                   \\\n        }                                                                                          \\\n        if (c > max_call || (c == max_call && !trigger_beyond_##method_name())) {                  \\\n            notifier_##method_name().notify();                                                     \\\n        }                                                                                          \\\n    }\n\n#define DEFINE_MOCK3(base_class, method_name, type1, type2, type3)                                 \\\n    MOCK_HELPER_FUNCS(method_name)                                                                 \\\n    void method_name(type1 arg1, type2 arg2, type3, arg3)                                          \\\n    {                                                                                              \\\n        ddebug(\"%s is called\", #method_name);                                                      \\\n        int &c = counter_##method_name();                                                          \\\n        ++c;                                                                                       \\\n        int max_call = maxcall_##method_name();                                                    \\\n        if (c <= max_call) {                                                                       \\\n            base_class::method_name(arg1, arg2, 
arg3);                                             \\\n        }                                                                                          \\\n        if (c > max_call || (c == max_call && !trigger_beyond_##method_name())) {                  \\\n            notifier_##method_name().notify();                                                     \\\n        }                                                                                          \\\n    }\n\nclass mock_policy : public policy_context, public mock_base\n{\npublic:\n    mock_policy(backup_service *bs) : policy_context(bs) {}\n    DEFINE_MOCK0(policy_context, issue_new_backup_unlocked)\n    DEFINE_MOCK0(policy_context, continue_current_backup_unlocked)\n    DEFINE_MOCK1(policy_context, start_backup_app_meta_unlocked, int32_t)\n    DEFINE_MOCK1(policy_context, finish_backup_app_unlocked, int32_t)\n    DEFINE_MOCK2(policy_context, write_backup_app_finish_flag_unlocked, int32_t, dsn::task_ptr)\n\n    MOCK_HELPER_FUNCS(start)\n    void start()\n    {\n        ++counter_start();\n        notifier_start().notify();\n    }\n};\n\nclass progress_liar : public meta_service\n{\npublic:\n    // req is held by callback, we don't need to handle the life-time of it\n    virtual void send_request(dsn::message_ex *req,\n                              const rpc_address &target,\n                              const rpc_response_task_ptr &callback)\n    {\n        // need to handle life-time manually\n        dsn::message_ex *recved_req = create_corresponding_receive(req);\n\n        backup_request b_req;\n        dsn::unmarshall(recved_req, b_req);\n\n        backup_response b_resp;\n        b_resp.backup_id = b_req.backup_id;\n        b_resp.err = dsn::ERR_OK;\n        b_resp.pid = b_req.pid;\n        b_resp.policy_name = b_req.policy.policy_name;\n        b_resp.progress = check_progress(b_req.pid);\n\n        // need to handle life-time manually\n        dsn::message_ex *response_for_send = 
recved_req->create_response();\n        dsn::marshall(response_for_send, b_resp);\n\n        // life time is handled by callback\n        dsn::message_ex *response_for_receive = create_corresponding_receive(response_for_send);\n        callback->enqueue(dsn::ERR_OK, (dsn::message_ex *)response_for_receive);\n\n        destroy_message(recved_req);\n        destroy_message(response_for_send);\n    }\n\n    int32_t check_progress(const gpid &pid)\n    {\n        if (progress.find(pid) == progress.end()) {\n            progress[pid] = 500;\n        } else if (progress[pid] == 500) {\n            progress[pid] = 250;\n        } else if (progress[pid] == 250) {\n            progress[pid] = 1000;\n        }\n\n        return progress[pid];\n    }\n\nprivate:\n    std::map<gpid, int32_t> progress;\n};\n\nstatic const std::string test_policy_name = \"test_policy_name\";\n\nclass policy_context_test : public meta_test_base\n{\nprotected:\n    policy_context_test() : _service(new progress_liar()), _mp(nullptr) {}\n\n    void SetUp() override\n    {\n        meta_test_base::SetUp();\n\n        dsn::error_code ec = _service->remote_storage_initialize();\n        ASSERT_EQ(ec, dsn::ERR_OK);\n        _service->_started = true;\n        _service->_backup_handler =\n            std::make_shared<backup_service>(_service.get(), policy_root, \".\", nullptr);\n        _service->_backup_handler->backup_option().app_dropped_retry_delay_ms = 500_ms;\n        _service->_backup_handler->backup_option().request_backup_period_ms = 20_ms;\n        _service->_backup_handler->backup_option().issue_backup_interval_ms = 1000_ms;\n        _service->_storage\n            ->create_node(\n                policy_root, dsn::TASK_CODE_EXEC_INLINED, [&ec](dsn::error_code err) { ec = err; })\n            ->wait();\n        ASSERT_EQ(dsn::ERR_OK, ec);\n\n        _policy.policy_name = test_policy_name;\n        _policy.is_disable = false;\n        _policy.backup_interval_seconds = 5;\n        
_policy.backup_provider_type = \"local_service\";\n        _policy.start_time = backup_start_time(24, 0);\n        _policy.app_ids = {1, 2, 3, 4, 6};\n        _policy.app_names[1] = \"app1\";\n        _policy.app_names[2] = \"app2\";\n        _policy.app_names[3] = \"app3\";\n        _policy.app_names[4] = \"app4\";\n        _policy.app_names[6] = \"app6\";\n        _mp._backup_service = _service->_backup_handler.get();\n        _mp.set_policy(policy(_policy));\n\n        _service->_storage\n            ->create_node(\n                policy_dir, dsn::TASK_CODE_EXEC_INLINED, [&ec](dsn::error_code err) { ec = err; })\n            ->wait();\n        ASSERT_EQ(dsn::ERR_OK, ec);\n    }\n\n    const std::string policy_root = \"/test\";\n    const std::string policy_dir = \"/test/\" + test_policy_name;\n\n    std::shared_ptr<meta_service> _service;\n    mock_policy _mp;\n    policy _policy;\n};\n\nTEST_F(policy_context_test, test_app_dropped_during_backup)\n{\n    int64_t time_before_backup = static_cast<int64_t>(dsn_now_ms());\n    server_state *state = _service->get_server_state();\n\n    {\n        // Prepare: backup_history is empty, all apps are deleted.\n        // Result: we can't get continue-curr called, issue will be recalled again\n        std::cout << \"issue a backup, but no app is available\" << std::endl;\n\n        {\n            zauto_lock l(_mp._lock);\n            _mp.set_maxcall_issue_new_backup_unlocked(2);\n            _mp.issue_new_backup_unlocked();\n        }\n\n        ASSERT_TRUE(_mp.notifier_issue_new_backup_unlocked().wait_for(5000));\n\n        {\n            zauto_lock l(_mp._lock);\n            ASSERT_EQ(0, _mp.counter_continue_current_backup_unlocked());\n            ASSERT_LE(time_before_backup, _mp._cur_backup.backup_id);\n            ASSERT_EQ(_policy.app_ids, _mp._cur_backup.app_ids);\n            ASSERT_NE(0, _mp._cur_backup.start_time_ms);\n            ASSERT_TRUE(_mp._progress.unfinished_partitions_per_app.empty());\n            
ASSERT_EQ(_policy.app_ids.size(), _mp._progress.unfinished_apps);\n            ASSERT_LE(test_policy_name + std::string(\"@\") + std::to_string(time_before_backup),\n                      _mp._backup_sig);\n        }\n    }\n\n    {\n        // Prepare: backup_history is empty\n        //          not all apps are deleted.\n        // Result: we can get continue-curr called\n        std::cout << \"issue a new backup without backup histories\" << std::endl;\n        dsn::app_info info;\n        info.is_stateful = true;\n        info.app_id = 3;\n        info.app_type = \"simple_kv\";\n        info.max_replica_count = 3;\n        info.partition_count = 32;\n        info.status = dsn::app_status::AS_AVAILABLE;\n        state->_all_apps.emplace(info.app_id, app_state::create(info));\n\n        {\n            zauto_lock l(_mp._lock);\n            _mp.reset_records();\n            _mp.set_maxcall_continue_current_backup_unlocked(0);\n            _mp.issue_new_backup_unlocked();\n        }\n\n        ASSERT_TRUE(_mp.notifier_continue_current_backup_unlocked().wait_for(5000));\n\n        {\n            zauto_lock l(_mp._lock);\n            ASSERT_EQ(_policy.app_ids.size(), _mp._progress.unfinished_apps);\n            ASSERT_EQ(1, _mp._progress.unfinished_partitions_per_app.size());\n            ASSERT_EQ(info.app_id, _mp._progress.unfinished_partitions_per_app.begin()->first);\n            ASSERT_EQ(info.partition_count,\n                      _mp._progress.unfinished_partitions_per_app.begin()->second);\n            ASSERT_EQ(info.partition_count, _mp._progress.partition_progress.size());\n        }\n    }\n\n    {\n        // test cases\n        // Prepare: backup_history isn't empty,\n        //          all apps are unavailable,\n        //          we will reach next backup time 500ms later\n        // Result: issue called 3 times\n        std::cout << \"issue a new backup later\" << std::endl;\n\n        backup_info info;\n        info.app_ids = {1, 2, 3};\n\n        
info.start_time_ms = dsn_now_ms() - (_policy.backup_interval_seconds + 20) * 1000 - 500;\n        info.end_time_ms = info.start_time_ms + 10;\n        info.backup_id = info.start_time_ms;\n        _mp.add_backup_history(info);\n\n        info.start_time_ms += 10000;\n        info.end_time_ms += 10000;\n        info.backup_id = info.start_time_ms;\n        _mp.add_backup_history(info);\n\n        // the start time for recent backup is 500ms ago\n        info.start_time_ms += 10000;\n        info.end_time_ms += 10000;\n        info.backup_id = info.start_time_ms;\n        _mp.add_backup_history(info);\n\n        {\n            zauto_lock l(_mp._lock);\n            _mp.reset_records();\n            // issue by test -> issue by period delay -> issue by dropped retry ->\n            // issue by dropped retry\n            _mp.set_maxcall_issue_new_backup_unlocked(4);\n            state->_all_apps[3]->status = dsn::app_status::AS_DROPPED;\n\n            _mp.issue_new_backup_unlocked();\n        }\n        // we mark all apps as dropped, so reissue will be triggered\n        ASSERT_TRUE(_mp.notifier_issue_new_backup_unlocked().wait_for(20000));\n\n        {\n            int64_t start_time_ms_of_sixth_backup =\n                info.start_time_ms + _policy.backup_interval_seconds * 1000;\n            zauto_lock l(_mp._lock);\n            ASSERT_LE(start_time_ms_of_sixth_backup, _mp._cur_backup.backup_id);\n            ASSERT_EQ(_policy.app_ids, _mp._cur_backup.app_ids);\n\n            // every time a backup is initialized, the progress will be reset\n            ASSERT_TRUE(_mp._progress.unfinished_partitions_per_app.empty());\n            ASSERT_TRUE(_mp._progress.partition_progress.empty());\n            ASSERT_EQ(_policy.app_ids.size(), _mp._progress.unfinished_apps);\n            ASSERT_LE(test_policy_name + \"@\" + std::to_string(start_time_ms_of_sixth_backup),\n                      _mp._backup_sig);\n        }\n    }\n\n    {\n        // test case: continue current backup 
unlocked\n        // Prepare: app 3 is available\n        //          clear the backup list\n        //          call continue_current_backup_unlocked.\n        // Result: apps {1, 2, 4, 6} will be treated as finished, both finish_backup_app_unlocked\n        //         and write_backup_app_finish_flag_unlocked will be called 4 times.\n        //         start_backup_app_meta is called for app 3, only called once,\n        //         as app 3 won't be finished, so the backup can't finish\n        std::cout << \"continue backup, only some apps are available \" << std::endl;\n        {\n            zauto_lock l(_mp._lock);\n            _mp._backup_history.clear();\n            _mp.reset_records();\n            _mp.set_maxcall_start_backup_app_meta_unlocked(0);\n\n            _mp.set_maxcall_finish_backup_app_unlocked(4);\n            _mp.trigger_beyond_finish_backup_app_unlocked() = false;\n            _mp.set_maxcall_write_backup_app_finish_flag_unlocked(4);\n            _mp.trigger_beyond_write_backup_app_finish_flag_unlocked() = false;\n\n            state->_all_apps[3]->status = dsn::app_status::AS_AVAILABLE;\n            _mp.issue_new_backup_unlocked();\n        }\n\n        ASSERT_TRUE(_mp.notifier_start_backup_app_meta_unlocked().wait_for(10000));\n        ASSERT_TRUE(_mp.notifier_finish_backup_app_unlocked().wait_for(10000));\n        ASSERT_TRUE(_mp.notifier_write_backup_app_finish_flag_unlocked().wait_for(10000));\n\n        {\n            zauto_lock l(_mp._lock);\n            ASSERT_EQ(1, _mp.counter_start_backup_app_meta_unlocked());\n            ASSERT_EQ(4, _mp.counter_finish_backup_app_unlocked());\n        }\n    }\n\n    {\n        // test case: app is dropped when start backup meta\n        // Prepare: prepare the current backup, then mark the app as dropped\n        // Result: all apps will be marked as finished, new backup will be issued\n        std::cout << \"app is dropped when start to backup meta\" << std::endl;\n        app_state *app = 
state->_all_apps[3].get();\n\n        {\n            zauto_lock l(_mp._lock);\n            _mp._backup_history.clear();\n            _mp.reset_records();\n\n            _mp.prepare_current_backup_on_new_unlocked();\n            dsn::task_ptr tsk = tasking::create_task(TASK_CODE_EXEC_INLINED, nullptr, []() {});\n            _mp.sync_backup_to_remote_storage_unlocked(_mp._cur_backup, tsk, true);\n            tsk->wait();\n            _mp.set_maxcall_issue_new_backup_unlocked(1);\n\n            ASSERT_EQ(_mp._progress.unfinished_apps, _policy.app_ids.size());\n            app->status = dsn::app_status::AS_DROPPED;\n\n            _mp.continue_current_backup_unlocked();\n        }\n\n        // new backup will be issued 5s later.\n        ASSERT_TRUE(_mp.notifier_issue_new_backup_unlocked().wait_for(20000));\n\n        {\n            zauto_lock l(_mp._lock);\n            ASSERT_EQ(0, _mp._cur_backup.end_time_ms);\n            ASSERT_EQ(0, _mp._progress.unfinished_apps);\n            ASSERT_EQ(app->partition_count, _mp._progress.partition_progress.size());\n\n            const backup_info &history = _mp._backup_history.begin()->second;\n            ASSERT_NE(0, history.start_time_ms);\n            ASSERT_GE(history.end_time_ms, history.start_time_ms);\n\n            for (const auto &kv : _mp._progress.partition_progress) {\n                ASSERT_EQ(kv.first.get_app_id(), app->app_id);\n                ASSERT_EQ(kv.second, 1000);\n            }\n            for (const auto &kv : _mp._progress.unfinished_partitions_per_app) {\n                ASSERT_EQ(0, kv.second);\n            }\n        }\n    }\n\n    {\n        // test_case: a full backup procedure\n        // Prepare: issue a new backup\n        // Result: a new backup will be issued, and we have an entry on remote storage\n        std::cout << \"a successful entire backup\" << std::endl;\n        int64_t cur_start_time_ms = static_cast<int64_t>(dsn_now_ms());\n        {\n            zauto_lock l(_mp._lock);\n       
     std::vector<dsn::rpc_address> node_list;\n            generate_node_list(node_list, 3, 3);\n\n            app_state *app = state->_all_apps[3].get();\n            app->status = dsn::app_status::AS_AVAILABLE;\n            for (partition_configuration &pc : app->partitions) {\n                pc.primary = node_list[0];\n                pc.secondaries = {node_list[1], node_list[2]};\n            }\n\n            _mp._backup_history.clear();\n            _mp.reset_records();\n\n            // issue_in_test -> issued by finish all apps -> a delay for backup interval\n            _mp.set_maxcall_issue_new_backup_unlocked(2);\n            _mp.issue_new_backup_unlocked();\n        }\n\n        ASSERT_TRUE(_mp.notifier_issue_new_backup_unlocked().wait_for(10000));\n\n        {\n            zauto_lock l(_mp._lock);\n            // as new backup is captured and abandoned, so we can check the current backup\n            ASSERT_EQ(1, _mp._backup_history.size());\n            // the first backup's id is 1\n            ASSERT_LE(cur_start_time_ms, _mp._backup_history.begin()->first);\n            const backup_info &history = _mp._backup_history.begin()->second;\n            ASSERT_NE(0, history.start_time_ms);\n            ASSERT_GE(history.end_time_ms, history.start_time_ms);\n\n            // check the progress\n            for (const auto &kv : _mp._progress.partition_progress) {\n                ASSERT_EQ(kv.second, 1000);\n            }\n            ASSERT_EQ(0, _mp._progress.unfinished_apps);\n        }\n    }\n\n    {\n        // test case: add backup_history\n        std::cout << \"test add backup history\" << std::endl;\n\n        _mp._backup_history.clear();\n        _mp._cur_backup.backup_id = 0;\n        _mp._cur_backup.end_time_ms = 0;\n\n        backup_info bi;\n        bi.start_time_ms = 100;\n        bi.end_time_ms = 110;\n        bi.app_ids = {1, 2, 3};\n        bi.backup_id = bi.start_time_ms;\n        _mp.add_backup_history(bi);\n\n        bi.start_time_ms 
+= 1000;\n        bi.end_time_ms += 1000;\n        bi.app_ids = {1, 2, 5};\n        bi.backup_id = bi.start_time_ms;\n        _mp.add_backup_history(bi);\n\n        bi.start_time_ms += 1000;\n        bi.end_time_ms = 0;\n        bi.app_ids = {1, 2, 7};\n        bi.backup_id = bi.start_time_ms;\n\n        _mp.add_backup_history(bi);\n\n        ASSERT_EQ(bi.backup_id, _mp._cur_backup.backup_id);\n        ASSERT_EQ(bi.app_ids, _mp._cur_backup.app_ids);\n        ASSERT_EQ(0, _mp._cur_backup.end_time_ms);\n\n        ASSERT_EQ(bi.app_ids.size(), _mp._progress.unfinished_apps);\n        ASSERT_EQ(2, _mp._backup_history.size());\n\n        std::string cur_backup_sig =\n            test_policy_name + std::string(\"@\") + std::to_string(bi.backup_id);\n        ASSERT_EQ(cur_backup_sig, _mp._backup_sig);\n    }\n}\n\nTEST_F(policy_context_test, test_disable_backup_policy)\n{\n    _policy = _mp.get_policy();\n    _policy.is_disable = true;\n    _mp.set_policy(_policy);\n\n    _mp._backup_history.clear();\n    _mp._cur_backup.backup_id = 0;\n    _mp._cur_backup.end_time_ms = 0;\n\n    backup_info bi;\n    bi.start_time_ms = dsn_now_ms();\n    bi.end_time_ms = 0;\n    bi.app_ids = {1};\n    bi.backup_id = bi.start_time_ms;\n    _mp.add_backup_history(bi);\n\n    // 'start_backup_app_meta_unlocked()' should not be called because policy is disabled\n    _mp.continue_current_backup_unlocked();\n    ASSERT_FALSE(_mp.notifier_start_backup_app_meta_unlocked().wait_for(5000));\n}\n\nTEST_F(policy_context_test, test_backup_failed)\n{\n    fail::setup();\n    fail::cfg(\"mock_local_service_write_failed\", \"100%1*return(ERR_FS_INTERNAL)\");\n\n    // app 1 is available.\n    dsn::app_info info;\n    info.is_stateful = true;\n    info.app_id = 1;\n    info.app_type = \"simple_kv\";\n    info.max_replica_count = 3;\n    info.partition_count = 4;\n    info.status = dsn::app_status::AS_AVAILABLE;\n    _service->get_server_state()->_all_apps.emplace(info.app_id, app_state::create(info));\n\n  
  {\n        zauto_lock l(_mp._lock);\n        _mp._backup_history.clear();\n        _mp.reset_records();\n\n        // start backup in this policy\n        _mp.issue_new_backup_unlocked();\n    }\n    sleep(1);\n    {\n        zauto_lock l(_mp._lock);\n        ASSERT_TRUE(_mp._is_backup_failed);\n    }\n    ASSERT_FALSE(_mp.is_under_backuping());\n\n    fail::teardown();\n}\n\n// test should_start_backup_unlocked()\nTEST_F(policy_context_test, test_should_start_backup)\n{\n    uint64_t now = dsn_now_ms();\n    int32_t hour = 0, min = 0, sec = 0;\n    ::dsn::utils::time_ms_to_date_time(now, hour, min, sec);\n    while (min == 59) {\n        std::this_thread::sleep_for(std::chrono::minutes(1));\n        now = dsn_now_ms();\n        ::dsn::utils::time_ms_to_date_time(now, hour, min, sec);\n    }\n\n    int64_t oneday_sec = 1 * 24 * 60 * 60;\n    _mp._policy.start_time.hour = hour;\n    _mp._policy.start_time.minute = 0;\n    _mp._policy.backup_interval_seconds = oneday_sec; // oneday\n    _mp._backup_history.clear();\n\n    backup_info info;\n\n    {\n        std::cout << \"first backup & no limit to start_time\" << std::endl;\n        _mp._policy.start_time.hour = 24;\n        ASSERT_TRUE(_mp.should_start_backup_unlocked());\n    }\n\n    {\n        std::cout << \"first backup & cur_time.hour == start_time.hour\" << std::endl;\n        _mp._policy.start_time.hour = hour;\n        ASSERT_TRUE(_mp.should_start_backup_unlocked());\n    }\n\n    {\n        std::cout << \"first backup & cur_time.hour != start_time.hour\" << std::endl;\n        _mp._policy.start_time.hour = hour + 100; // invalid time\n        ASSERT_FALSE(_mp.should_start_backup_unlocked());\n        _mp._policy.start_time.hour = (hour + 1) % 24; // valid, but not reach\n        ASSERT_FALSE(_mp.should_start_backup_unlocked());\n        _mp._policy.start_time.hour = hour - 1; // time passed(also, include -1)\n        ASSERT_FALSE(_mp.should_start_backup_unlocked());\n    }\n\n    {\n        std::cout << 
\"not first backup & recent backup delay 20min to start\" << std::endl;\n        info.start_time_ms = now - (oneday_sec * 1000) + 20 * 60 * 1000;\n        info.end_time_ms = info.start_time_ms + 10;\n        _mp.add_backup_history(info);\n        // if we set start_time to 24:00, then will not start backup\n        _mp._policy.start_time.hour = 24;\n        ASSERT_FALSE(_mp.should_start_backup_unlocked());\n        // if we set start_time to hour:00, then will start backup, even if the interval <\n        // policy.backup_interval\n        _mp._policy.start_time.hour = hour;\n        ASSERT_TRUE(_mp.should_start_backup_unlocked());\n    }\n\n    {\n        std::cout << \"not first backup & recent backup start time is equal with start_time\"\n                  << std::endl;\n        _mp._policy.start_time.hour = hour;\n        _mp._backup_history.clear();\n        info.start_time_ms = now - (oneday_sec * 1000) - (min * 60 * 1000);\n        info.start_time_ms = (info.start_time_ms / 1000) * 1000;\n        info.end_time_ms = info.start_time_ms + 10;\n        _mp.add_backup_history(info);\n        ASSERT_TRUE(_mp.should_start_backup_unlocked());\n    }\n\n    {\n        // delay the start_time\n        std::cout << \"not first backup & delay the start time of policy\" << std::endl;\n        _mp._policy.start_time.hour = hour + 1;\n        _mp._backup_history.clear();\n        // make sure the start time of recent backup is less than policy's start_time, so we\n        // subtract 3 more min\n        info.start_time_ms = now - (oneday_sec * 1000) - 3 * 60 * 1000;\n        info.end_time_ms = info.start_time_ms + 10;\n        _mp.add_backup_history(info);\n        if (_mp._policy.start_time.hour == 24) {\n            // if hour = 23, then policy.start_time.hour = 24, we should start next backup,\n            // because now - info.start_time_ms > policy.backup_interval\n            ASSERT_TRUE(_mp.should_start_backup_unlocked());\n        } else {\n            // should not 
start, even if now - info.start_time_ms > policy.backup_interval, but\n            // not reach the time-point that policy.start_time limit\n            ASSERT_FALSE(_mp.should_start_backup_unlocked());\n        }\n    }\n\n    {\n        std::cout << \"not first backup & no limit to start time & should start backup\" << std::endl;\n        _mp._policy.start_time.hour = 24;\n        _mp._backup_history.clear();\n        info.start_time_ms = now - (oneday_sec * 1000) - 3 * 60 * 60;\n        info.end_time_ms = info.start_time_ms + 10;\n        _mp.add_backup_history(info);\n        ASSERT_TRUE(_mp.should_start_backup_unlocked());\n    }\n\n    {\n        std::cout << \"not first backup & no limit to start time & should not start backup\"\n                  << std::endl;\n        _mp._backup_history.clear();\n        info.start_time_ms = now - (oneday_sec * 1000) + 3 * 60 * 60;\n        info.end_time_ms = info.start_time_ms + 10;\n        _mp.add_backup_history(info);\n        ASSERT_FALSE(_mp.should_start_backup_unlocked());\n    }\n}\n\nclass meta_backup_service_test : public meta_test_base\n{\nprotected:\n    meta_backup_service_test() : _meta_svc(new fake_receiver_meta_service()), _backup_svc(nullptr)\n    {\n    }\n\n    void SetUp() override\n    {\n        meta_test_base::SetUp();\n\n        meta_options &opt = _meta_svc->_meta_opts;\n        opt.cluster_root = \"/meta_test\";\n        opt.meta_state_service_type = \"meta_state_service_simple\";\n        _meta_svc->remote_storage_initialize();\n        std::string backup_root = \"/backup_test\";\n        std::string policy_meta_root = opt.cluster_root + \"/backup_policies\";\n        _meta_svc->_backup_handler = std::make_shared<backup_service>(\n            _meta_svc.get(), policy_meta_root, backup_root, [](backup_service *bs) {\n                return std::make_shared<mock_policy>(bs);\n            });\n        _backup_svc = _meta_svc->_backup_handler.get();\n    }\n\n    std::shared_ptr<meta_service> 
_meta_svc;\n    backup_service *_backup_svc;\n};\n\nTEST_F(meta_backup_service_test, test_add_backup_policy)\n{\n    // create policy meta root.\n    bool flag = false;\n    dsn::task_ptr task_test =\n        tasking::create_task(LPC_DEFAULT_CALLBACK, nullptr, [&flag]() { flag = true; });\n    _backup_svc->start_create_policy_meta_root(task_test);\n    while (!flag) {\n        std::cout << \"wait create policy_meta_root succeed\" << std::endl;\n        sleep(1);\n    }\n    ASSERT_TRUE(flag);\n\n    configuration_add_backup_policy_request req;\n    req.backup_provider_type = std::string(\"local_service\");\n    req.policy_name = test_policy_name;\n    req.app_ids = {1, 2, 3};\n    req.backup_interval_seconds = 24 * 60 * 60;\n\n    // case1: backup policy doesn't contain any valid app_id\n    // result: backup policy will not be added, and return ERR_INVALID_PARAMETERS\n    {\n        configuration_add_backup_policy_response resp;\n        auto r = fake_rpc_call(RPC_CM_ADD_BACKUP_POLICY,\n                               LPC_DEFAULT_CALLBACK,\n                               _backup_svc,\n                               &backup_service::add_backup_policy,\n                               req);\n        fake_wait_rpc(r, resp);\n        ASSERT_EQ(ERR_INVALID_PARAMETERS, resp.err);\n        // hint message contains the first invalid app id\n        std::string hint_message = \"invalid app 1\";\n        ASSERT_EQ(hint_message, resp.hint_message);\n    }\n\n    // case2: backup policy interval time < checkpoint reserve time\n    // result: backup policy will not be added, and return ERR_INVALID_PARAMETERS\n    {\n        int64_t old_backup_interval_seconds = req.backup_interval_seconds;\n        req.backup_interval_seconds = 10;\n        configuration_add_backup_policy_response resp;\n        server_state *state = _meta_svc->get_server_state();\n        state->_all_apps.insert(std::make_pair(1, std::make_shared<app_state>(app_info())));\n        auto r = 
fake_rpc_call(RPC_CM_ADD_BACKUP_POLICY,\n                               LPC_DEFAULT_CALLBACK,\n                               _backup_svc,\n                               &backup_service::add_backup_policy,\n                               req);\n        fake_wait_rpc(r, resp);\n\n        std::string hint_message = fmt::format(\n            \"backup interval must be greater than cold_backup_checkpoint_reserve_minutes={}\",\n            _meta_svc->get_options().cold_backup_checkpoint_reserve_minutes);\n        ASSERT_EQ(ERR_INVALID_PARAMETERS, resp.err);\n        ASSERT_EQ(hint_message, resp.hint_message);\n        req.backup_interval_seconds = old_backup_interval_seconds;\n    }\n\n    // case3: backup policy contains valid and invalid app_id\n    // result: backup policy will not be added, and return ERR_INVALID_PARAMETERS\n    {\n        configuration_add_backup_policy_response resp;\n        server_state *state = _meta_svc->get_server_state();\n        state->_all_apps.insert(std::make_pair(1, std::make_shared<app_state>(app_info())));\n        auto r = fake_rpc_call(RPC_CM_ADD_BACKUP_POLICY,\n                               LPC_DEFAULT_CALLBACK,\n                               _backup_svc,\n                               &backup_service::add_backup_policy,\n                               req);\n        fake_wait_rpc(r, resp);\n        ASSERT_EQ(ERR_INVALID_PARAMETERS, resp.err);\n        // hint message contains the first invalid app id\n        std::string hint_message = \"invalid app 2\";\n        ASSERT_EQ(hint_message, resp.hint_message);\n    }\n\n    // case4: backup policy only contains valid app_id\n    // result: add_backup_policy succeed\n    {\n        configuration_add_backup_policy_response resp;\n        server_state *state = _meta_svc->get_server_state();\n        state->_all_apps.insert(std::make_pair(2, std::make_shared<app_state>(app_info())));\n        state->_all_apps.insert(std::make_pair(3, std::make_shared<app_state>(app_info())));\n        
auto r = fake_rpc_call(RPC_CM_ADD_BACKUP_POLICY,\n                               LPC_DEFAULT_CALLBACK,\n                               _backup_svc,\n                               &backup_service::add_backup_policy,\n                               req);\n        fake_wait_rpc(r, resp);\n        ASSERT_EQ(ERR_OK, resp.err);\n    }\n\n    // test sync_policies_from_remote_storage\n    _backup_svc->_policy_states.clear();\n    ASSERT_TRUE(_backup_svc->_policy_states.empty());\n    error_code err = _backup_svc->sync_policies_from_remote_storage();\n    ASSERT_EQ(ERR_OK, err);\n    ASSERT_EQ(1, _backup_svc->_policy_states.size());\n    ASSERT_TRUE(_backup_svc->_policy_states.find(test_policy_name) !=\n                _backup_svc->_policy_states.end());\n    const policy &p = _backup_svc->_policy_states.at(test_policy_name)->get_policy();\n    ASSERT_EQ(3, p.app_ids.size());\n    ASSERT_EQ(\"local_service\", p.backup_provider_type);\n    ASSERT_EQ(24 * 60 * 60, p.backup_interval_seconds);\n    ASSERT_EQ(test_policy_name, p.policy_name);\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/balancer_simulator/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME sim_lb)\n\n# Source files under CURRENT project directory will be automatically included.\n# You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC ../misc/misc.cpp)\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS\n    dsn_meta_server\n    dsn_replication_common\n    dsn_runtime\n    hashtable\n    gtest)\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\n# Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_test()\n"
  },
  {
    "path": "src/meta/test/balancer_simulator/balancer_simulator.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <algorithm>\n#include <gtest/gtest.h>\n\n#include \"meta/meta_data.h\"\n#include \"meta/server_load_balancer.h\"\n#include \"meta/greedy_load_balancer.h\"\n#include \"meta/test/misc/misc.h\"\n\nusing namespace dsn::replication;\n\n#ifdef ASSERT_EQ\n#undef ASSERT_EQ\n#endif\n#define ASSERT_EQ(left, right) dassert((left) == (right), \"\")\n\n#ifdef ASSERT_TRUE\n#undef ASSERT_TRUE\n#endif\n#define ASSERT_TRUE(exp) dassert((exp), \"\")\n\n#ifdef ASSERT_FALSE\n#undef ASSERT_FALSE\n#endif\n#define ASSERT_FALSE(exp) dassert(!(exp), \"\")\n\nclass simple_priority_queue\n{\npublic:\n    simple_priority_queue(const std::vector<dsn::rpc_address> &nl,\n                          server_load_balancer::node_comparator &&compare)\n        : 
container(nl), cmp(std::move(compare))\n    {\n        std::make_heap(container.begin(), container.end(), cmp);\n    }\n    void push(const dsn::rpc_address &addr)\n    {\n        container.push_back(addr);\n        std::push_heap(container.begin(), container.end(), cmp);\n    }\n    dsn::rpc_address pop()\n    {\n        std::pop_heap(container.begin(), container.end(), cmp);\n        dsn::rpc_address result = container.back();\n        container.pop_back();\n        return result;\n    }\n    dsn::rpc_address top() const { return container.front(); }\n    bool empty() const { return container.empty(); }\nprivate:\n    std::vector<dsn::rpc_address> container;\n    server_load_balancer::node_comparator cmp;\n};\n\nvoid generate_balanced_apps(/*out*/ app_mapper &apps,\n                            node_mapper &nodes,\n                            const std::vector<dsn::rpc_address> &node_list)\n{\n    nodes.clear();\n    for (const auto &node : node_list)\n        nodes[node].set_alive(true);\n\n    int partitions_per_node = random32(20, 100);\n    dsn::app_info info;\n    info.status = dsn::app_status::AS_AVAILABLE;\n    info.is_stateful = true;\n    info.app_id = 1;\n    info.app_name = \"test\";\n    info.app_type = \"test\";\n    info.partition_count = partitions_per_node * node_list.size();\n    info.max_replica_count = 3;\n\n    std::shared_ptr<app_state> the_app = app_state::create(info);\n\n    simple_priority_queue pq1(node_list, server_load_balancer::primary_comparator(nodes));\n    // generate balanced primary\n    for (dsn::partition_configuration &pc : the_app->partitions) {\n        dsn::rpc_address n = pq1.pop();\n        nodes[n].put_partition(pc.pid, true);\n        pc.primary = n;\n        pq1.push(n);\n    }\n\n    // generate balanced secondary\n    simple_priority_queue pq2(node_list, server_load_balancer::partition_comparator(nodes));\n    std::vector<dsn::rpc_address> temp;\n\n    for (dsn::partition_configuration &pc : the_app->partitions) {\n  
      temp.clear();\n        while (pc.secondaries.size() + 1 < pc.max_replica_count) {\n            dsn::rpc_address n = pq2.pop();\n            if (!is_member(pc, n)) {\n                pc.secondaries.push_back(n);\n                nodes[n].put_partition(pc.pid, false);\n            }\n            temp.push_back(n);\n        }\n        for (auto n : temp)\n            pq2.push(n);\n    }\n\n    // check if balanced\n    int pri_min, part_min;\n    pri_min = part_min = the_app->partition_count + 1;\n    int pri_max, part_max;\n    pri_max = part_max = -1;\n\n    for (auto &kv : nodes) {\n        if (kv.second.primary_count() > pri_max)\n            pri_max = kv.second.primary_count();\n        if (kv.second.primary_count() < pri_min)\n            pri_min = kv.second.primary_count();\n        if (kv.second.partition_count() > part_max)\n            part_max = kv.second.partition_count();\n        if (kv.second.partition_count() < part_min)\n            part_min = kv.second.partition_count();\n    }\n\n    apps.emplace(the_app->app_id, the_app);\n\n    ASSERT_TRUE(pri_max - pri_min <= 1);\n    ASSERT_TRUE(part_max - part_min <= 1);\n}\n\nvoid random_move_primary(app_mapper &apps, node_mapper &nodes, int primary_move_ratio)\n{\n    app_state &the_app = *(apps[0]);\n    int space_size = the_app.partition_count * 100;\n    for (dsn::partition_configuration &pc : the_app.partitions) {\n        int n = random32(1, space_size) / 100;\n        if (n < primary_move_ratio) {\n            int indice = random32(0, 1);\n            nodes[pc.primary].remove_partition(pc.pid, true);\n            std::swap(pc.primary, pc.secondaries[indice]);\n            nodes[pc.primary].put_partition(pc.pid, true);\n        }\n    }\n}\n\nvoid greedy_balancer_perfect_move_primary()\n{\n    app_mapper apps;\n    node_mapper nodes;\n    std::vector<dsn::rpc_address> node_list;\n\n    generate_node_list(node_list, 20, 100);\n    generate_balanced_apps(apps, nodes, node_list);\n\n    
random_move_primary(apps, nodes, 70);\n    // test the greedy balancer's move primary\n    greedy_load_balancer glb(nullptr);\n    migration_list ml;\n\n    glb.check({&apps, &nodes}, ml);\n    dinfo(\"balance checker operation count = %d\", ml.size());\n    int i = 0;\n\n    while (glb.balance({&apps, &nodes}, ml)) {\n        for (const auto &kv : ml) {\n            const std::shared_ptr<configuration_balancer_request> &req = kv.second;\n            for (const configuration_proposal_action &act : req->action_list) {\n                ASSERT_TRUE(act.type != config_type::CT_ADD_SECONDARY_FOR_LB);\n            }\n        }\n        glb.check({&apps, &nodes}, ml);\n        dinfo(\"round %d: balance checker operation count = %d\", ++i, ml.size());\n    }\n}\n\nint main(int, char **)\n{\n    dsn_run_config(\"config.ini\", false);\n    greedy_balancer_perfect_move_primary();\n    return 0;\n}\n"
  },
  {
    "path": "src/meta/test/balancer_validator.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <gtest/gtest.h>\n\n#include <dsn/service_api_c.h>\n#include <dsn/service_api_cpp.h>\n#include <dsn/dist/replication/replication_types.h>\n#include <dsn/cpp/serialization_helper/dsn.layer2_types.h>\n\n#include <fstream>\n\n#include \"meta/meta_data.h\"\n#include \"meta/server_load_balancer.h\"\n#include \"meta/greedy_load_balancer.h\"\n\n#include \"meta/test/misc/misc.h\"\n\n#include \"meta_service_test_app.h\"\n\nnamespace dsn {\nnamespace replication {\n\n#ifdef ASSERT_EQ\n#undef ASSERT_EQ\n#endif\n\n#define ASSERT_EQ(left, right) dassert((left) == (right), \"\")\n\n#ifdef ASSERT_TRUE\n#undef ASSERT_TRUE\n#endif\n\n#define ASSERT_TRUE(exp) dassert((exp), \"\")\n\n#ifdef ASSERT_FALSE\n#undef ASSERT_FALSE\n#endif\n#define 
ASSERT_FALSE(exp) dassert(!(exp), \"\")\n\nstatic void check_cure(app_mapper &apps, node_mapper &nodes, ::dsn::partition_configuration &pc)\n{\n    meta_service svc;\n    partition_guardian guardian(&svc);\n    pc_status ps = pc_status::invalid;\n    node_state *ns;\n\n    configuration_proposal_action act;\n    while (ps != pc_status::healthy) {\n        ps = guardian.cure({&apps, &nodes}, pc.pid, act);\n        if (act.type == config_type::CT_INVALID)\n            break;\n        switch (act.type) {\n        case config_type::CT_ASSIGN_PRIMARY:\n            ASSERT_TRUE(pc.primary.is_invalid() && pc.secondaries.size() == 0);\n            ASSERT_EQ(act.node, act.target);\n            ASSERT_TRUE(nodes.find(act.node) != nodes.end());\n\n            ASSERT_EQ(nodes[act.node].served_as(pc.pid), partition_status::PS_INACTIVE);\n            nodes[act.node].put_partition(pc.pid, true);\n            pc.primary = act.node;\n            break;\n\n        case config_type::CT_ADD_SECONDARY:\n            ASSERT_FALSE(is_member(pc, act.node));\n            ASSERT_EQ(pc.primary, act.target);\n            ASSERT_TRUE(nodes.find(act.node) != nodes.end());\n            pc.secondaries.push_back(act.node);\n            ns = &nodes[act.node];\n            ASSERT_EQ(ns->served_as(pc.pid), partition_status::PS_INACTIVE);\n            ns->put_partition(pc.pid, false);\n            break;\n\n        default:\n            ASSERT_TRUE(false);\n            break;\n        }\n    }\n\n    // test upgrade to primary\n    ASSERT_EQ(nodes[pc.primary].served_as(pc.pid), partition_status::PS_PRIMARY);\n    nodes[pc.primary].remove_partition(pc.pid, true);\n    pc.primary.set_invalid();\n\n    ps = guardian.cure({&apps, &nodes}, pc.pid, act);\n    ASSERT_EQ(act.type, config_type::CT_UPGRADE_TO_PRIMARY);\n    ASSERT_TRUE(pc.primary.is_invalid());\n    ASSERT_EQ(act.node, act.target);\n    ASSERT_TRUE(is_secondary(pc, act.node));\n    ASSERT_TRUE(nodes.find(act.node) != nodes.end());\n\n    ns = 
&nodes[act.node];\n    pc.primary = act.node;\n    std::remove(pc.secondaries.begin(), pc.secondaries.end(), pc.primary);\n\n    ASSERT_EQ(ns->served_as(pc.pid), partition_status::PS_SECONDARY);\n    ns->put_partition(pc.pid, true);\n}\n\n// static void verbose_nodes(const node_mapper& nodes)\n//{\n//    std::cout << \"------------\" << std::endl;\n//    for (const auto& n: nodes)\n//    {\n//        const node_state& ns = n.second;\n//        printf(\"node: %s\\ntotal_primaries: %d, total_secondaries: %d\\n\", n.first.to_string(),\n//        ns.primary_count(), ns.partition_count());\n//        for (int i=1; i<=2; ++i)\n//        {\n//            printf(\"app %d primaries: %d, app %d partitions: %d\\n\", i, ns.primary_count(i), i,\n//            ns.partition_count(i));\n//        }\n//    }\n//}\n//\n// static void verbose_app_node(const node_mapper& nodes)\n//{\n//    printf(\"Total_Pri: \");\n//    for (const auto& n: nodes)\n//    {\n//        const node_state& ns = n.second;\n//        printf(\"%*d\", 3, ns.primary_count());\n//    }\n//    printf(\"\\nTotal_Sec: \");\n//    for (const auto& n: nodes)\n//    {\n//        const node_state& ns = n.second;\n//        printf(\"%*d\", 3, ns.secondary_count());\n//    }\n//    printf(\"\\nApp01_Pri: \");\n//    for (const auto& n: nodes)\n//    {\n//        const node_state& ns = n.second;\n//        printf(\"%*d\", 3, ns.primary_count(1));\n//    }\n//    printf(\"\\nApp01_Sec: \");\n//    for (const auto& n: nodes)\n//    {\n//        const node_state& ns = n.second;\n//        printf(\"%*d\", 3, ns.secondary_count(1));\n//    }\n//    printf(\"\\nApp02_Pri: \");\n//    for (const auto& n: nodes)\n//    {\n//        const node_state& ns = n.second;\n//        printf(\"%*d\", 3, ns.primary_count(2));\n//    }\n//    printf(\"\\nApp02_Sec: \");\n//    for (const auto& n: nodes)\n//    {\n//        const node_state& ns = n.second;\n//        printf(\"%*d\", 3, ns.secondary_count(2));\n//    }\n//    
printf(\"\\n\");\n//}\n\n// static void verbose_app(const std::shared_ptr<app_state>& app)\n//{\n//    std::cout << app->app_name << \" \" << app->app_id << \" \" << app->partition_count << std::endl;\n//    for (int i=0; i<app->partition_count; ++i)\n//    {\n//        const partition_configuration& pc = app->partitions[i];\n//        std::cout << pc.primary.to_string();\n//        for (int j=0; j<pc.secondaries.size(); ++j)\n//        {\n//            std::cout << \" \" << pc.secondaries[j].to_string();\n//        }\n//        std::cout << std::endl;\n//    }\n//}\n// static void print_node_fs_manager(const app_mapper &apps,\n//                                  const node_mapper &nodes,\n//                                  const nodes_fs_manager &manager)\n//{\n//    int apps_count = apps.size();\n//    for (const auto &kv : nodes) {\n//        const node_state &ns = kv.second;\n//        printf(\"%s: %d primaries, %d partitions\\n\",\n//               ns.addr().to_string(),\n//               ns.primary_count(),\n//               ns.partition_count());\n//        printf(\"%8s\", \"tag\");\n//        for (int i = 1; i <= apps_count; ++i) {\n//            std::string app = std::string(\"app\") + std::to_string(i);\n//            printf(\"%8s\", app.c_str());\n//        }\n//        printf(\"\\n\");\n//        const fs_manager &m = manager.find(ns.addr())->second;\n//        m.for_each_dir_node([apps_count](const dir_node &dn) {\n//            printf(\"%8s\", dn.tag.c_str());\n//            for (int i = 1; i <= apps_count; ++i) {\n//                printf(\"%8u\", dn.replicas_count(i));\n//            }\n//            printf(\"%8u\\n\", dn.replicas_count());\n//            return true;\n//        });\n//    }\n//}\n\nvoid meta_service_test_app::balancer_validator()\n{\n    std::vector<dsn::rpc_address> node_list;\n    generate_node_list(node_list, 20, 100);\n\n    app_mapper apps;\n    node_mapper nodes;\n    nodes_fs_manager manager;\n    int disk_on_node = 9;\n\n  
  meta_service svc;\n    greedy_load_balancer glb(&svc);\n\n    generate_apps(\n        apps, node_list, 5, disk_on_node, std::pair<uint32_t, uint32_t>(1000, 2000), true);\n    generate_node_mapper(nodes, apps, node_list);\n    generate_node_fs_manager(apps, nodes, manager, disk_on_node);\n    migration_list ml;\n\n    for (auto &iter : nodes) {\n        dinfo(\"node(%s) have %d primaries, %d partitions\",\n              iter.first.to_string(),\n              iter.second.primary_count(),\n              iter.second.partition_count());\n    }\n\n    // iterate 1000000 times\n    for (int i = 0; i < 1000000 && glb.balance({&apps, &nodes}, ml); ++i) {\n        dinfo(\"the %dth round of balancer\", i);\n        migration_check_and_apply(apps, nodes, ml, &manager);\n        glb.check({&apps, &nodes}, ml);\n        dinfo(\"balance checker operation count = %d\", ml.size());\n    }\n\n    for (auto &iter : nodes) {\n        dinfo(\"node(%s) have %d primaries, %d partitions\",\n              iter.first.to_string(),\n              iter.second.primary_count(),\n              iter.second.partition_count());\n    }\n\n    std::shared_ptr<app_state> &the_app = apps[1];\n    for (::dsn::partition_configuration &pc : the_app->partitions) {\n        ASSERT_FALSE(pc.primary.is_invalid());\n        ASSERT_TRUE(pc.secondaries.size() >= pc.max_replica_count - 1);\n    }\n\n    // now test the cure\n    ::dsn::partition_configuration &pc = the_app->partitions[0];\n    nodes[pc.primary].remove_partition(pc.pid, false);\n    for (const dsn::rpc_address &addr : pc.secondaries)\n        nodes[addr].remove_partition(pc.pid, false);\n    pc.primary.set_invalid();\n    pc.secondaries.clear();\n\n    // cure test\n    check_cure(apps, nodes, pc);\n}\n\ndsn::rpc_address get_rpc_address(const std::string &ip_port)\n{\n    int splitter = ip_port.find_first_of(':');\n    return rpc_address(ip_port.substr(0, splitter).c_str(),\n                       boost::lexical_cast<int>(ip_port.substr(splitter 
+ 1)));\n}\n\nstatic void load_apps_and_nodes(const char *file, app_mapper &apps, node_mapper &nodes)\n{\n    apps.clear();\n    nodes.clear();\n\n    std::ifstream infile(file, std::ios::in);\n    int total_nodes;\n    infile >> total_nodes;\n\n    std::string ip_port;\n    std::vector<dsn::rpc_address> node_list;\n    for (int i = 0; i < total_nodes; ++i) {\n        infile >> ip_port;\n        node_list.push_back(get_rpc_address(ip_port));\n    }\n\n    int total_apps;\n    infile >> total_apps;\n    for (int i = 0; i < total_apps; ++i) {\n        app_info info;\n        infile >> info.app_id >> info.partition_count;\n        info.app_name = \"test_app_\" + boost::lexical_cast<std::string>(info.app_id);\n        info.app_type = \"test\";\n        info.max_replica_count = 3;\n        info.is_stateful = true;\n        info.status = app_status::AS_AVAILABLE;\n\n        std::shared_ptr<app_state> app(new app_state(info));\n        apps[info.app_id] = app;\n        for (int j = 0; j < info.partition_count; ++j) {\n            int n;\n            infile >> n;\n            infile >> ip_port;\n            app->partitions[j].primary = get_rpc_address(ip_port);\n            for (int k = 1; k < n; ++k) {\n                infile >> ip_port;\n                app->partitions[j].secondaries.push_back(get_rpc_address(ip_port));\n            }\n        }\n    }\n\n    generate_node_mapper(nodes, apps, node_list);\n}\n\nvoid meta_service_test_app::balance_config_file()\n{\n    const char *suits[] = {\"suite1\", \"suite2\", nullptr};\n\n    app_mapper apps;\n    node_mapper nodes;\n\n    for (int i = 0; suits[i]; ++i) {\n        load_apps_and_nodes(suits[i], apps, nodes);\n\n        greedy_load_balancer greedy_lb(nullptr);\n        server_load_balancer *lb = &greedy_lb;\n        migration_list ml;\n\n        // iterate 1000 times\n        for (int i = 0; i < 1000 && lb->balance({&apps, &nodes}, ml); ++i) {\n            dinfo(\"the %dth round of balancer\", i);\n            
migration_check_and_apply(apps, nodes, ml, nullptr);\n            lb->check({&apps, &nodes}, ml);\n            dinfo(\"balance checker operation count = %d\", ml.size());\n        }\n    }\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/clear.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\nrm -rf core data/ meta_state.dump* zoolog.log backup_data *.xml test_policy_name test_backup_root block_service/\n"
  },
  {
    "path": "src/meta/test/cluster_balance_policy_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include <dsn/utility/defer.h>\n#include <dsn/utility/fail_point.h>\n#include \"meta/cluster_balance_policy.h\"\n\nnamespace dsn {\nnamespace replication {\n\nTEST(cluster_balance_policy, app_migration_info)\n{\n    {\n        cluster_balance_policy::app_migration_info info1;\n        info1.app_id = 1;\n        cluster_balance_policy::app_migration_info info2;\n        info2.app_id = 2;\n        ASSERT_LT(info1, info2);\n    }\n\n    {\n        cluster_balance_policy::app_migration_info info1;\n        info1.app_id = 2;\n        cluster_balance_policy::app_migration_info info2;\n        info2.app_id = 2;\n        ASSERT_EQ(info1, info2);\n    }\n}\n\nTEST(cluster_balance_policy, node_migration_info)\n{\n    {\n        cluster_balance_policy::node_migration_info info1;\n        info1.address = rpc_address(1, 10086);\n        cluster_balance_policy::node_migration_info info2;\n        info2.address = rpc_address(2, 10086);\n        ASSERT_LT(info1, info2);\n    }\n\n    {\n        cluster_balance_policy::node_migration_info info1;\n        info1.address = rpc_address(1, 10000);\n        
cluster_balance_policy::node_migration_info info2;\n        info2.address = rpc_address(1, 10086);\n        ASSERT_LT(info1, info2);\n    }\n\n    {\n        cluster_balance_policy::node_migration_info info1;\n        info1.address = rpc_address(1, 10086);\n        cluster_balance_policy::node_migration_info info2;\n        info2.address = rpc_address(1, 10086);\n        ASSERT_EQ(info1, info2);\n    }\n}\n\nTEST(cluster_balance_policy, get_skew)\n{\n    std::map<rpc_address, uint32_t> count_map = {\n        {rpc_address(1, 10086), 1}, {rpc_address(2, 10086), 3}, {rpc_address(3, 10086), 5},\n    };\n\n    ASSERT_EQ(get_skew(count_map), count_map.rbegin()->second - count_map.begin()->second);\n}\n\nTEST(cluster_balance_policy, get_partition_count)\n{\n    node_state ns;\n    int appid = 1;\n    ns.put_partition(gpid(appid, 0), true);\n    ns.put_partition(gpid(appid, 1), false);\n    ns.put_partition(gpid(appid, 2), false);\n    ns.put_partition(gpid(appid, 3), false);\n\n    ASSERT_EQ(get_partition_count(ns, balance_type::COPY_PRIMARY, appid), 1);\n    ASSERT_EQ(get_partition_count(ns, balance_type::COPY_SECONDARY, appid), 3);\n}\n\nTEST(cluster_balance_policy, get_app_migration_info)\n{\n    cluster_balance_policy policy(nullptr);\n\n    int appid = 1;\n    std::string appname = \"test\";\n    auto address = rpc_address(1, 10086);\n    app_info info;\n    info.app_id = appid;\n    info.app_name = appname;\n    info.partition_count = 1;\n    auto app = std::make_shared<app_state>(info);\n    app->partitions[0].primary = address;\n\n    node_state ns;\n    ns.set_addr(address);\n    ns.put_partition(gpid(appid, 0), true);\n    node_mapper nodes;\n    nodes[address] = ns;\n\n    cluster_balance_policy::app_migration_info migration_info;\n    {\n        app->partitions[0].max_replica_count = 100;\n        auto res =\n            policy.get_app_migration_info(app, nodes, balance_type::COPY_PRIMARY, migration_info);\n        ASSERT_FALSE(res);\n    }\n\n    {\n        
app->partitions[0].max_replica_count = 1;\n        auto res =\n            policy.get_app_migration_info(app, nodes, balance_type::COPY_PRIMARY, migration_info);\n        ASSERT_TRUE(res);\n        ASSERT_EQ(migration_info.app_id, appid);\n        ASSERT_EQ(migration_info.app_name, appname);\n        std::map<rpc_address, partition_status::type> pstatus_map;\n        pstatus_map[address] = partition_status::type::PS_PRIMARY;\n        ASSERT_EQ(migration_info.partitions[0], pstatus_map);\n        ASSERT_EQ(migration_info.replicas_count[address], 1);\n    }\n}\n\nTEST(cluster_balance_policy, get_node_migration_info)\n{\n    cluster_balance_policy policy(nullptr);\n\n    int appid = 1;\n    std::string appname = \"test\";\n    auto address = rpc_address(1, 10086);\n    app_info info;\n    info.app_id = appid;\n    info.app_name = appname;\n    info.partition_count = 1;\n    auto app = std::make_shared<app_state>(info);\n    app->partitions[0].primary = address;\n    serving_replica sr;\n    sr.node = address;\n    std::string disk_tag = \"disk1\";\n    sr.disk_tag = disk_tag;\n    config_context context;\n    context.config_owner = new partition_configuration();\n    auto cleanup = dsn::defer([&context]() { delete context.config_owner; });\n    context.config_owner->pid = gpid(appid, 0);\n    context.serving.emplace_back(std::move(sr));\n    app->helpers->contexts.emplace_back(std::move(context));\n\n    app_mapper all_apps;\n    all_apps[appid] = app;\n\n    node_state ns;\n    ns.set_addr(address);\n    gpid pid = gpid(appid, 0);\n    ns.put_partition(pid, true);\n\n    cluster_balance_policy::node_migration_info migration_info;\n    policy.get_node_migration_info(ns, all_apps, migration_info);\n\n    ASSERT_EQ(migration_info.address, address);\n    ASSERT_NE(migration_info.partitions.find(disk_tag), migration_info.partitions.end());\n    ASSERT_EQ(migration_info.partitions.at(disk_tag).size(), 1);\n    ASSERT_EQ(*migration_info.partitions.at(disk_tag).begin(), 
pid);\n}\n\nTEST(cluster_balance_policy, get_min_max_set)\n{\n    std::map<rpc_address, uint32_t> node_count_map;\n    node_count_map.emplace(rpc_address(1, 10086), 1);\n    node_count_map.emplace(rpc_address(2, 10086), 3);\n    node_count_map.emplace(rpc_address(3, 10086), 5);\n    node_count_map.emplace(rpc_address(4, 10086), 5);\n\n    std::set<rpc_address> min_set, max_set;\n    get_min_max_set(node_count_map, min_set, max_set);\n\n    ASSERT_EQ(min_set.size(), 1);\n    ASSERT_EQ(*min_set.begin(), rpc_address(1, 10086));\n    ASSERT_EQ(max_set.size(), 2);\n    ASSERT_EQ(*max_set.begin(), rpc_address(3, 10086));\n    ASSERT_EQ(*max_set.rbegin(), rpc_address(4, 10086));\n}\n\nTEST(cluster_balance_policy, get_disk_partitions_map)\n{\n    cluster_balance_policy policy(nullptr);\n    cluster_balance_policy::cluster_migration_info cluster_info;\n    rpc_address addr(1, 10086);\n    int32_t app_id = 1;\n\n    auto disk_partitions = policy.get_disk_partitions_map(cluster_info, addr, app_id);\n    ASSERT_TRUE(disk_partitions.empty());\n\n    std::map<rpc_address, partition_status::type> partition;\n    partition[addr] = partition_status::PS_SECONDARY;\n    cluster_balance_policy::app_migration_info app_info;\n    app_info.partitions.push_back(partition);\n    cluster_info.apps_info[app_id] = app_info;\n\n    partition_set partitions;\n    gpid pid(app_id, 0);\n    partitions.insert(pid);\n    cluster_balance_policy::node_migration_info node_info;\n    std::string disk_tag = \"disk1\";\n    node_info.partitions[disk_tag] = partitions;\n    cluster_info.nodes_info[addr] = node_info;\n\n    cluster_info.type = balance_type::COPY_SECONDARY;\n    disk_partitions = policy.get_disk_partitions_map(cluster_info, addr, app_id);\n    ASSERT_EQ(disk_partitions.size(), 1);\n    ASSERT_EQ(disk_partitions.count(disk_tag), 1);\n    ASSERT_EQ(disk_partitions[disk_tag].size(), 1);\n    ASSERT_EQ(disk_partitions[disk_tag].count(pid), 1);\n}\n\nTEST(cluster_balance_policy, 
get_max_load_disk_set)\n{\n    cluster_balance_policy::cluster_migration_info cluster_info;\n    cluster_info.type = balance_type::COPY_SECONDARY;\n\n    int32_t app_id = 1;\n    rpc_address addr(1, 10086);\n    rpc_address addr2(2, 10086);\n    std::map<rpc_address, partition_status::type> partition;\n    partition[addr] = partition_status::PS_SECONDARY;\n    std::map<rpc_address, partition_status::type> partition2;\n    partition2[addr] = partition_status::PS_SECONDARY;\n    partition2[addr2] = partition_status::PS_SECONDARY;\n    cluster_balance_policy::app_migration_info app_info;\n    app_info.partitions.push_back(partition);\n    app_info.partitions.push_back(partition2);\n    cluster_info.apps_info[app_id] = app_info;\n\n    cluster_balance_policy::node_migration_info node_info;\n    partition_set partitions;\n    gpid pid(app_id, 0);\n    partitions.insert(pid);\n    std::string disk_tag = \"disk1\";\n    node_info.partitions[disk_tag] = partitions;\n    partition_set partitions2;\n    gpid pid2(app_id, 1);\n    partitions2.insert(pid2);\n    std::string disk_tag2 = \"disk2\";\n    node_info.partitions[disk_tag2] = partitions2;\n    cluster_info.nodes_info[addr] = node_info;\n\n    cluster_balance_policy::node_migration_info node_info2;\n    partition_set partitions3;\n    gpid pid3(app_id, 1);\n    partitions3.insert(pid3);\n    std::string disk_tag3 = \"disk3\";\n    node_info2.partitions[disk_tag3] = partitions3;\n    cluster_info.nodes_info[addr2] = node_info2;\n\n    cluster_balance_policy policy(nullptr);\n    std::set<rpc_address> max_nodes;\n    max_nodes.insert(addr);\n    max_nodes.insert(addr2);\n\n    std::set<cluster_balance_policy::app_disk_info> max_load_disk_set;\n    policy.get_max_load_disk_set(cluster_info, max_nodes, app_id, max_load_disk_set);\n\n    ASSERT_EQ(max_load_disk_set.size(), 3);\n}\n\nTEST(cluster_balance_policy, apply_move)\n{\n    struct cluster_balance_policy::move_info minfo;\n    int32_t app_id = 1;\n    int32_t 
partition_index = 1;\n    minfo.pid = gpid(app_id, partition_index);\n    rpc_address source_node(1, 10086);\n    minfo.source_node = source_node;\n    std::string disk_tag = \"disk1\";\n    minfo.source_disk_tag = disk_tag;\n    rpc_address target_node(2, 10086);\n    minfo.target_node = target_node;\n    minfo.type = balance_type::MOVE_PRIMARY;\n\n    node_mapper nodes;\n    app_mapper apps;\n    meta_view view;\n    view.apps = &apps;\n    view.nodes = &nodes;\n\n    cluster_balance_policy policy(nullptr);\n    policy._global_view = &view;\n    cluster_balance_policy::cluster_migration_info cluster_info;\n    cluster_info.type = balance_type::COPY_SECONDARY;\n    partition_set selected_pids;\n    migration_list list;\n    policy._migration_result = &list;\n\n    // target_node is not found in cluster_info.replicas_count\n    auto res = policy.apply_move(minfo, selected_pids, list, cluster_info);\n    ASSERT_FALSE(res);\n\n    // source_node is not found in cluster_info.replicas_count\n    cluster_info.apps_skew[app_id] = 1;\n    res = policy.apply_move(minfo, selected_pids, list, cluster_info);\n    ASSERT_FALSE(res);\n\n    // target_node is not found in cluster_info.replicas_count\n    cluster_info.replicas_count[source_node] = 1;\n    res = policy.apply_move(minfo, selected_pids, list, cluster_info);\n    ASSERT_FALSE(res);\n\n    // app_id is not found in cluster_info.app_skew\n    cluster_info.replicas_count[target_node] = 1;\n    res = policy.apply_move(minfo, selected_pids, list, cluster_info);\n    ASSERT_FALSE(res);\n\n    // source_node and target_node are not found in app_info\n    cluster_balance_policy::app_migration_info app_info;\n    cluster_info.apps_info[app_id] = app_info;\n    res = policy.apply_move(minfo, selected_pids, list, cluster_info);\n    ASSERT_FALSE(res);\n\n    // app_info.partitions.size() < partition_index\n    app_info.replicas_count[target_node] = 1;\n    app_info.replicas_count[source_node] = 1;\n    
cluster_info.apps_info[app_id] = app_info;\n    res = policy.apply_move(minfo, selected_pids, list, cluster_info);\n    ASSERT_FALSE(res);\n\n    // all of the partition status are not PS_SECONDARY\n    std::map<rpc_address, partition_status::type> partition_status;\n    partition_status[source_node] = partition_status::type::PS_PRIMARY;\n    cluster_info.apps_info[app_id].partitions.push_back(partition_status);\n    cluster_info.apps_info[app_id].partitions.push_back(partition_status);\n    res = policy.apply_move(minfo, selected_pids, list, cluster_info);\n    ASSERT_FALSE(res);\n\n    // target_node and source_node are not found in cluster_info.nodes_info\n    partition_status[source_node] = partition_status::type::PS_SECONDARY;\n    cluster_info.apps_info[app_id].partitions.clear();\n    cluster_info.apps_info[app_id].partitions.push_back(partition_status);\n    cluster_info.apps_info[app_id].partitions.push_back(partition_status);\n    res = policy.apply_move(minfo, selected_pids, list, cluster_info);\n    ASSERT_FALSE(res);\n\n    // disk_tag is not found in node_info\n    cluster_balance_policy::node_migration_info target_info;\n    cluster_balance_policy::node_migration_info source_info;\n    cluster_info.nodes_info[target_node] = target_info;\n    cluster_info.nodes_info[source_node] = source_info;\n    res = policy.apply_move(minfo, selected_pids, list, cluster_info);\n    ASSERT_FALSE(res);\n\n    fail::setup();\n    fail::cfg(\"generate_balancer_request\", \"return()\");\n    partition_set source_partition_set;\n    cluster_info.nodes_info[source_node].partitions[disk_tag] = source_partition_set;\n    res = policy.apply_move(minfo, selected_pids, list, cluster_info);\n    fail::teardown();\n    ASSERT_TRUE(res);\n}\n\nTEST(cluster_balance_policy, pick_up_partition)\n{\n    cluster_balance_policy::cluster_migration_info cluster_info;\n    rpc_address addr(1, 10086);\n    int32_t app_id = 1;\n    std::map<rpc_address, partition_status::type> partition;\n  
  partition[addr] = partition_status::PS_SECONDARY;\n    cluster_balance_policy::app_migration_info app_info;\n    app_info.partitions.push_back(partition);\n    cluster_info.apps_info[app_id] = app_info;\n\n    cluster_balance_policy policy(nullptr);\n    {\n        // all of the partitions in max_load_partitions are not found in cluster_info\n        partition_set max_load_partitions;\n        int32_t not_exist_app_id = 2;\n        max_load_partitions.insert(gpid(not_exist_app_id, 10086));\n\n        partition_set selected_pid;\n        gpid picked_pid;\n        auto found = policy.pick_up_partition(\n            cluster_info, addr, max_load_partitions, selected_pid, picked_pid);\n        ASSERT_FALSE(found);\n    }\n\n    {\n        // all of the partitions in max_load_partitions are found in selected_pid\n        partition_set max_load_partitions;\n        max_load_partitions.insert(gpid(app_id, 10086));\n        partition_set selected_pid;\n        selected_pid.insert(gpid(app_id, 10086));\n\n        gpid picked_pid;\n        auto found = policy.pick_up_partition(\n            cluster_info, addr, max_load_partitions, selected_pid, picked_pid);\n        ASSERT_FALSE(found);\n    }\n\n    {\n        // partition has already been primary or secondary on min_node\n        partition_set max_load_partitions;\n        max_load_partitions.insert(gpid(app_id, 0));\n        partition_set selected_pid;\n\n        gpid picked_pid;\n        auto found = policy.pick_up_partition(\n            cluster_info, addr, max_load_partitions, selected_pid, picked_pid);\n        ASSERT_FALSE(found);\n    }\n\n    {\n        partition_set max_load_partitions;\n        gpid pid(app_id, 0);\n        max_load_partitions.insert(pid);\n        partition_set selected_pid;\n        rpc_address not_exist_addr(3, 12345);\n\n        gpid picked_pid;\n        auto found = policy.pick_up_partition(\n            cluster_info, not_exist_addr, max_load_partitions, selected_pid, picked_pid);\n        
ASSERT_TRUE(found);\n        ASSERT_EQ(pid, picked_pid);\n    }\n}\n\nbool balance_func(const std::shared_ptr<app_state> &app, bool only_move_primary)\n{\n    return only_move_primary;\n}\n\nTEST(cluster_balance_policy, execute_balance)\n{\n    int32_t app_id = 1;\n    std::string app_name = \"test\";\n    app_info info;\n    info.app_id = app_id;\n    info.app_name = app_name;\n    info.partition_count = 1;\n    info.status = app_status::AS_AVAILABLE;\n    info.is_bulk_loading = false;\n    auto app = std::make_shared<app_state>(info);\n    app->helpers->split_states.splitting_count = 0;\n    app_mapper apps;\n    apps[app_id] = app;\n    cluster_balance_policy policy(nullptr);\n\n    app->status = app_status::AS_DROPPED;\n    auto res = policy.execute_balance(apps, false, false, true, balance_func);\n    app->status = app_status::AS_AVAILABLE;\n    ASSERT_EQ(res, true);\n\n    app->is_bulk_loading = true;\n    res = policy.execute_balance(apps, false, false, true, balance_func);\n    app->is_bulk_loading = false;\n    ASSERT_EQ(res, true);\n\n    app->helpers->split_states.splitting_count = 1;\n    res = policy.execute_balance(apps, false, false, true, balance_func);\n    app->helpers->split_states.splitting_count = 0;\n    ASSERT_EQ(res, true);\n\n    res = policy.execute_balance(apps, false, true, false, balance_func);\n    ASSERT_EQ(res, false);\n\n    res = policy.execute_balance(apps, true, false, true, balance_func);\n    ASSERT_EQ(res, true);\n\n    migration_list migration_result;\n    migration_result.emplace(gpid(1, 1), std::make_shared<configuration_balancer_request>());\n    policy._migration_result = &migration_result;\n    res = policy.execute_balance(apps, false, true, true, balance_func);\n    ASSERT_EQ(res, false);\n}\n\nTEST(cluster_balance_policy, calc_potential_moving)\n{\n    auto addr1 = rpc_address(1, 1);\n    auto addr2 = rpc_address(1, 2);\n    auto addr3 = rpc_address(1, 3);\n\n    int32_t app_id = 1;\n    dsn::app_info info;\n    
info.app_id = app_id;\n    info.partition_count = 4;\n    std::shared_ptr<app_state> app = app_state::create(info);\n    partition_configuration pc;\n    pc.primary = addr1;\n    pc.secondaries.push_back(addr2);\n    pc.secondaries.push_back(addr3);\n    app->partitions[0] = pc;\n    app->partitions[1] = pc;\n\n    app_mapper apps;\n    apps[app_id] = app;\n\n    node_mapper nodes;\n    node_state ns1;\n    ns1.put_partition(gpid(app_id, 0), true);\n    ns1.put_partition(gpid(app_id, 1), true);\n    nodes[addr1] = ns1;\n\n    node_state ns2;\n    ns2.put_partition(gpid(app_id, 0), false);\n    ns2.put_partition(gpid(app_id, 1), false);\n    nodes[addr2] = ns2;\n    nodes[addr3] = ns2;\n\n    struct meta_view view;\n    view.nodes = &nodes;\n    view.apps = &apps;\n    cluster_balance_policy policy(nullptr);\n    policy._global_view = &view;\n\n    auto gpids = policy.calc_potential_moving(app, addr1, addr2);\n    ASSERT_EQ(gpids.size(), 2);\n    ASSERT_EQ(*gpids.begin(), gpid(app_id, 0));\n    ASSERT_EQ(*gpids.rbegin(), gpid(app_id, 1));\n\n    gpids = policy.calc_potential_moving(app, addr1, addr3);\n    ASSERT_EQ(gpids.size(), 2);\n    ASSERT_EQ(*gpids.begin(), gpid(app_id, 0));\n    ASSERT_EQ(*gpids.rbegin(), gpid(app_id, 1));\n\n    gpids = policy.calc_potential_moving(app, addr2, addr3);\n    ASSERT_EQ(gpids.size(), 0);\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/config-test.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n[apps.test_meta]\ntype = test_meta\nrun = true\ncount = 1\nports = 54321\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_DLOCK,THREAD_POOL_META_STATE,THREAD_POOL_BLOCK_SERVICE,THREAD_POOL_META_TEST\n\n[core]\n;tool = simulator\ntool = nativerun\n\n;toollets = tracer, profiler\n;fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_DEBUG\nlogging_factory_name = dsn::tools::simple_logger\n\n\n[tools.simple_logger]\nfast_flush = true\nshort_header = 
false\nstderr_start_level = LOG_LEVEL_ERROR\n\n[tools.simulator]\nrandom_seed = 1465902258\n\n[tools.screen_logger]\nshort_header = false\n\n[network]\n; how many network threads for network library (used by asio)\nio_service_worker_count = 2\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nis_profile = false\nallow_inline = false\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\nis_profile = false\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 3\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_NORMAL\n\n[threadpool.THREAD_POOL_META_TEST]\npartitioned = false\nworker_count = 1\n\n[threadpool.THREAD_POOL_DLOCK]\npartitioned = true\n\n[meta_server]\nmeta_state_service_type = meta_state_service_simple\nserver_list = 127.0.0.1:34601\ncluster_root = /meta_test\nmeta_function_level_on_start = lively\napp_balancer_in_turn = false\nonly_primary_balancer = false\nonly_move_primary = false\ncold_backup_disabled = false\n\n[replication]\ncluster_name = master-cluster\nduplication_enabled = true\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[zookeeper]\nhosts_list = localhost:22181\n;hosts_list = localhost:12181\ntimeout_ms = 30000\nlogfile = zoolog.log\n\n[block_service.local_service]\ntype = local_service\nargs = ./block_service\n\n[block_service.local_service_empty_root]\ntype = local_service\nargs =\n\n[block_service.fds_service]\ntype = fds_service\nargs =\n\n[duplication-group]\nmaster-cluster = 1\nslave-cluster  = 2\ncluster_without_address_for_test = 3\n\n[pegasus.clusters]\nmaster-cluster = 127.0.0.1:34601\nslave-cluster  = 127.0.0.1:35601\n"
  },
  {
    "path": "src/meta/test/copy_replica_operation_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include <dsn/utility/fail_point.h>\n#include \"meta/app_balance_policy.h\"\n\nnamespace dsn {\nnamespace replication {\n\nTEST(copy_primary_operation, misc)\n{\n    int32_t app_id = 1;\n    dsn::app_info info;\n    info.app_id = app_id;\n    info.partition_count = 4;\n    std::shared_ptr<app_state> app = app_state::create(info);\n    app_mapper apps;\n    apps[app_id] = app;\n\n    auto addr1 = rpc_address(1, 1);\n    auto addr2 = rpc_address(1, 2);\n    auto addr3 = rpc_address(1, 3);\n\n    node_mapper nodes;\n    node_state ns1;\n    ns1.put_partition(gpid(app_id, 2), true);\n    ns1.put_partition(gpid(app_id, 0), false);\n    nodes[addr1] = ns1;\n    node_state ns2;\n    ns2.put_partition(gpid(app_id, 0), true);\n    ns2.put_partition(gpid(app_id, 1), true);\n    nodes[addr2] = ns2;\n    node_state ns3;\n    ns3.put_partition(gpid(app_id, 2), false);\n    nodes[addr3] = ns3;\n\n    std::vector<dsn::rpc_address> address_vec{addr1, addr2, addr3};\n    std::unordered_map<dsn::rpc_address, int> address_id;\n    address_id[addr1] = 0;\n    address_id[addr2] = 1;\n    address_id[addr3] = 2;\n    copy_primary_operation 
op(app, apps, nodes, address_vec, address_id, false, 0);\n\n    /**\n     * Test init_ordered_address_ids\n     */\n    op.init_ordered_address_ids();\n    ASSERT_EQ(op._ordered_address_ids.size(), 3);\n    ASSERT_EQ(*op._ordered_address_ids.begin(), 2);\n    ASSERT_EQ(*(++op._ordered_address_ids.begin()), 0);\n    ASSERT_EQ(*op._ordered_address_ids.rbegin(), 1);\n    ASSERT_EQ(op._partition_counts[0], 1);\n    ASSERT_EQ(op._partition_counts[1], 2);\n    ASSERT_EQ(op._partition_counts[2], 0);\n\n    /**\n     * Test get_all_partitions\n     */\n    auto partitions = op.get_all_partitions();\n    ASSERT_EQ(partitions->size(), 2);\n    ASSERT_EQ(*partitions->begin(), gpid(app_id, 0));\n    ASSERT_EQ(*partitions->rbegin(), gpid(app_id, 1));\n\n    /**\n     * Test select_partition\n     */\n    std::string disk1 = \"disk1\", disk2 = \"disk2\";\n    disk_load load;\n    load[disk1] = 2;\n    load[disk2] = 6;\n    op._node_loads[addr2] = load;\n\n    serving_replica serving_partition0;\n    serving_partition0.node = addr2;\n    serving_partition0.disk_tag = disk1;\n    app->helpers->contexts[0].serving.push_back(serving_partition0);\n    serving_replica serving_partition1;\n    serving_partition1.node = addr2;\n    serving_partition1.disk_tag = disk2;\n    app->helpers->contexts[1].serving.push_back(serving_partition1);\n\n    migration_list list;\n    auto res_gpid = op.select_partition(&list);\n    ASSERT_EQ(res_gpid.get_partition_index(), 1);\n\n    /**\n     * Test can_continue\n     **/\n    op._have_lower_than_average = true;\n    ASSERT_FALSE(op.can_continue());\n\n    op._have_lower_than_average = false;\n    ASSERT_TRUE(op.can_continue());\n    op._have_lower_than_average = true;\n\n    op._replicas_low = 1;\n    ASSERT_TRUE(op.can_continue());\n    op._replicas_low = 0;\n\n    nodes[addr2].remove_partition(gpid(app_id, 1), false);\n    op.init_ordered_address_ids();\n    ASSERT_FALSE(op.can_continue());\n    nodes[addr2].put_partition(gpid(app_id, 1), 
true);\n\n    /**\n     * Test update_ordered_address_ids\n     */\n    nodes[addr1].put_partition(gpid(app_id, 3), true);\n    nodes[addr2].put_partition(gpid(app_id, 4), true);\n    nodes[addr2].put_partition(gpid(app_id, 5), true);\n    op.init_ordered_address_ids();\n    op.update_ordered_address_ids();\n    ASSERT_EQ(op._ordered_address_ids.size(), 3);\n    ASSERT_EQ(*op._ordered_address_ids.begin(), 2);\n    ASSERT_EQ(*(++op._ordered_address_ids.begin()), 0);\n    ASSERT_EQ(*op._ordered_address_ids.rbegin(), 1);\n    ASSERT_EQ(op._partition_counts[0], 2);\n    ASSERT_EQ(op._partition_counts[1], 3);\n    ASSERT_EQ(op._partition_counts[2], 1);\n\n    /**\n     * Test copy_once\n     */\n    fail::setup();\n    fail::cfg(\"generate_balancer_request\", \"return()\");\n    gpid gpid1(1, 0);\n    gpid gpid2(1, 1);\n    list.clear();\n    op.copy_once(gpid1, &list);\n    ASSERT_EQ(list.size(), 1);\n    ASSERT_EQ(list.count(gpid1), 1);\n    ASSERT_EQ(list.count(gpid2), 0);\n    fail::teardown();\n}\n\nTEST(copy_primary_operation, can_select)\n{\n    app_mapper apps;\n    node_mapper nodes;\n    std::vector<dsn::rpc_address> address_vec;\n    std::unordered_map<dsn::rpc_address, int> address_id;\n    copy_primary_operation op(nullptr, apps, nodes, address_vec, address_id, false, false);\n\n    gpid cannot_select_gpid(1, 1);\n    gpid can_select_gpid(1, 2);\n    migration_list list;\n    list[cannot_select_gpid] = nullptr;\n\n    ASSERT_FALSE(op.can_select(cannot_select_gpid, &list));\n    ASSERT_TRUE(op.can_select(can_select_gpid, &list));\n}\n\nTEST(copy_primary_operation, only_copy_primary)\n{\n    app_mapper apps;\n    node_mapper nodes;\n    std::vector<dsn::rpc_address> address_vec;\n    std::unordered_map<dsn::rpc_address, int> address_id;\n    copy_primary_operation op(nullptr, apps, nodes, address_vec, address_id, false, false);\n\n    ASSERT_TRUE(op.only_copy_primary());\n}\n\nTEST(copy_secondary_operation, misc)\n{\n    int32_t app_id = 1;\n    dsn::app_info 
info;\n    info.app_id = app_id;\n    info.partition_count = 4;\n    std::shared_ptr<app_state> app = app_state::create(info);\n    app_mapper apps;\n    apps[app_id] = app;\n\n    auto addr1 = rpc_address(1, 1);\n    auto addr2 = rpc_address(1, 2);\n    auto addr3 = rpc_address(1, 3);\n\n    node_mapper nodes;\n    node_state ns1;\n    ns1.put_partition(gpid(app_id, 2), true);\n    ns1.put_partition(gpid(app_id, 0), false);\n    nodes[addr1] = ns1;\n    node_state ns2;\n    ns2.put_partition(gpid(app_id, 0), true);\n    ns2.put_partition(gpid(app_id, 1), true);\n    nodes[addr2] = ns2;\n    node_state ns3;\n    nodes[addr3] = ns3;\n\n    std::vector<dsn::rpc_address> address_vec{addr1, addr2, addr3};\n    std::unordered_map<dsn::rpc_address, int> address_id;\n    address_id[addr1] = 0;\n    address_id[addr2] = 1;\n    address_id[addr3] = 2;\n    copy_secondary_operation op(app, apps, nodes, address_vec, address_id, 0);\n    op.init_ordered_address_ids();\n\n    /**\n     * Test copy_secondary_operation::get_partition_count\n     */\n    ASSERT_EQ(op.get_partition_count(ns1), 2);\n    ASSERT_EQ(op.get_partition_count(ns2), 2);\n    ASSERT_EQ(op.get_partition_count(ns3), 0);\n\n    /**\n     * Test copy_secondary_operation::can_continue\n     */\n    auto res = op.can_continue();\n    ASSERT_TRUE(res);\n\n    op._replicas_low = 100;\n    res = op.can_continue();\n    ASSERT_FALSE(res);\n    op._replicas_low = 0;\n\n    nodes[addr3].put_partition(gpid(app_id, 2), false);\n    op.init_ordered_address_ids();\n    res = op.can_continue();\n    ASSERT_FALSE(res);\n    nodes[addr3].remove_partition(gpid(app_id, 2), false);\n\n    /**\n     * Test copy_secondary_operation::can_select\n     */\n    nodes[addr1].put_partition(gpid(app_id, 3), true);\n    op.init_ordered_address_ids();\n    migration_list list;\n    res = op.can_select(gpid(app_id, 3), &list);\n    ASSERT_FALSE(res);\n\n    auto secondary_gpid = gpid(app_id, 0);\n    list[secondary_gpid] = nullptr;\n    res = 
op.can_select(secondary_gpid, &list);\n    ASSERT_FALSE(res);\n    list.clear();\n\n    nodes[addr3].put_partition(secondary_gpid, true);\n    op.init_ordered_address_ids();\n    res = op.can_select(secondary_gpid, &list);\n    ASSERT_FALSE(res);\n\n    nodes[addr3].remove_partition(secondary_gpid, false);\n    op.init_ordered_address_ids();\n    res = op.can_select(secondary_gpid, &list);\n    ASSERT_TRUE(res);\n\n    /**\n     * Test copy_secondary_operation::get_balance_type\n     */\n    ASSERT_EQ(op.get_balance_type(), balance_type::COPY_SECONDARY);\n\n    /**\n     * Test copy_secondary_operation::only_copy_primary\n     */\n    ASSERT_FALSE(op.only_copy_primary());\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/dummy_balancer.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"meta/meta_service.h\"\n#include \"meta/server_load_balancer.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass dummy_balancer : public server_load_balancer\n{\npublic:\n    dummy_balancer(meta_service *s) : server_load_balancer(s) {}\n    virtual bool balance(meta_view view, migration_list &list) { return false; }\n    virtual bool check(meta_view view, migration_list &list) { return false; }\n    virtual void report(const migration_list &list, bool balance_checker) {}\n    virtual std::string get_balance_operation_count(const std::vector<std::string> &args)\n    {\n        return std::string(\"unknown\");\n    }\n    virtual void score(meta_view view, double &primary_stddev, double &total_stddev) {}\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/dump_file.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <gtest/gtest.h>\n#include \"meta/dump_file.h\"\n\nTEST(dump_file, read_write)\n{\n    unsigned int total_length = 4096;\n    std::shared_ptr<char> buffer(dsn::utils::make_shared_array<char>(total_length));\n    char *ptr = buffer.get();\n    for (int i = 0; i != total_length; ++i)\n        ptr[i] = i % 256;\n\n    std::vector<unsigned int> length_blocks;\n    {\n        std::shared_ptr<dump_file> f = dump_file::open_file(\"test_file\", true);\n        ASSERT_TRUE(f != nullptr);\n\n        unsigned int current_length = 10;\n        unsigned int step = 10;\n        unsigned int copyed = 0;\n\n        while (copyed < total_length) {\n            if (copyed + current_length > total_length)\n                current_length = 
total_length - copyed;\n\n            int ans = f->append_buffer(ptr + copyed, current_length);\n            ASSERT_TRUE(ans == 0);\n\n            copyed += current_length;\n            length_blocks.push_back(current_length);\n            current_length += step;\n        }\n    }\n\n    {\n        std::shared_ptr<dump_file> f = dump_file::open_file(\"test_file\", false);\n        ASSERT_TRUE(f != nullptr);\n\n        std::shared_ptr<char> out_buffer(dsn::utils::make_shared_array<char>(total_length));\n        ptr = out_buffer.get();\n        dsn::blob bb;\n        int block_offset = 0;\n        while (true) {\n            int ans = f->read_next_buffer(bb);\n            ASSERT_TRUE(ans != -1);\n            if (ans == 0)\n                break;\n\n            ASSERT_TRUE(bb.length() == length_blocks[block_offset]);\n            memcpy(ptr, bb.data(), bb.length());\n            block_offset++;\n            ptr += bb.length();\n        }\n\n        ASSERT_EQ(block_offset, length_blocks.size());\n        ASSERT_EQ(memcmp(out_buffer.get(), buffer.get(), total_length), 0);\n    }\n\n    // corrupted end\n    {\n        FILE *fp = fopen(\"test_file\", \"rb+\");\n        fseek(fp, -4, SEEK_END);\n        uint32_t num = 0;\n        fwrite(&num, sizeof(num), 1, fp);\n        fclose(fp);\n\n        std::shared_ptr<dump_file> f = dump_file::open_file(\"test_file\", false);\n        dsn::blob bb;\n        int block_offset = 0;\n        while (true) {\n            int ans = f->read_next_buffer(bb);\n            if (ans == 0)\n                break;\n\n            if (block_offset < length_blocks.size() - 1)\n                ASSERT_EQ(ans, 1);\n            else\n                ASSERT_EQ(ans, -1);\n            block_offset++;\n        }\n    }\n\n    // data loss in the end\n    {\n        FILE *fp = fopen(\"test_file\", \"rb\");\n        FILE *fp2 = fopen(\"test_file2\", \"wb\");\n\n        fseek(fp, 0, SEEK_END);\n        auto size = ftell(fp);\n        fseek(fp, 0, 
SEEK_SET);\n        std::unique_ptr<char[]> buf(new char[size - 4]);\n        size_t cnt = fread(buf.get(), 1, size - 4, fp);\n        ASSERT_EQ(cnt, size - 4);\n        cnt = fwrite(buf.get(), 1, cnt, fp2);\n        ASSERT_EQ(cnt, size - 4);\n\n        fclose(fp);\n        fclose(fp2);\n\n        std::shared_ptr<dump_file> f = dump_file::open_file(\"test_file2\", false);\n        dsn::blob bb;\n        int block_offset = 0;\n        while (true) {\n            int ans = f->read_next_buffer(bb);\n            if (ans == 0)\n                break;\n            if (block_offset < length_blocks.size() - 1)\n                ASSERT_EQ(ans, 1);\n            else\n                ASSERT_EQ(ans, -1);\n            block_offset++;\n        }\n    }\n}\n"
  },
  {
    "path": "src/meta/test/duplication_info_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"meta/duplication/duplication_info.h\"\n\n#include <gtest/gtest.h>\n#include <boost/algorithm/string.hpp>\n\nnamespace dsn {\nnamespace replication {\n\nclass duplication_info_test : public testing::Test\n{\npublic:\n    void force_update_status(duplication_info &dup, duplication_status::type status)\n    {\n        dup._status = status;\n    }\n\n    static void test_alter_progress()\n    {\n\n        duplication_info dup(1,\n                             1,\n                             \"temp\",\n                             2,\n                             0,\n                             \"dsn://slave-cluster/temp\",\n                             std::vector<rpc_address>(),\n                             
\"/meta_test/101/duplication/1\");\n        duplication_confirm_entry entry;\n        ASSERT_FALSE(dup.alter_progress(0, entry));\n\n        dup.init_progress(0, invalid_decree);\n        entry.confirmed_decree = 5;\n        entry.checkpoint_prepared = true;\n        ASSERT_TRUE(dup.alter_progress(0, entry));\n        ASSERT_EQ(dup._progress[0].volatile_decree, 5);\n        ASSERT_TRUE(dup._progress[0].is_altering);\n        ASSERT_TRUE(dup._progress[0].checkpoint_prepared);\n\n        // busy updating\n        entry.confirmed_decree = 10;\n        entry.checkpoint_prepared = false;\n        ASSERT_FALSE(dup.alter_progress(0, entry));\n        ASSERT_EQ(dup._progress[0].volatile_decree, 5);\n        ASSERT_TRUE(dup._progress[0].is_altering);\n        ASSERT_TRUE(dup._progress[0].checkpoint_prepared);\n\n        dup.persist_progress(0);\n        ASSERT_EQ(dup._progress[0].stored_decree, 5);\n        ASSERT_FALSE(dup._progress[0].is_altering);\n        ASSERT_TRUE(dup._progress[0].checkpoint_prepared);\n\n        // too frequent to update\n        dup.init_progress(1, invalid_decree);\n        ASSERT_TRUE(dup.alter_progress(1, entry));\n        ASSERT_TRUE(dup._progress[1].is_altering);\n        dup.persist_progress(1);\n\n        ASSERT_FALSE(dup.alter_progress(1, entry));\n        ASSERT_FALSE(dup._progress[1].is_altering);\n\n        dup._progress[1].last_progress_update_ms -=\n            duplication_info::PROGRESS_UPDATE_PERIOD_MS + 100;\n\n        entry.confirmed_decree = 15;\n        entry.checkpoint_prepared = true;\n        ASSERT_TRUE(dup.alter_progress(1, entry));\n        ASSERT_TRUE(dup._progress[1].is_altering);\n        ASSERT_TRUE(dup.all_checkpoint_has_prepared());\n    }\n\n    static void test_init_and_start()\n    {\n        duplication_info dup(1,\n                             1,\n                             \"temp\",\n                             4,\n                             0,\n                             \"dsn://slave-cluster/temp\",\n   
                          std::vector<rpc_address>(),\n                             \"/meta_test/101/duplication/1\");\n        ASSERT_FALSE(dup.is_altering());\n        ASSERT_EQ(dup._status, duplication_status::DS_INIT);\n        ASSERT_EQ(dup._next_status, duplication_status::DS_INIT);\n\n        auto dup_ent = dup.to_duplication_entry();\n        ASSERT_EQ(dup_ent.progress.size(), 0);\n\n        for (int i = 0; i < 4; i++) {\n            dup.init_progress(i, invalid_decree);\n        }\n        for (auto kv : dup_ent.progress) {\n            ASSERT_EQ(kv.second, invalid_decree);\n        }\n\n        dup.start();\n        ASSERT_TRUE(dup.is_altering());\n        ASSERT_EQ(dup._status, duplication_status::DS_INIT);\n        ASSERT_EQ(dup._next_status, duplication_status::DS_PREPARE);\n    }\n\n    static void test_persist_status()\n    {\n        duplication_info dup(1,\n                             1,\n                             \"temp\",\n                             4,\n                             0,\n                             \"dsn://slave-cluster/temp\",\n                             std::vector<rpc_address>(),\n                             \"/meta_test/101/duplication/1\");\n        dup.start();\n\n        dup.persist_status();\n        ASSERT_EQ(dup._status, duplication_status::DS_PREPARE);\n        ASSERT_EQ(dup._next_status, duplication_status::DS_INIT);\n        ASSERT_FALSE(dup.is_altering());\n    }\n\n    static void test_encode_and_decode()\n    {\n        dsn_run_config(\"config-test.ini\", false);\n        duplication_info dup(1,\n                             1,\n                             \"temp\",\n                             4,\n                             0,\n                             \"slave-cluster\",\n                             std::vector<rpc_address>(),\n                             \"/meta_test/101/duplication/1\");\n        dup.start();\n        dup.persist_status();\n\n        
dup.alter_status(duplication_status::DS_APP);\n        auto json = dup.to_json_blob();\n        dup.persist_status();\n\n        duplication_info::json_helper copy;\n        ASSERT_TRUE(json::json_forwarder<duplication_info::json_helper>::decode(json, copy));\n        ASSERT_EQ(copy.status, duplication_status::DS_APP);\n        ASSERT_EQ(copy.create_timestamp_ms, dup.create_timestamp_ms);\n        ASSERT_EQ(copy.remote, dup.follower_cluster_name);\n\n        auto dup_sptr = duplication_info::decode_from_blob(\n            1, 1, \"temp\", 4, \"/meta_test/101/duplication/1\", json);\n        ASSERT_TRUE(dup_sptr->equals_to(dup)) << dup_sptr->to_string() << \" \" << dup.to_string();\n\n        blob new_json =\n            blob::create_from_bytes(boost::replace_all_copy(json.to_string(), \"DS_APP\", \"DS_FOO\"));\n        ASSERT_FALSE(json::json_forwarder<duplication_info::json_helper>::decode(new_json, copy));\n        ASSERT_EQ(copy.status, duplication_status::DS_REMOVED);\n    }\n};\n\nTEST_F(duplication_info_test, alter_status_when_busy)\n{\n    duplication_info dup(1,\n                         1,\n                         \"temp\",\n                         4,\n                         0,\n                         \"dsn://slave-cluster/temp\",\n                         std::vector<rpc_address>(),\n                         \"/meta_test/101/duplication/1\");\n    dup.start();\n\n    ASSERT_EQ(dup.alter_status(duplication_status::DS_PAUSE), ERR_BUSY);\n}\n\nTEST_F(duplication_info_test, alter_status)\n{\n    struct TestData\n    {\n        std::vector<duplication_status::type> from_list;\n        std::vector<duplication_status::type> to_list;\n\n        error_code wec;\n    } tests[] = {\n        {{duplication_status::DS_INIT, duplication_status::DS_PREPARE},\n         {duplication_status::DS_PREPARE},\n         ERR_OK},\n        {{duplication_status::DS_PREPARE, duplication_status::DS_APP},\n         {duplication_status::DS_APP},\n         ERR_OK},\n        
{{duplication_status::DS_INIT,\n          duplication_status::DS_APP,\n          duplication_status::DS_PAUSE,\n          duplication_status::DS_LOG},\n         {duplication_status::DS_LOG},\n         ERR_OK},\n        {{duplication_status::DS_LOG, duplication_status::DS_PAUSE},\n         {duplication_status::DS_PAUSE},\n         ERR_OK},\n\n        {{duplication_status::DS_INIT,\n          duplication_status::DS_PREPARE,\n          duplication_status::DS_APP,\n          duplication_status::DS_PAUSE,\n          duplication_status::DS_LOG},\n         {duplication_status::DS_REMOVED},\n         ERR_OK},\n\n        {{duplication_status::DS_PREPARE,\n          duplication_status::DS_APP,\n          duplication_status::DS_PAUSE,\n          duplication_status::DS_LOG},\n         {duplication_status::DS_INIT},\n         ERR_INVALID_PARAMETERS},\n\n        {{duplication_status::DS_REMOVED},\n         {duplication_status::DS_INIT,\n          duplication_status::DS_PREPARE,\n          duplication_status::DS_APP,\n          duplication_status::DS_PAUSE,\n          duplication_status::DS_LOG},\n         ERR_OBJECT_NOT_FOUND},\n\n        {{duplication_status::DS_INIT, duplication_status::DS_PREPARE, duplication_status::DS_APP},\n         {duplication_status::DS_PAUSE},\n         ERR_INVALID_PARAMETERS},\n\n        {{duplication_status::DS_PREPARE}, {duplication_status::DS_LOG}, ERR_INVALID_PARAMETERS},\n\n        {{duplication_status::DS_LOG},\n         {duplication_status::DS_INIT, duplication_status::DS_PREPARE, duplication_status::DS_APP},\n         ERR_INVALID_PARAMETERS}};\n\n    for (auto tt : tests) {\n        duplication_info dup(1,\n                             1,\n                             \"temp\",\n                             4,\n                             0,\n                             \"dsn://slave-cluster/temp\",\n                             std::vector<rpc_address>(),\n                             \"/meta_test/101/duplication/1\");\n        for (const 
auto from : tt.from_list) {\n            force_update_status(dup, from);\n            for (const auto to : tt.to_list) {\n                ASSERT_EQ(dup.alter_status(to), tt.wec);\n                if (dup.is_altering()) {\n                    dup.persist_status();\n                }\n            }\n        }\n    }\n}\n\nTEST_F(duplication_info_test, alter_progress) { test_alter_progress(); }\n\nTEST_F(duplication_info_test, persist_status) { test_persist_status(); }\n\nTEST_F(duplication_info_test, init_and_start) { test_init_and_start(); }\n\nTEST_F(duplication_info_test, encode_and_decode) { test_encode_and_decode(); }\n\nTEST_F(duplication_info_test, is_valid)\n{\n    duplication_info dup(1,\n                         1,\n                         \"temp\",\n                         4,\n                         0,\n                         \"dsn://slave-cluster/temp\",\n                         std::vector<rpc_address>(),\n                         \"/meta_test/101/duplication/1\");\n    ASSERT_TRUE(dup.is_invalid_status());\n\n    dup.start();\n    dup.persist_status();\n    ASSERT_FALSE(dup.is_invalid_status());\n\n    ASSERT_EQ(dup.alter_status(duplication_status::DS_APP), ERR_OK);\n    dup.persist_status();\n    ASSERT_FALSE(dup.is_invalid_status());\n\n    ASSERT_EQ(dup.alter_status(duplication_status::DS_REMOVED), ERR_OK);\n    dup.persist_status();\n    ASSERT_TRUE(dup.is_invalid_status());\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/ford_fulkerson_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include \"meta/load_balance_policy.h\"\n\nnamespace dsn {\nnamespace replication {\nTEST(ford_fulkerson, build_failure)\n{\n    int32_t app_id = 1;\n    dsn::app_info info;\n    info.app_id = app_id;\n    info.partition_count = 4;\n    std::shared_ptr<app_state> app = app_state::create(info);\n\n    node_mapper nodes;\n    node_state ns;\n    ns.put_partition(gpid(app_id, 0), true);\n    nodes[rpc_address(1, 1)] = ns;\n    nodes[rpc_address(2, 2)] = ns;\n    nodes[rpc_address(3, 3)] = ns;\n\n    std::unordered_map<dsn::rpc_address, int> address_id;\n    auto ff = ford_fulkerson::builder(app, nodes, address_id).build();\n    ASSERT_EQ(ff, nullptr);\n}\n\nTEST(ford_fulkerson, add_edge)\n{\n    int32_t app_id = 1;\n    dsn::app_info info;\n    info.app_id = app_id;\n    info.partition_count = 4;\n    std::shared_ptr<app_state> app = app_state::create(info);\n\n    std::unordered_map<dsn::rpc_address, int> address_id;\n    auto addr1 = rpc_address(1, 1);\n    auto addr2 = rpc_address(1, 2);\n    auto addr3 = rpc_address(1, 3);\n    address_id[addr1] = 1;\n    address_id[addr2] = 2;\n    address_id[addr3] = 3;\n\n    
node_mapper nodes;\n    node_state ns;\n    nodes[addr1] = ns;\n    nodes[addr2] = ns;\n    nodes[addr3] = ns;\n\n    auto ff = ford_fulkerson::builder(app, nodes, address_id).build();\n    ff->add_edge(1, ns);\n    ASSERT_EQ(ff->_network[1].back(), 1);\n\n    ns.put_partition(gpid(app_id, 0), true);\n    ns.put_partition(gpid(app_id, 1), true);\n    ns.put_partition(gpid(app_id, 2), true);\n    ff->add_edge(3, ns);\n    ASSERT_EQ(ff->_network[0][3], 2);\n}\n\nTEST(ford_fulkerson, update_decree)\n{\n    auto addr1 = rpc_address(1, 1);\n    auto addr2 = rpc_address(2, 2);\n    auto addr3 = rpc_address(3, 3);\n\n    int32_t app_id = 1;\n    dsn::app_info info;\n    info.app_id = app_id;\n    info.partition_count = 1;\n    std::shared_ptr<app_state> app = app_state::create(info);\n    partition_configuration pc;\n    pc.secondaries.push_back(addr2);\n    pc.secondaries.push_back(addr3);\n    app->partitions.push_back(pc);\n    app->partitions.push_back(pc);\n\n    node_mapper nodes;\n    node_state ns;\n    ns.put_partition(gpid(app_id, 0), true);\n    ns.put_partition(gpid(app_id, 1), true);\n    nodes[addr1] = ns;\n    nodes[addr2] = ns;\n    nodes[addr3] = ns;\n\n    std::unordered_map<dsn::rpc_address, int> address_id;\n    address_id[addr1] = 1;\n    address_id[addr2] = 2;\n    address_id[addr3] = 3;\n\n    auto node_id = 1;\n    auto ff = ford_fulkerson::builder(app, nodes, address_id).build();\n    ff->update_decree(node_id, ns);\n    ASSERT_EQ(ff->_network[1][2], 2);\n    ASSERT_EQ(ff->_network[1][3], 2);\n}\n\nTEST(ford_fulkerson, find_shortest_path)\n{\n    auto addr1 = rpc_address(1, 1);\n    auto addr2 = rpc_address(2, 2);\n    auto addr3 = rpc_address(3, 3);\n\n    int32_t app_id = 1;\n    dsn::app_info info;\n    info.app_id = app_id;\n    info.partition_count = 2;\n    std::shared_ptr<app_state> app = app_state::create(info);\n\n    partition_configuration pc;\n    pc.primary = addr1;\n    pc.secondaries.push_back(addr2);\n    
pc.secondaries.push_back(addr3);\n    app->partitions[0] = pc;\n    app->partitions[1] = pc;\n\n    node_mapper nodes;\n    node_state ns1;\n    ns1.put_partition(gpid(app_id, 0), true);\n    ns1.put_partition(gpid(app_id, 1), true);\n    nodes[addr1] = ns1;\n\n    node_state ns2;\n    ns2.put_partition(gpid(app_id, 0), false);\n    ns2.put_partition(gpid(app_id, 1), false);\n    nodes[addr2] = ns2;\n    nodes[addr3] = ns2;\n\n    std::unordered_map<dsn::rpc_address, int> address_id;\n    address_id[addr1] = 1;\n    address_id[addr2] = 2;\n    address_id[addr3] = 3;\n\n    /**\n     * ford fulkerson graph:\n     *             1      2      1\n     * (source) 0 ---> 1 ---> 3 ---\n     *               2 |           |\n     *                 v           v\n     *                 2 --------> 4 (sink)\n     *                      1\n     */\n    auto ff = ford_fulkerson::builder(app, nodes, address_id).build();\n    ASSERT_EQ(ff->_network[0][0], 0);\n    ASSERT_EQ(ff->_network[0][1], 1);\n    ASSERT_EQ(ff->_network[0][2], 0);\n    ASSERT_EQ(ff->_network[0][3], 0);\n    ASSERT_EQ(ff->_network[0][4], 0);\n\n    ASSERT_EQ(ff->_network[1][0], 0);\n    ASSERT_EQ(ff->_network[1][1], 0);\n    ASSERT_EQ(ff->_network[1][2], 2);\n    ASSERT_EQ(ff->_network[1][3], 2);\n    ASSERT_EQ(ff->_network[1][4], 0);\n\n    ASSERT_EQ(ff->_network[2][0], 0);\n    ASSERT_EQ(ff->_network[2][1], 0);\n    ASSERT_EQ(ff->_network[2][2], 0);\n    ASSERT_EQ(ff->_network[2][3], 0);\n    ASSERT_EQ(ff->_network[2][4], 1);\n\n    ASSERT_EQ(ff->_network[3][0], 0);\n    ASSERT_EQ(ff->_network[3][1], 0);\n    ASSERT_EQ(ff->_network[3][2], 0);\n    ASSERT_EQ(ff->_network[3][3], 0);\n    ASSERT_EQ(ff->_network[3][4], 1);\n\n    ASSERT_EQ(ff->_network[4][0], 0);\n    ASSERT_EQ(ff->_network[4][1], 0);\n    ASSERT_EQ(ff->_network[4][2], 0);\n    ASSERT_EQ(ff->_network[4][3], 0);\n    ASSERT_EQ(ff->_network[4][4], 0);\n\n    /**\n     * shortest path:\n     *         1      1      1\n     *      0 ---> 1 ---> 2 
---> 4\n     *  (source)               (sink)\n     */\n    auto flow_path = ff->find_shortest_path();\n    ASSERT_EQ(flow_path->_prev[4], 2);\n    ASSERT_EQ(flow_path->_flow[4], 1);\n    ASSERT_EQ(flow_path->_prev[2], 1);\n    ASSERT_EQ(flow_path->_flow[2], 1);\n    ASSERT_EQ(flow_path->_prev[1], 0);\n    ASSERT_EQ(flow_path->_flow[1], 1);\n}\n\nTEST(ford_fulkerson, max_value_pos)\n{\n    int32_t app_id = 1;\n    dsn::app_info info;\n    info.app_id = app_id;\n    info.partition_count = 4;\n    std::shared_ptr<app_state> app = app_state::create(info);\n\n    std::unordered_map<dsn::rpc_address, int> address_id;\n    auto addr1 = rpc_address(1, 1);\n    auto addr2 = rpc_address(1, 2);\n    auto addr3 = rpc_address(1, 3);\n    address_id[addr1] = 1;\n    address_id[addr2] = 2;\n    address_id[addr3] = 3;\n\n    node_mapper nodes;\n    node_state ns;\n    nodes[addr1] = ns;\n    nodes[addr2] = ns;\n    nodes[addr3] = ns;\n    auto ff = ford_fulkerson::builder(app, nodes, address_id).build();\n\n    std::vector<bool> visit(5, false);\n    std::vector<int> flow(5, 0);\n    auto pos = ff->max_value_pos(visit, flow);\n    ASSERT_EQ(pos, -1);\n\n    flow[1] = 3;\n    flow[2] = 5;\n    pos = ff->max_value_pos(visit, flow);\n    ASSERT_EQ(pos, 2);\n\n    visit[2] = true;\n    pos = ff->max_value_pos(visit, flow);\n    ASSERT_EQ(pos, 1);\n}\n\nTEST(ford_fulkerson, select_node)\n{\n    int32_t app_id = 1;\n    dsn::app_info info;\n    info.app_id = app_id;\n    info.partition_count = 4;\n    std::shared_ptr<app_state> app = app_state::create(info);\n\n    std::unordered_map<dsn::rpc_address, int> address_id;\n    auto addr1 = rpc_address(1, 1);\n    auto addr2 = rpc_address(1, 2);\n    auto addr3 = rpc_address(1, 3);\n    address_id[addr1] = 1;\n    address_id[addr2] = 2;\n    address_id[addr3] = 3;\n\n    node_mapper nodes;\n    node_state ns;\n    nodes[addr1] = ns;\n    nodes[addr2] = ns;\n    nodes[addr3] = ns;\n    auto ff = ford_fulkerson::builder(app, nodes, 
address_id).build();\n\n    std::vector<bool> visit(5, false);\n    std::vector<int> flow(5, 0);\n    auto pos = ff->select_node(visit, flow);\n    ASSERT_EQ(pos, -1);\n\n    flow[1] = 3;\n    flow[2] = 5;\n    pos = ff->select_node(visit, flow);\n    ASSERT_EQ(pos, 2);\n    ASSERT_EQ(visit[pos], true);\n\n    visit[2] = true;\n    pos = ff->select_node(visit, flow);\n    ASSERT_EQ(pos, 1);\n    ASSERT_EQ(visit[pos], true);\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/json_compacity.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <gtest/gtest.h>\n\n#include <dsn/service_api_c.h>\n#include <dsn/service_api_cpp.h>\n\n#include \"meta/meta_service.h\"\n#include \"meta/server_state.h\"\n#include \"meta/meta_backup_service.h\"\n#include \"meta_service_test_app.h\"\n\nnamespace dsn {\nnamespace replication {\n\nvoid meta_service_test_app::json_compacity()\n{\n    dsn::app_info info;\n    info.app_id = 1;\n    info.app_name = \"test\";\n    info.app_type = \"test\";\n    info.expire_second = 30;\n    info.is_stateful = true;\n    info.max_replica_count = 3;\n    info.partition_count = 32;\n    info.status = dsn::app_status::AS_AVAILABLE;\n\n    dsn::app_info info2;\n\n    // 1. 
encoded data can be decoded\n    dsn::blob bb = dsn::json::json_forwarder<dsn::app_info>::encode(info);\n    std::cout << bb.data() << std::endl;\n    ASSERT_TRUE(dsn::json::json_forwarder<dsn::app_info>::decode(bb, info2));\n    ASSERT_EQ(info2, info);\n\n    // 2. old version of json can be decoded to new struct\n    const char *json = \"{\\\"status\\\":\\\"app_status::AS_AVAILABLE\\\",\"\n                       \"\\\"app_type\\\":\\\"pegasus\\\",\\\"app_name\\\":\\\"temp\\\",\"\n                       \"\\\"app_id\\\":1,\\\"partition_count\\\":16,\\\"envs\\\":{},\"\n                       \"\\\"is_stateful\\\":1,\\\"max_replica_count\\\":3}\";\n    dsn::json::json_forwarder<dsn::app_info>::decode(dsn::blob(json, 0, strlen(json)), info2);\n    ASSERT_EQ(info2.app_name, \"temp\");\n    ASSERT_EQ(info2.max_replica_count, 3);\n\n    // 3. older version\n    info2 = info;\n    const char *json2 = \"{\\\"status\\\":\\\"app_status::AS_AVAILABLE\\\",\"\n                        \"\\\"app_type\\\":\\\"pegasus\\\",\\\"app_name\\\":\\\"temp\\\",\"\n                        \"\\\"app_id\\\":1,\\\"partition_count\\\":16,\\\"envs\\\":{}}\";\n    dsn::json::json_forwarder<dsn::app_info>::decode(dsn::blob(json2, 0, strlen(json2)), info2);\n    ASSERT_EQ(info2.app_name, \"temp\");\n    ASSERT_EQ(info2.app_type, \"pegasus\");\n    ASSERT_EQ(info2.partition_count, 16);\n\n    // 4. 
old pc version\n    const char *json3 = \"{\\\"pid\\\":\\\"1.1\\\",\\\"ballot\\\":234,\\\"max_replica_count\\\":3,\"\n                        \"\\\"primary\\\":\\\"invalid address\\\",\\\"secondaries\\\":[\\\"127.0.0.1:6\\\"],\"\n                        \"\\\"last_drops\\\":[],\\\"last_committed_decree\\\":157}\";\n    dsn::partition_configuration pc;\n    dsn::json::json_forwarder<dsn::partition_configuration>::decode(\n        dsn::blob(json3, 0, strlen(json3)), pc);\n    ASSERT_EQ(234, pc.ballot);\n    ASSERT_TRUE(pc.primary.is_invalid());\n    ASSERT_EQ(1, pc.secondaries.size());\n    ASSERT_EQ(0, strcmp(pc.secondaries[0].to_string(), \"127.0.0.1:6\"));\n    ASSERT_EQ(157, pc.last_committed_decree);\n    ASSERT_EQ(0, pc.partition_flags);\n\n    // 5. not valid json\n    const char *json4 = \"{\\\"pid\\\":\\\"1.1\\\",\\\"ballot\\\":234,\\\"max_replica_count\\\":3,\"\n                        \"\\\"primary\\\":\\\"invalid address\\\",\\\"secondaries\\\":[\\\"127.0.0.1:6\\\",\"\n                        \"\\\"last_drops\\\":[],\\\"last_committed_decree\\\":157}\";\n    dsn::blob in(json4, 0, strlen(json4));\n    bool result = dsn::json::json_forwarder<dsn::partition_configuration>::decode(in, pc);\n    ASSERT_FALSE(result);\n\n    // 6 app_name with ':'\n    const char *json6 = \"{\\\"status\\\":\\\"app_status::AS_AVAILABLE\\\",\"\n                        \"\\\"app_type\\\":\\\"pegasus\\\",\\\"app_name\\\":\\\"CL769:test\\\",\"\n                        \"\\\"app_id\\\":1,\\\"partition_count\\\":16,\\\"envs\\\":{},\"\n                        \"\\\"is_stateful\\\":1,\\\"max_replica_count\\\":3}\";\n    result =\n        dsn::json::json_forwarder<dsn::app_info>::decode(dsn::blob(json6, 0, strlen(json6)), info2);\n    ASSERT_TRUE(result);\n    ASSERT_EQ(info2.app_name, \"CL769:test\");\n    ASSERT_EQ(info2.max_replica_count, 3);\n\n    // 7. 
policy can be decoded correctly\n    const char *json7 = \"{\\\"policy_name\\\":\\\"every_day\\\",\\\"backup_provider_type\\\":\\\"simple\\\",\\\"app_\"\n                        \"ids\\\":[4,5,6,7,8,9,10,11,14,15,16,17,18,19,21,22,23,24],\\\"app_names\\\":{\"\n                        \"\\\"4\\\":\\\"aaaa\\\",\\\"5\\\":\\\"aaaa\\\",\\\"6\\\":\\\"aaaa\\\",\\\"7\\\":\\\"aaaa\\\",\\\"8\\\":\"\n                        \"\\\"aaaa\\\",\\\"9\\\":\\\"aaaa\\\",\\\"10\\\":\\\"aaaa\\\",\\\"11\\\":\\\"aaaa\\\",\\\"14\\\":\\\"aaaa\\\",\"\n                        \"\\\"15\\\":\\\"aaaa\\\",\\\"16\\\":\\\"aaaa\\\",\\\"17\\\":\\\"aaaa\\\",\\\"18\\\":\\\"aaaa\\\",\\\"19\\\":\"\n                        \"\\\"aaaa\\\",\\\"21\\\":\\\"aaaa\\\",\\\"22\\\":\\\"aaaa\\\",\\\"23\\\":\\\"aaaa\\\",\\\"24\\\":\\\"aaaa\\\"},\"\n                        \"\\\"backup_interval_seconds\\\":86400,\\\"backup_history_count_to_keep\\\":3,\\\"is_\"\n                        \"disable\\\":0,\\\"start_time\\\":{\\\"hour\\\":0,\\\"minute\\\":30}}\";\n    dsn::replication::policy p;\n    result = dsn::json::json_forwarder<dsn::replication::policy>::decode(\n        dsn::blob(json7, 0, strlen(json7)), p);\n    ASSERT_TRUE(result);\n    ASSERT_EQ(\"every_day\", p.policy_name);\n    ASSERT_EQ(\"simple\", p.backup_provider_type);\n\n    std::set<int32_t> app_ids = {4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24};\n    ASSERT_EQ(app_ids, p.app_ids);\n\n    std::map<int32_t, std::string> app_names;\n    for (int32_t i : app_ids) {\n        app_names.emplace(i, \"aaaa\");\n    }\n    ASSERT_EQ(app_names, p.app_names);\n    ASSERT_EQ(86400, p.backup_interval_seconds);\n    ASSERT_EQ(3, p.backup_history_count_to_keep);\n    ASSERT_EQ(0, p.is_disable);\n    ASSERT_EQ(0, p.start_time.hour);\n    ASSERT_EQ(30, p.start_time.minute);\n\n    // 8. 
backup info can be decoded correctly\n    const char *json8 =\n        \"{\\\"backup_id\\\":1528216470578,\\\"start_time_ms\\\":1528216470578,\\\"end_time_\"\n        \"ms\\\":1528217091629,\\\"app_ids\\\":[4,5,6,7,8,9,10,11,14,15,16,17,18,19,21,22,\"\n        \"23,24],\\\"app_names\\\":{\\\"4\\\":\\\"aaaa\\\",\\\"5\\\":\\\"aaaa\\\",\\\"6\\\":\\\"aaaa\\\",\\\"7\\\":\\\"aaaa\\\",\\\"8\\\":\"\n        \"\\\"aaaa\\\",\\\"9\\\":\\\"aaaa\\\",\\\"10\\\":\\\"aaaa\\\",\\\"11\\\":\\\"aaaa\\\",\\\"14\\\":\\\"aaaa\\\",\\\"15\\\":\\\"aaaa\\\",\"\n        \"\\\"16\\\":\"\n        \"\\\"aaaa\\\",\\\"17\\\":\\\"aaaa\\\",\\\"18\\\":\\\"aaaa\\\",\\\"19\\\":\\\"aaaa\\\",\\\"21\\\":\\\"aaaa\\\",\\\"22\\\":\\\"aaaa\\\",\"\n        \"\\\"23\\\":\"\n        \"\\\"aaaa\\\",\\\"24\\\":\\\"aaaa\\\"},\\\"info_status\\\":1}\";\n    dsn::replication::backup_info binfo;\n    result = dsn::json::json_forwarder<dsn::replication::backup_info>::decode(\n        dsn::blob(json8, 0, strlen(json8)), binfo);\n    ASSERT_TRUE(result);\n    ASSERT_EQ(1528216470578, binfo.backup_id);\n    ASSERT_EQ(1528216470578, binfo.start_time_ms);\n    ASSERT_EQ(1528217091629, binfo.end_time_ms);\n    ASSERT_EQ(app_ids, binfo.app_ids);\n    ASSERT_EQ(app_names, binfo.app_names);\n    ASSERT_EQ(1, binfo.info_status);\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/main.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <cmath>\n#include <dsn/service_api_cpp.h>\n#include <fstream>\n#include <gtest/gtest.h>\n#include <iostream>\n\n#include \"meta/meta_data.h\"\n#include \"meta_service_test_app.h\"\n\nint gtest_flags = 0;\nint gtest_ret = 0;\n\nnamespace dsn {\nnamespace replication {\n\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_META_TEST)\nDEFINE_TASK_CODE(TASK_META_TEST, TASK_PRIORITY_COMMON, THREAD_POOL_META_TEST)\n\nmeta_service_test_app *g_app;\n\n// as it is not easy to clean test environment in some cases, we simply run these tests in several\n// commands,\n// please check the script \"run.sh\" to modify the GTEST_FILTER\n// currently, three filters are used to run these tests:\n//   1. a test only run \"meta.data_definition\", coz it use different config-file\n//   2. a test only run \"meta.apply_balancer\", coz it modify the global state of remote-storage,\n//   this conflicts meta.state_sync\n//   3. 
all others tests\n//\n// If adding a test which doesn't modify the global state, you should simple add your test to the\n// case3.\nTEST(meta, state_sync) { g_app->state_sync_test(); }\n\nTEST(meta, update_configuration) { g_app->update_configuration_test(); }\n\nTEST(meta, balancer_validator) { g_app->balancer_validator(); }\n\nTEST(meta, apply_balancer) { g_app->apply_balancer_test(); }\n\nTEST(meta, cannot_run_balancer_test) { g_app->cannot_run_balancer_test(); }\n\nTEST(meta, construct_apps_test) { g_app->construct_apps_test(); }\n\nTEST(meta, balance_config_file) { g_app->balance_config_file(); }\n\nTEST(meta, json_compacity) { g_app->json_compacity(); }\n\nTEST(meta, adjust_dropped_size) { g_app->adjust_dropped_size(); }\n\nTEST(meta, app_envs_basic_test) { g_app->app_envs_basic_test(); }\n\ndsn::error_code meta_service_test_app::start(const std::vector<std::string> &args)\n{\n    uint32_t seed =\n        (uint32_t)dsn_config_get_value_uint64(\"tools.simulator\", \"random_seed\", 0, \"random seed\");\n    if (seed == 0) {\n        seed = time(0);\n        derror(\"initial seed: %u\", seed);\n    }\n    srand(seed);\n\n    int argc = args.size();\n    char *argv[20];\n    for (int i = 0; i < argc; ++i) {\n        argv[i] = (char *)(args[i].c_str());\n    }\n    testing::InitGoogleTest(&argc, argv);\n    g_app = this;\n    gtest_ret = RUN_ALL_TESTS();\n    gtest_flags = 1;\n    return dsn::ERR_OK;\n}\n\n} // namespace replication\n} // namespace dsn\n\nGTEST_API_ int main(int argc, char **argv)\n{\n    testing::InitGoogleTest(&argc, argv);\n    dsn::service_app::register_factory<dsn::replication::meta_service_test_app>(\"test_meta\");\n    dsn::service::meta_service_app::register_all();\n    dsn_run_config(\"config-test.ini\", false);\n    while (gtest_flags == 0) {\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n    }\n\n#ifndef ENABLE_GCOV\n    dsn_exit(gtest_ret);\n#endif\n    return gtest_ret;\n}\n"
  },
  {
    "path": "src/meta/test/meta_app_envs_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/dist/replication/replica_envs.h>\n#include <dsn/dist/replication/replication_types.h>\n#include \"meta_test_base.h\"\n#include \"meta/meta_service.h\"\n\nnamespace dsn {\nnamespace replication {\nclass meta_app_envs_test : public meta_test_base\n{\npublic:\n    meta_app_envs_test() {}\n\n    void SetUp() override\n    {\n        meta_test_base::SetUp();\n        create_app(app_name);\n    }\n\n    void TearDown() override { drop_app(app_name); }\n\n    const std::string app_name = \"test_app_env\";\n};\n\nTEST_F(meta_app_envs_test, update_app_envs_test)\n{\n    struct test_case\n    {\n        std::string env_key;\n        std::string env_value;\n        error_code err;\n        std::string hint;\n        std::string 
expect_value;\n    } tests[] = {\n        {replica_envs::WRITE_QPS_THROTTLING, \"100*delay*100\", ERR_OK, \"\", \"100*delay*100\"},\n        {replica_envs::WRITE_QPS_THROTTLING, \"20K*delay*100\", ERR_OK, \"\", \"20K*delay*100\"},\n        {replica_envs::WRITE_QPS_THROTTLING, \"20M*delay*100\", ERR_OK, \"\", \"20M*delay*100\"},\n        {replica_envs::WRITE_QPS_THROTTLING,\n         \"20A*delay*100\",\n         ERR_INVALID_PARAMETERS,\n         \"20A should be non-negative int\",\n         \"20M*delay*100\"},\n        {replica_envs::WRITE_QPS_THROTTLING,\n         \"-20*delay*100\",\n         ERR_INVALID_PARAMETERS,\n         \"-20 should be non-negative int\",\n         \"20M*delay*100\"},\n        {replica_envs::WRITE_QPS_THROTTLING,\n         \"\",\n         ERR_INVALID_PARAMETERS,\n         \"The value shouldn't be empty\",\n         \"20M*delay*100\"},\n        {replica_envs::WRITE_QPS_THROTTLING,\n         \"20A*delay\",\n         ERR_INVALID_PARAMETERS,\n         \"The field count of 20A*delay should be 3\",\n         \"20M*delay*100\"},\n        {replica_envs::WRITE_QPS_THROTTLING,\n         \"20K*pass*100\",\n         ERR_INVALID_PARAMETERS,\n         \"pass should be \\\"delay\\\" or \\\"reject\\\"\",\n         \"20M*delay*100\"},\n        {replica_envs::WRITE_QPS_THROTTLING,\n         \"20K*delay*-100\",\n         ERR_INVALID_PARAMETERS,\n         \"-100 should be non-negative int\",\n         \"20M*delay*100\"},\n        {replica_envs::WRITE_QPS_THROTTLING,\n         \"2K**delay*100\",\n         ERR_INVALID_PARAMETERS,\n         \"The field count of 2K**delay*100 should be 3\",\n         \"20M*delay*100\"},\n        {replica_envs::WRITE_QPS_THROTTLING,\n         \"2K*delay**100\",\n         ERR_INVALID_PARAMETERS,\n         \"The field count of 2K*delay**100 should be 3\",\n         \"20M*delay*100\"},\n        {replica_envs::WRITE_QPS_THROTTLING,\n         \"2K*delay*100,3K*delay*100\",\n         ERR_INVALID_PARAMETERS,\n         \"duplicate delay 
config\",\n         \"20M*delay*100\"},\n        {replica_envs::WRITE_QPS_THROTTLING,\n         \"2K*reject*100,3K*reject*100\",\n         ERR_INVALID_PARAMETERS,\n         \"duplicate reject config\",\n         \"20M*delay*100\"},\n        {replica_envs::WRITE_QPS_THROTTLING, \"20M*reject*100\", ERR_OK, \"\", \"20M*reject*100\"},\n        {replica_envs::WRITE_SIZE_THROTTLING, \"300*delay*100\", ERR_OK, \"\", \"300*delay*100\"},\n        {replica_envs::SLOW_QUERY_THRESHOLD, \"30\", ERR_OK, \"\", \"30\"},\n        {replica_envs::SLOW_QUERY_THRESHOLD, \"20\", ERR_OK, \"\", \"20\"},\n        {replica_envs::SLOW_QUERY_THRESHOLD,\n         \"19\",\n         ERR_INVALID_PARAMETERS,\n         \"Slow query threshold must be >= 20ms\",\n         \"20\"},\n        {replica_envs::SLOW_QUERY_THRESHOLD,\n         \"0\",\n         ERR_INVALID_PARAMETERS,\n         \"Slow query threshold must be >= 20ms\",\n         \"20\"},\n        {replica_envs::TABLE_LEVEL_DEFAULT_TTL, \"10\", ERR_OK, \"\", \"10\"},\n        {replica_envs::ROCKSDB_USAGE_SCENARIO, \"20\", ERR_OK, \"\", \"20\"},\n        {replica_envs::ROCKSDB_CHECKPOINT_RESERVE_MIN_COUNT, \"30\", ERR_OK, \"\", \"30\"},\n        {replica_envs::ROCKSDB_CHECKPOINT_RESERVE_TIME_SECONDS, \"40\", ERR_OK, \"\", \"40\"},\n        {replica_envs::MANUAL_COMPACT_DISABLED, \"50\", ERR_OK, \"\", \"50\"},\n        {replica_envs::MANUAL_COMPACT_MAX_CONCURRENT_RUNNING_COUNT, \"60\", ERR_OK, \"\", \"60\"},\n        {replica_envs::MANUAL_COMPACT_ONCE_TRIGGER_TIME, \"70\", ERR_OK, \"\", \"70\"},\n        {replica_envs::MANUAL_COMPACT_ONCE_TARGET_LEVEL, \"80\", ERR_OK, \"\", \"80\"},\n        {replica_envs::MANUAL_COMPACT_PERIODIC_TRIGGER_TIME, \"90\", ERR_OK, \"\", \"90\"},\n        {replica_envs::MANUAL_COMPACT_PERIODIC_TARGET_LEVEL, \"100\", ERR_OK, \"\", \"100\"},\n        {replica_envs::MANUAL_COMPACT_PERIODIC_BOTTOMMOST_LEVEL_COMPACTION,\n         \"200\",\n         ERR_OK,\n         \"\",\n         \"200\"},\n        
{replica_envs::BUSINESS_INFO, \"300\", ERR_OK, \"\", \"300\"},\n        {replica_envs::DENY_CLIENT_REQUEST,\n         \"400\",\n         ERR_INVALID_PARAMETERS,\n         \"Invalid deny client args, valid include: timeout*all, \"\n         \"timeout*write, timeout*read; reconfig*all, reconfig*write, \"\n         \"reconfig*read\",\n         \"400\"},\n        {replica_envs::DENY_CLIENT_REQUEST,\n         \"invalid*all\",\n         ERR_INVALID_PARAMETERS,\n         \"Invalid deny client args, valid include: timeout*all, \"\n         \"timeout*write, timeout*read; reconfig*all, reconfig*write, \"\n         \"reconfig*read\",\n         \"invalid*all\"},\n        {replica_envs::DENY_CLIENT_REQUEST,\n         \"timeout*invalid\",\n         ERR_INVALID_PARAMETERS,\n         \"Invalid deny client args, valid include: timeout*all, \"\n         \"timeout*write, timeout*read; reconfig*all, reconfig*write, \"\n         \"reconfig*read\",\n         \"timeout*invalid\"},\n        {replica_envs::DENY_CLIENT_REQUEST, \"reconfig*all\", ERR_OK, \"\", \"reconfig*all\"},\n        {replica_envs::DENY_CLIENT_REQUEST, \"reconfig*write\", ERR_OK, \"\", \"reconfig*write\"},\n        {replica_envs::DENY_CLIENT_REQUEST, \"reconfig*read\", ERR_OK, \"\", \"reconfig*read\"},\n        {replica_envs::DENY_CLIENT_REQUEST, \"timeout*all\", ERR_OK, \"\", \"timeout*all\"},\n        {replica_envs::DENY_CLIENT_REQUEST, \"timeout*write\", ERR_OK, \"\", \"timeout*write\"},\n        {replica_envs::DENY_CLIENT_REQUEST, \"timeout*read\", ERR_OK, \"\", \"timeout*read\"},\n        {\"not_exist_env\",\n         \"500\",\n         ERR_INVALID_PARAMETERS,\n         \"app_env \\\"not_exist_env\\\" is not supported\",\n         \"\"}};\n\n    auto app = find_app(app_name);\n    for (auto test : tests) {\n        configuration_update_app_env_response response =\n            update_app_envs(app_name, {test.env_key}, {test.env_value});\n\n        ASSERT_EQ(response.err, test.err);\n        
ASSERT_EQ(response.hint_message, test.hint);\n        if (app->envs.find(test.env_key) != app->envs.end()) {\n            ASSERT_EQ(app->envs.at(test.env_key), test.expect_value);\n        }\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/meta_app_operation_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication/replica_envs.h>\n#include <dsn/service_api_c.h>\n#include <dsn/utility/defer.h>\n\n#include \"meta_service_test_app.h\"\n#include \"meta_test_base.h\"\n#include \"meta/meta_split_service.h\"\n#include \"misc/misc.h\"\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DECLARE_uint64(min_live_node_count_for_unfreeze);\nDSN_DECLARE_int32(min_allowed_replica_count);\nDSN_DECLARE_int32(max_allowed_replica_count);\n\nclass meta_app_operation_test : public meta_test_base\n{\npublic:\n    meta_app_operation_test() {}\n\n    error_code create_app_test(int32_t partition_count,\n                               int32_t replica_count,\n                               bool success_if_exist,\n                               const std::string &app_name)\n    {\n        configuration_create_app_request create_request;\n        configuration_create_app_response create_response;\n        create_request.app_name = app_name;\n        create_request.options.app_type = \"simple_kv\";\n        create_request.options.partition_count = partition_count;\n        
create_request.options.replica_count = replica_count;\n        create_request.options.success_if_exist = success_if_exist;\n        create_request.options.is_stateful = true;\n\n        auto result = fake_create_app(_ss.get(), create_request);\n        fake_wait_rpc(result, create_response);\n        return create_response.err;\n    }\n\n    error_code drop_app_test(const std::string &app_name)\n    {\n        configuration_drop_app_request drop_request;\n        configuration_drop_app_response drop_response;\n        drop_request.app_name = app_name;\n        drop_request.options.success_if_not_exist = false;\n\n        auto result = fake_drop_app(_ss.get(), drop_request);\n        fake_wait_rpc(result, drop_response);\n        if (drop_response.err == ERR_OK) {\n            _ss->spin_wait_staging(30);\n        }\n        return drop_response.err;\n    }\n\n    error_code recall_app_test(const std::string &new_app_name, int32_t app_id)\n    {\n        configuration_recall_app_request recall_request;\n        configuration_recall_app_response recall_response;\n\n        recall_request.app_id = app_id;\n        recall_request.new_app_name = new_app_name;\n        auto result = fake_recall_app(_ss.get(), recall_request);\n        fake_wait_rpc(result, recall_response);\n        if (recall_response.err == ERR_OK) {\n            _ss->spin_wait_staging(30);\n        }\n        return recall_response.err;\n    }\n\n    void update_app_status(app_status::type status)\n    {\n        auto app = find_app(APP_NAME);\n        app->status = status;\n    }\n\n    void drop_app_with_expired()\n    {\n        auto app_id = find_app(APP_NAME)->app_id;\n        drop_app(APP_NAME);\n\n        // dropped app can only be find by app_id\n        auto app = _ss->get_app(app_id);\n        // hold_seconds_for_dropped_app = 604800 in unit test config\n        // make app expired immediatly\n        app->expire_second -= 604800;\n    }\n\n    void clear_nodes() { _ss->_nodes.clear(); }\n\n  
  configuration_get_max_replica_count_response get_max_replica_count(const std::string &app_name)\n    {\n        auto req = dsn::make_unique<configuration_get_max_replica_count_request>();\n        req->__set_app_name(app_name);\n\n        configuration_get_max_replica_count_rpc rpc(std::move(req), RPC_CM_GET_MAX_REPLICA_COUNT);\n        _ss->get_max_replica_count(rpc);\n        _ss->wait_all_task();\n\n        return rpc.response();\n    }\n\n    void set_partition_max_replica_count(const std::string &app_name,\n                                         int32_t partition_index,\n                                         int32_t max_replica_count)\n    {\n        auto app = find_app(app_name);\n        dassert_f(app != nullptr, \"app({}) does not exist\", app_name);\n\n        auto &partition_config = app->partitions[partition_index];\n        partition_config.max_replica_count = max_replica_count;\n    }\n\n    void set_max_replica_count_env(const std::string &app_name, const std::string &env)\n    {\n        auto app = find_app(app_name);\n        dassert_f(app != nullptr, \"app({}) does not exist\", app_name);\n\n        if (env.empty()) {\n            app->envs.erase(replica_envs::UPDATE_MAX_REPLICA_COUNT);\n        } else {\n            app->envs[replica_envs::UPDATE_MAX_REPLICA_COUNT] = env;\n        }\n\n        // set remote env of app\n        auto app_path = _ss->get_app_path(*app);\n        auto ainfo = *(reinterpret_cast<app_info *>(app.get()));\n        auto json_config = dsn::json::json_forwarder<app_info>::encode(ainfo);\n        dsn::task_tracker tracker;\n        _ms->get_remote_storage()->set_data(app_path,\n                                            json_config,\n                                            LPC_META_STATE_HIGH,\n                                            [](dsn::error_code ec) { ASSERT_EQ(ec, ERR_OK); },\n                                            &tracker);\n        tracker.wait_outstanding_tasks();\n    }\n\n    
configuration_set_max_replica_count_response set_max_replica_count(const std::string &app_name,\n                                                                       int32_t max_replica_count)\n    {\n        auto req = dsn::make_unique<configuration_set_max_replica_count_request>();\n        req->__set_app_name(app_name);\n        req->__set_max_replica_count(max_replica_count);\n\n        configuration_set_max_replica_count_rpc rpc(std::move(req), RPC_CM_SET_MAX_REPLICA_COUNT);\n        _ss->set_max_replica_count(rpc);\n        _ss->wait_all_task();\n\n        return rpc.response();\n    }\n\n    void set_app_and_all_partitions_max_replica_count(const std::string &app_name,\n                                                      int32_t max_replica_count)\n    {\n        auto app = find_app(app_name);\n        dassert_f(app != nullptr, \"app({}) does not exist\", app_name);\n\n        auto partition_size = static_cast<int>(app->partitions.size());\n        for (int i = 0; i < partition_size; ++i) {\n            // set local max_replica_count of each partition\n            auto &partition_config = app->partitions[i];\n            partition_config.max_replica_count = max_replica_count;\n\n            // set remote max_replica_count of each partition\n            auto partition_path = _ss->get_partition_path(partition_config.pid);\n            auto json_config =\n                dsn::json::json_forwarder<partition_configuration>::encode(partition_config);\n            dsn::task_tracker tracker;\n            _ms->get_remote_storage()->set_data(partition_path,\n                                                json_config,\n                                                LPC_META_STATE_HIGH,\n                                                [](dsn::error_code ec) { ASSERT_EQ(ec, ERR_OK); },\n                                                &tracker);\n            tracker.wait_outstanding_tasks();\n        }\n\n        // set local max_replica_count of app\n        
app->max_replica_count = max_replica_count;\n\n        // set remote max_replica_count of app\n        auto app_path = _ss->get_app_path(*app);\n        auto ainfo = *(reinterpret_cast<app_info *>(app.get()));\n        auto json_config = dsn::json::json_forwarder<app_info>::encode(ainfo);\n        dsn::task_tracker tracker;\n        _ms->get_remote_storage()->set_data(app_path,\n                                            json_config,\n                                            LPC_META_STATE_HIGH,\n                                            [](dsn::error_code ec) { ASSERT_EQ(ec, ERR_OK); },\n                                            &tracker);\n        tracker.wait_outstanding_tasks();\n    }\n\n    void verify_all_partitions_max_replica_count(const std::string &app_name,\n                                                 int32_t expected_max_replica_count)\n    {\n        auto app = find_app(app_name);\n        dassert_f(app != nullptr, \"app({}) does not exist\", app_name);\n\n        auto partition_size = static_cast<int>(app->partitions.size());\n        for (int i = 0; i < partition_size; ++i) {\n            // verify local max_replica_count of each partition\n            auto &partition_config = app->partitions[i];\n            ASSERT_EQ(partition_config.max_replica_count, expected_max_replica_count);\n\n            // verify remote max_replica_count of each partition\n            auto partition_path = _ss->get_partition_path(partition_config.pid);\n            dsn::task_tracker tracker;\n            _ms->get_remote_storage()->get_data(\n                partition_path,\n                LPC_META_CALLBACK,\n                [ expected_pid = partition_config.pid,\n                  expected_max_replica_count ](error_code ec, const blob &value) {\n                    ASSERT_EQ(ec, ERR_OK);\n\n                    partition_configuration partition_config;\n                    dsn::json::json_forwarder<partition_configuration>::decode(value,\n                     
                                                          partition_config);\n\n                    ASSERT_EQ(partition_config.pid, expected_pid);\n                    ASSERT_EQ(partition_config.max_replica_count, expected_max_replica_count);\n                },\n                &tracker);\n            tracker.wait_outstanding_tasks();\n        }\n    }\n\n    void verify_app_max_replica_count(const std::string &app_name,\n                                      int32_t expected_max_replica_count)\n    {\n        auto app = find_app(app_name);\n        dassert_f(app != nullptr, \"app({}) does not exist\", app_name);\n\n        // verify local max_replica_count of the app\n        ASSERT_EQ(app->max_replica_count, expected_max_replica_count);\n        // env of max_replica_count should have been removed under normal circumstances\n        ASSERT_EQ(app->envs.find(replica_envs::UPDATE_MAX_REPLICA_COUNT), app->envs.end());\n\n        // verify remote max_replica_count of the app\n        auto app_path = _ss->get_app_path(*app);\n        dsn::task_tracker tracker;\n        _ms->get_remote_storage()->get_data(\n            app_path,\n            LPC_META_CALLBACK,\n            [app, expected_max_replica_count](error_code ec, const blob &value) {\n                ASSERT_EQ(ec, ERR_OK);\n\n                app_info ainfo;\n                dsn::json::json_forwarder<app_info>::decode(value, ainfo);\n\n                ASSERT_EQ(ainfo.app_name, app->app_name);\n                ASSERT_EQ(ainfo.app_id, app->app_id);\n                ASSERT_EQ(ainfo.max_replica_count, expected_max_replica_count);\n                // env of max_replica_count should have been removed under normal circumstances\n                ASSERT_EQ(ainfo.envs.find(replica_envs::UPDATE_MAX_REPLICA_COUNT),\n                          ainfo.envs.end());\n            },\n            &tracker);\n        tracker.wait_outstanding_tasks();\n    }\n\n    const std::string APP_NAME = \"app_operation_test\";\n    const 
std::string OLD_APP_NAME = \"old_app_operation\";\n};\n\nTEST_F(meta_app_operation_test, create_app)\n{\n    // Test cases: (assert min_allowed_replica_count <= max_allowed_replica_count)\n    // - wrong partition_count (< 0)\n    // - wrong partition_count (= 0)\n    // - wrong replica_count (< 0)\n    // - wrong replica_count (= 0)\n    // - wrong replica_count (> max_allowed_replica_count > alive_node_count)\n    // - wrong replica_count (> alive_node_count > max_allowed_replica_count)\n    // - wrong replica_count (> alive_node_count = max_allowed_replica_count)\n    // - wrong replica_count (= max_allowed_replica_count, and > alive_node_count)\n    // - wrong replica_count (< max_allowed_replica_count, and > alive_node_count)\n    // - wrong replica_count (= alive_node_count, and > max_allowed_replica_count)\n    // - wrong replica_count (< alive_node_count, and > max_allowed_replica_count)\n    // - valid replica_count (= max_allowed_replica_count, and = alive_node_count)\n    // - valid replica_count (= max_allowed_replica_count, and < alive_node_count)\n    // - valid replica_count (< max_allowed_replica_count, and = alive_node_count)\n    // - valid replica_count (< max_allowed_replica_count < alive_node_count)\n    // - valid replica_count (< alive_node_count < max_allowed_replica_count)\n    // - valid replica_count (< alive_node_count = max_allowed_replica_count)\n    // - wrong replica_count (< min_allowed_replica_count < alive_node_count)\n    // - wrong replica_count (< alive_node_count < min_allowed_replica_count)\n    // - wrong replica_count (< min_allowed_replica_count = alive_node_count)\n    // - wrong replica_count (< min_allowed_replica_count, and > alive_node_count)\n    // - wrong replica_count (< min_allowed_replica_count, and = alive_node_count)\n    // - wrong replica_count (= min_allowed_replica_count, and > alive_node_count)\n    // - valid replica_count (= min_allowed_replica_count, and < alive_node_count)\n    // - cluster freezed 
(alive_node_count = 0)\n    // - cluster freezed (alive_node_count = 1 < min_live_node_count_for_unfreeze)\n    // - cluster freezed (alive_node_count = 2 < min_live_node_count_for_unfreeze)\n    // - cluster not freezed (alive_node_count = min_live_node_count_for_unfreeze)\n    // - create succeed with single-replica\n    // - create succeed with double-replica\n    // - create app succeed\n    // - create failed with table existed\n    // - wrong app_status creating\n    // - wrong app_status recalling\n    // - wrong app_status dropping\n    // - create succeed with app_status dropped\n    // - create succeed with success_if_exist=true\n    struct create_test\n    {\n        std::string app_name;\n        int32_t partition_count;\n        int32_t replica_count;\n        uint64_t min_live_node_count_for_unfreeze;\n        int alive_node_count;\n        int32_t min_allowed_replica_count;\n        bool success_if_exist;\n        app_status::type before_status;\n        error_code expected_err;\n    } tests[] = {{APP_NAME, -1, 3, 2, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 0, 3, 2, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 4, -1, 1, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 4, 0, 1, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 4, 6, 2, 4, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 4, 7, 2, 6, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 4, 6, 2, 5, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 4, 5, 2, 4, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 4, 4, 2, 3, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 4, 6, 2, 6, 1, false, app_status::AS_INVALID, 
ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 4, 6, 2, 7, 1, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS},\n                 {APP_NAME + \"_1\", 4, 5, 2, 5, 1, false, app_status::AS_INVALID, ERR_OK},\n                 {APP_NAME + \"_2\", 4, 5, 2, 6, 1, false, app_status::AS_INVALID, ERR_OK},\n                 {APP_NAME + \"_3\", 4, 4, 2, 4, 1, false, app_status::AS_INVALID, ERR_OK},\n                 {APP_NAME + \"_4\", 4, 4, 2, 6, 1, false, app_status::AS_INVALID, ERR_OK},\n                 {APP_NAME + \"_5\", 4, 3, 2, 4, 1, false, app_status::AS_INVALID, ERR_OK},\n                 {APP_NAME + \"_6\", 4, 4, 2, 5, 1, false, app_status::AS_INVALID, ERR_OK},\n                 {APP_NAME, 4, 3, 2, 5, 4, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 4, 3, 2, 4, 5, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 4, 3, 2, 4, 4, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 4, 3, 2, 2, 4, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 4, 3, 2, 3, 4, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 4, 4, 2, 3, 4, false, app_status::AS_INVALID, ERR_INVALID_PARAMETERS},\n                 {APP_NAME + \"_7\", 4, 3, 2, 4, 3, false, app_status::AS_INVALID, ERR_OK},\n                 {APP_NAME, 4, 1, 1, 0, 1, false, app_status::AS_INVALID, ERR_STATE_FREEZED},\n                 {APP_NAME, 4, 2, 2, 1, 1, false, app_status::AS_INVALID, ERR_STATE_FREEZED},\n                 {APP_NAME, 4, 3, 3, 2, 1, false, app_status::AS_INVALID, ERR_STATE_FREEZED},\n                 {APP_NAME + \"_8\", 4, 3, 3, 3, 1, false, app_status::AS_INVALID, ERR_OK},\n                 {APP_NAME + \"_9\", 4, 1, 1, 1, 1, false, app_status::AS_INVALID, ERR_OK},\n                 {APP_NAME + \"_10\", 4, 2, 1, 2, 2, false, app_status::AS_INVALID, ERR_OK},\n                 {APP_NAME, 4, 3, 2, 3, 3, false, 
app_status::AS_INVALID, ERR_OK},\n                 {APP_NAME, 4, 3, 2, 3, 3, false, app_status::AS_INVALID, ERR_APP_EXIST},\n                 {APP_NAME, 4, 3, 2, 3, 3, false, app_status::AS_CREATING, ERR_BUSY_CREATING},\n                 {APP_NAME, 4, 3, 2, 3, 3, false, app_status::AS_RECALLING, ERR_BUSY_CREATING},\n                 {APP_NAME, 4, 3, 2, 3, 3, false, app_status::AS_DROPPING, ERR_BUSY_DROPPING},\n                 {APP_NAME, 4, 3, 2, 3, 3, false, app_status::AS_DROPPED, ERR_OK},\n                 {APP_NAME, 4, 3, 2, 3, 3, true, app_status::AS_INVALID, ERR_OK}};\n\n    clear_nodes();\n\n    // keep the number of all nodes greater than that of alive nodes\n    const int total_node_count = 10;\n    std::vector<rpc_address> nodes = ensure_enough_alive_nodes(total_node_count);\n\n    // the meta function level will become freezed once\n    // alive_nodes * 100 < total_nodes * node_live_percentage_threshold_for_update\n    // even if alive_nodes >= min_live_node_count_for_unfreeze\n    set_node_live_percentage_threshold_for_update(0);\n\n    // save original FLAGS_min_live_node_count_for_unfreeze\n    auto reserved_min_live_node_count_for_unfreeze = FLAGS_min_live_node_count_for_unfreeze;\n\n    // save original FLAGS_max_allowed_replica_count\n    auto reserved_max_allowed_replica_count = FLAGS_max_allowed_replica_count;\n\n    // keep FLAGS_max_allowed_replica_count fixed in the tests\n    auto res = update_flag(\"max_allowed_replica_count\", \"5\");\n    ASSERT_TRUE(res.is_ok());\n\n    // save original FLAGS_min_allowed_replica_count\n    auto reserved_min_allowed_replica_count = FLAGS_min_allowed_replica_count;\n\n    for (auto test : tests) {\n        res = update_flag(\"min_allowed_replica_count\",\n                          std::to_string(test.min_allowed_replica_count));\n        ASSERT_TRUE(res.is_ok());\n\n        set_min_live_node_count_for_unfreeze(test.min_live_node_count_for_unfreeze);\n\n        dassert_f(total_node_count >= 
test.alive_node_count,\n                  \"total_node_count({}) should be >= alive_node_count({})\",\n                  total_node_count,\n                  test.alive_node_count);\n        for (int i = 0; i < total_node_count - test.alive_node_count; i++) {\n            _ms->set_node_state({nodes[i]}, false);\n        }\n\n        if (test.before_status == app_status::AS_DROPPED) {\n            update_app_status(app_status::AS_AVAILABLE);\n            drop_app(APP_NAME);\n        } else if (test.before_status != app_status::AS_INVALID) {\n            update_app_status(test.before_status);\n        }\n        auto err = create_app_test(\n            test.partition_count, test.replica_count, test.success_if_exist, test.app_name);\n        ASSERT_EQ(err, test.expected_err);\n\n        _ms->set_node_state(nodes, true);\n    }\n\n    // set FLAGS_min_allowed_replica_count successfully\n    res = update_flag(\"min_allowed_replica_count\", \"2\");\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(FLAGS_min_allowed_replica_count, 2);\n\n    // set FLAGS_max_allowed_replica_count successfully\n    res = update_flag(\"max_allowed_replica_count\", \"6\");\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(FLAGS_max_allowed_replica_count, 6);\n\n    // failed to set FLAGS_min_allowed_replica_count due to individual validation\n    res = update_flag(\"min_allowed_replica_count\", \"0\");\n    ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS);\n    ASSERT_EQ(FLAGS_min_allowed_replica_count, 2);\n    std::cout << res.description() << std::endl;\n\n    // failed to set FLAGS_max_allowed_replica_count due to individual validation\n    res = update_flag(\"max_allowed_replica_count\", \"0\");\n    ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS);\n    ASSERT_EQ(FLAGS_max_allowed_replica_count, 6);\n    std::cout << res.description() << std::endl;\n\n    // failed to set FLAGS_min_allowed_replica_count due to grouped validation\n    res = update_flag(\"min_allowed_replica_count\", \"7\");\n    
ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS);\n    ASSERT_EQ(FLAGS_min_allowed_replica_count, 2);\n    std::cout << res.description() << std::endl;\n\n    // failed to set FLAGS_max_allowed_replica_count due to grouped validation\n    res = update_flag(\"max_allowed_replica_count\", \"1\");\n    ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS);\n    ASSERT_EQ(FLAGS_max_allowed_replica_count, 6);\n    std::cout << res.description() << std::endl;\n\n    // recover original FLAGS_min_allowed_replica_count\n    res = update_flag(\"min_allowed_replica_count\",\n                      std::to_string(reserved_min_allowed_replica_count));\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(FLAGS_min_allowed_replica_count, reserved_min_allowed_replica_count);\n\n    // recover original FLAGS_max_allowed_replica_count\n    res = update_flag(\"max_allowed_replica_count\",\n                      std::to_string(reserved_max_allowed_replica_count));\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(FLAGS_max_allowed_replica_count, reserved_max_allowed_replica_count);\n\n    // recover original FLAGS_min_live_node_count_for_unfreeze\n    set_min_live_node_count_for_unfreeze(reserved_min_live_node_count_for_unfreeze);\n}\n\nTEST_F(meta_app_operation_test, drop_app)\n{\n    create_app(APP_NAME);\n\n    // Test cases:\n    // - drop app not exist\n    // - wrong app_status creating\n    // - wrong app_status recalling\n    // - wrong app_status dropping\n    // - drop app succeed\n    struct drop_test\n    {\n        std::string app_name;\n        app_status::type before_status;\n        error_code expected_err;\n    } tests[] = {{\"table_not_exist\", app_status::AS_INVALID, ERR_APP_NOT_EXIST},\n                 {APP_NAME, app_status::AS_CREATING, ERR_BUSY_CREATING},\n                 {APP_NAME, app_status::AS_RECALLING, ERR_BUSY_CREATING},\n                 {APP_NAME, app_status::AS_DROPPING, ERR_BUSY_DROPPING},\n                 {APP_NAME, app_status::AS_AVAILABLE, ERR_OK}};\n\n    for (auto 
test : tests) {\n        if (test.before_status != app_status::AS_INVALID) {\n            update_app_status(test.before_status);\n        }\n        auto err = drop_app_test(test.app_name);\n        ASSERT_EQ(err, test.expected_err);\n    }\n}\n\nTEST_F(meta_app_operation_test, recall_app)\n{\n    create_app(OLD_APP_NAME);\n    auto old_app = find_app(OLD_APP_NAME);\n    auto old_app_id = old_app->app_id;\n    auto invalid_app_id = 100;\n\n    // Test cases:\n    // - wrong app_id\n    // - wrong app_status available\n    // - wrong app_status creating\n    // - wrong app_status recalling\n    // - wrong app_status dropping\n    // - recall succeed\n    // - recall failed because app is totally dropped\n    struct drop_test\n    {\n        int32_t app_id;\n        std::string new_app_name;\n        app_status::type before_status;\n        bool is_totally_dropped;\n        error_code expected_err;\n    } tests[] = {{invalid_app_id, APP_NAME, app_status::AS_INVALID, false, ERR_APP_NOT_EXIST},\n                 {old_app_id, OLD_APP_NAME, app_status::AS_AVAILABLE, false, ERR_APP_EXIST},\n                 {old_app_id, APP_NAME, app_status::AS_CREATING, false, ERR_BUSY_CREATING},\n                 {old_app_id, APP_NAME, app_status::AS_RECALLING, false, ERR_BUSY_CREATING},\n                 {old_app_id, APP_NAME, app_status::AS_DROPPING, false, ERR_BUSY_DROPPING},\n                 {old_app_id, APP_NAME, app_status::AS_DROPPED, false, ERR_OK},\n                 {old_app_id, APP_NAME, app_status::AS_DROPPED, true, ERR_APP_NOT_EXIST}};\n\n    for (auto test : tests) {\n        if (!test.is_totally_dropped) {\n            if (test.before_status == app_status::AS_DROPPED) {\n                old_app->status = app_status::AS_AVAILABLE;\n                drop_app(OLD_APP_NAME);\n            } else if (test.before_status != app_status::AS_INVALID) {\n                old_app->status = test.before_status;\n            }\n        } else {\n            drop_app_with_expired();\n       
 }\n        auto err = recall_app_test(test.new_app_name, test.app_id);\n        ASSERT_EQ(err, test.expected_err);\n    }\n}\n\nTEST_F(meta_app_operation_test, get_max_replica_count)\n{\n    const uint32_t partition_count = 4;\n    create_app(APP_NAME, partition_count);\n\n    // Test cases:\n    // - get max_replica_count from a non-existent table\n    // - get max_replica_count from an inconsistent table\n    // - get max_replica_count successfully\n    struct test_case\n    {\n        std::string app_name;\n        error_code expected_err;\n        int32_t expected_max_replica_count;\n    } tests[] = {{\"abc_xyz\", ERR_APP_NOT_EXIST, 0},\n                 {APP_NAME, ERR_INCONSISTENT_STATE, 0},\n                 {APP_NAME, ERR_OK, 3}};\n\n    for (const auto &test : tests) {\n        std::cout << \"test get_max_replica_count: \"\n                  << \"app_name=\" << test.app_name << \", expected_err=\" << test.expected_err\n                  << \", expected_max_replica_count=\" << test.expected_max_replica_count\n                  << std::endl;\n\n        std::function<void()> recover_partition_max_replica_count = []() {};\n\n        if (test.expected_err == ERR_INCONSISTENT_STATE) {\n            auto partition_index = static_cast<int32_t>(random32(0, partition_count - 1));\n            set_partition_max_replica_count(test.app_name, partition_index, 2);\n            recover_partition_max_replica_count =\n                [ this, app_name = test.app_name, partition_index ]()\n            {\n                set_partition_max_replica_count(app_name, partition_index, 3);\n            };\n        }\n\n        const auto resp = get_max_replica_count(test.app_name);\n        ASSERT_EQ(resp.err, test.expected_err);\n        ASSERT_EQ(resp.max_replica_count, test.expected_max_replica_count);\n\n        recover_partition_max_replica_count();\n    }\n}\n\nTEST_F(meta_app_operation_test, set_max_replica_count)\n{\n    const uint32_t partition_count = 4;\n    
create_app(APP_NAME, partition_count);\n\n    // Test cases:\n    // - set max_replica_count for a non-existent table\n    // - set max_replica_count for an inconsistent table\n    // - set with wrong max_replica_count (< 0)\n    // - set with wrong max_replica_count (= 0)\n    // - set with wrong max_replica_count (> max_allowed_replica_count > alive_node_count)\n    // - set with wrong max_replica_count (> alive_node_count > max_allowed_replica_count)\n    // - set with wrong max_replica_count (> alive_node_count = max_allowed_replica_count)\n    // - set with wrong max_replica_count (= max_allowed_replica_count, and > alive_node_count)\n    // - set with wrong max_replica_count (< max_allowed_replica_count, and > alive_node_count)\n    // - set with wrong max_replica_count (= alive_node_count, and > max_allowed_replica_count)\n    // - set with wrong max_replica_count (< alive_node_count, and > max_allowed_replica_count)\n    // - set with wrong max_replica_count (< min_allowed_replica_count < alive_node_count)\n    // - set with wrong max_replica_count (< alive_node_count < min_allowed_replica_count)\n    // - set with wrong max_replica_count (< min_allowed_replica_count = alive_node_count)\n    // - set with wrong max_replica_count (< min_allowed_replica_count, and > alive_node_count)\n    // - set with wrong max_replica_count (< min_allowed_replica_count, and = alive_node_count)\n    // - set with wrong max_replica_count (= min_allowed_replica_count, and > alive_node_count)\n    // - cluster is freezed (alive_node_count = 0)\n    // - cluster is freezed (alive_node_count = 1 < min_live_node_count_for_unfreeze)\n    // - cluster is freezed (alive_node_count = 2 < min_live_node_count_for_unfreeze)\n    // - request is rejected once there has been already an unfinished update\n    // - increase with valid max_replica_count (= max_allowed_replica_count, and = alive_node_count)\n    // - decrease with valid max_replica_count (= max_allowed_replica_count, and = 
alive_node_count)\n    // - unchanged valid max_replica_count (= max_allowed_replica_count, and = alive_node_count)\n    // - increase with valid max_replica_count (= max_allowed_replica_count, and < alive_node_count)\n    // - decrease with valid max_replica_count (= max_allowed_replica_count, and < alive_node_count)\n    // - unchanged valid max_replica_count (= max_allowed_replica_count, and < alive_node_count)\n    // - increase with valid max_replica_count (< max_allowed_replica_count, and = alive_node_count)\n    // - decrease with valid max_replica_count (< max_allowed_replica_count, and = alive_node_count)\n    // - unchanged valid max_replica_count (< max_allowed_replica_count, and = alive_node_count)\n    // - decrease with valid max_replica_count (< max_allowed_replica_count < alive_node_count)\n    // - unchanged valid max_replica_count (< max_allowed_replica_count < alive_node_count)\n    // - decrease with valid max_replica_count (< alive_node_count < max_allowed_replica_count)\n    // - unchanged valid max_replica_count (< alive_node_count < max_allowed_replica_count)\n    // - increase with valid max_replica_count (< alive_node_count = max_allowed_replica_count)\n    // - decrease with valid max_replica_count (< alive_node_count = max_allowed_replica_count)\n    // - unchanged valid max_replica_count (< alive_node_count = max_allowed_replica_count)\n    // - increase with valid max_replica_count (= min_allowed_replica_count, and < alive_node_count)\n    // - decrease with valid max_replica_count (= min_allowed_replica_count, and < alive_node_count)\n    // - unchanged valid max_replica_count (= min_allowed_replica_count, and < alive_node_count)\n    // - increase max_replica_count from 2 to 3\n    // - increase max_replica_count from 1 to 3\n    // - decrease max_replica_count from 3 to 1\n    struct test_case\n    {\n        std::string app_name;\n        int32_t expected_old_max_replica_count;\n        int32_t initial_max_replica_count;\n        
int32_t new_max_replica_count;\n        uint64_t min_live_node_count_for_unfreeze;\n        int alive_node_count;\n        int32_t min_allowed_replica_count;\n        int32_t max_allowed_replica_count;\n        std::string env;\n        error_code expected_err;\n    } tests[] = {{\"abc_xyz\", 0, 3, 3, 2, 3, 1, 3, \"\", ERR_APP_NOT_EXIST},\n                 {APP_NAME, 0, 3, 3, 2, 3, 1, 3, \"\", ERR_INCONSISTENT_STATE},\n                 {APP_NAME, 3, 3, -1, 2, 3, 1, 3, \"\", ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 3, 3, 0, 2, 3, 1, 3, \"\", ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 1, 1, 3, 1, 1, 1, 2, \"\", ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 1, 1, 3, 1, 2, 1, 1, \"\", ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 1, 1, 2, 1, 1, 1, 1, \"\", ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 1, 1, 2, 1, 1, 1, 2, \"\", ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 1, 1, 2, 1, 1, 1, 3, \"\", ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 1, 1, 2, 1, 2, 1, 1, \"\", ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 1, 1, 2, 1, 3, 1, 1, \"\", ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 2, 2, 1, 1, 3, 2, 3, \"\", ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 3, 3, 1, 1, 2, 3, 3, \"\", ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 3, 3, 2, 1, 3, 3, 3, \"\", ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 3, 3, 2, 1, 1, 3, 3, \"\", ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 3, 3, 2, 1, 2, 3, 3, \"\", ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 2, 2, 2, 1, 1, 2, 3, \"\", ERR_INVALID_PARAMETERS},\n                 {APP_NAME, 1, 1, 2, 1, 0, 1, 3, \"\", ERR_STATE_FREEZED},\n                 {APP_NAME, 1, 1, 2, 2, 1, 1, 3, \"\", ERR_STATE_FREEZED},\n                 {APP_NAME, 1, 1, 2, 3, 2, 1, 3, \"\", ERR_STATE_FREEZED},\n                 {APP_NAME, 2, 2, 3, 2, 3, 2, 3, \"updating;3\", ERR_OPERATION_DISABLED},\n                 {APP_NAME, 1, 1, 2, 1, 
2, 1, 2, \"\", ERR_OK},\n                 {APP_NAME, 2, 2, 1, 1, 1, 1, 1, \"\", ERR_OK},\n                 {APP_NAME, 2, 2, 2, 1, 2, 1, 2, \"\", ERR_OK},\n                 {APP_NAME, 1, 1, 2, 1, 3, 1, 2, \"\", ERR_OK},\n                 {APP_NAME, 2, 2, 1, 1, 2, 1, 1, \"\", ERR_OK},\n                 {APP_NAME, 2, 2, 2, 1, 3, 1, 2, \"\", ERR_OK},\n                 {APP_NAME, 1, 1, 2, 1, 2, 1, 3, \"\", ERR_OK},\n                 {APP_NAME, 3, 3, 2, 1, 2, 1, 3, \"\", ERR_OK},\n                 {APP_NAME, 2, 2, 2, 1, 2, 1, 3, \"\", ERR_OK},\n                 {APP_NAME, 2, 2, 1, 1, 3, 1, 2, \"\", ERR_OK},\n                 {APP_NAME, 1, 1, 1, 1, 3, 1, 2, \"\", ERR_OK},\n                 {APP_NAME, 2, 2, 1, 1, 2, 1, 3, \"\", ERR_OK},\n                 {APP_NAME, 1, 1, 1, 1, 2, 1, 3, \"\", ERR_OK},\n                 {APP_NAME, 1, 1, 2, 1, 3, 1, 3, \"\", ERR_OK},\n                 {APP_NAME, 3, 3, 2, 1, 3, 1, 3, \"\", ERR_OK},\n                 {APP_NAME, 2, 2, 2, 1, 3, 1, 3, \"\", ERR_OK},\n                 {APP_NAME, 1, 1, 2, 1, 3, 2, 3, \"\", ERR_OK},\n                 {APP_NAME, 3, 3, 2, 1, 3, 2, 3, \"\", ERR_OK},\n                 {APP_NAME, 2, 2, 2, 1, 3, 2, 3, \"\", ERR_OK},\n                 {APP_NAME, 2, 2, 3, 2, 3, 2, 3, \"\", ERR_OK},\n                 {APP_NAME, 1, 1, 3, 2, 3, 1, 3, \"\", ERR_OK},\n                 {APP_NAME, 3, 3, 1, 2, 3, 1, 3, \"\", ERR_OK}};\n\n    const int32_t total_node_count = 3;\n    auto nodes = ensure_enough_alive_nodes(total_node_count);\n\n    for (const auto &test : tests) {\n        std::cout << \"test set_max_replica_count: \"\n                  << \"app_name=\" << test.app_name\n                  << \", expected_old_max_replica_count=\" << test.expected_old_max_replica_count\n                  << \", initial_max_replica_count=\" << test.initial_max_replica_count\n                  << \", new_max_replica_count=\" << test.new_max_replica_count\n                  << \", min_live_node_count_for_unfreeze=\" << 
test.min_live_node_count_for_unfreeze\n                  << \", alive_node_count=\" << test.alive_node_count\n                  << \", min_allowed_replica_count=\" << test.min_allowed_replica_count\n                  << \", max_allowed_replica_count=\" << test.max_allowed_replica_count\n                  << \", expected_err=\" << test.expected_err << std::endl;\n\n        // disable node_live_percentage_threshold_for_update\n        // for the reason that the meta function level will become freezed once\n        // alive_nodes * 100 < total_nodes * node_live_percentage_threshold_for_update\n        // even if alive_nodes >= min_live_node_count_for_unfreeze\n        set_node_live_percentage_threshold_for_update(0);\n\n        if (test.expected_err != ERR_APP_NOT_EXIST) {\n            set_max_replica_count_env(test.app_name, test.env);\n            // set the initial max_replica_count for the app and all of its partitions\n            set_app_and_all_partitions_max_replica_count(test.app_name,\n                                                         test.initial_max_replica_count);\n\n            const auto resp = get_max_replica_count(test.app_name);\n            ASSERT_EQ(resp.err, ERR_OK);\n            ASSERT_EQ(resp.max_replica_count, test.initial_max_replica_count);\n        }\n\n        // recover automatically the original FLAGS_min_live_node_count_for_unfreeze,\n        // FLAGS_min_allowed_replica_count and FLAGS_max_allowed_replica_count\n        auto recover = defer([\n            reserved_min_live_node_count_for_unfreeze = FLAGS_min_live_node_count_for_unfreeze,\n            reserved_min_allowed_replica_count = FLAGS_min_allowed_replica_count,\n            reserved_max_allowed_replica_count = FLAGS_max_allowed_replica_count\n        ]() {\n            FLAGS_max_allowed_replica_count = reserved_max_allowed_replica_count;\n            FLAGS_min_allowed_replica_count = reserved_min_allowed_replica_count;\n            FLAGS_min_live_node_count_for_unfreeze = 
reserved_min_live_node_count_for_unfreeze;\n        });\n        FLAGS_min_live_node_count_for_unfreeze = test.min_live_node_count_for_unfreeze;\n        FLAGS_min_allowed_replica_count = test.min_allowed_replica_count;\n        FLAGS_max_allowed_replica_count = test.max_allowed_replica_count;\n\n        // set some nodes unalive to match the expected number of alive nodes\n        dassert_f(total_node_count >= test.alive_node_count,\n                  \"total_node_count({}) should be >= alive_node_count({})\",\n                  total_node_count,\n                  test.alive_node_count);\n        for (int i = 0; i < total_node_count - test.alive_node_count; i++) {\n            _ms->set_node_state({nodes[i]}, false);\n        }\n\n        // choose and set a partition randomly with an inconsistent max_replica_count\n        if (test.expected_err == ERR_INCONSISTENT_STATE) {\n            auto partition_index = static_cast<int32_t>(random32(0, partition_count - 1));\n            set_partition_max_replica_count(\n                test.app_name, partition_index, test.initial_max_replica_count + 1);\n        }\n\n        const auto set_resp = set_max_replica_count(test.app_name, test.new_max_replica_count);\n        ASSERT_EQ(set_resp.err, test.expected_err);\n        ASSERT_EQ(set_resp.old_max_replica_count, test.expected_old_max_replica_count);\n        if (test.expected_err == ERR_OK) {\n            verify_all_partitions_max_replica_count(test.app_name, test.new_max_replica_count);\n            verify_app_max_replica_count(test.app_name, test.new_max_replica_count);\n        }\n\n        const auto get_resp = get_max_replica_count(test.app_name);\n        if (test.expected_err == ERR_APP_NOT_EXIST || test.expected_err == ERR_INCONSISTENT_STATE) {\n            ASSERT_EQ(get_resp.err, test.expected_err);\n        } else if (test.expected_err != ERR_OK) {\n            ASSERT_EQ(get_resp.err, ERR_OK);\n        }\n\n        if (test.expected_err != ERR_OK) {\n            
ASSERT_EQ(get_resp.max_replica_count, test.expected_old_max_replica_count);\n        }\n\n        _ms->set_node_state(nodes, true);\n    }\n}\n\nTEST_F(meta_app_operation_test, recover_from_max_replica_count_env)\n{\n    const uint32_t partition_count = 4;\n    create_app(APP_NAME, partition_count);\n\n    const int32_t new_max_replica_count = 5;\n    const auto env = fmt::format(\"updating;{}\", new_max_replica_count);\n    set_max_replica_count_env(APP_NAME, env);\n\n    _ss->recover_from_max_replica_count_env();\n\n    verify_all_partitions_max_replica_count(APP_NAME, new_max_replica_count);\n    verify_app_max_replica_count(APP_NAME, new_max_replica_count);\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/meta_backup_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/fail_point.h>\n#include <dsn/utility/filesystem.h>\n#include <gtest/gtest.h>\n\n#include \"common/backup_common.h\"\n#include \"meta/meta_backup_service.h\"\n#include \"meta/meta_service.h\"\n#include \"meta/server_state.h\"\n#include \"meta_test_base.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass backup_service_test : public meta_test_base\n{\npublic:\n    backup_service_test()\n        : _policy_root(\"test_policy_root\"),\n          _backup_root(\"test_backup_root\"),\n          _app_name(\"test_app\"),\n          _backup_service(nullptr)\n    {\n    }\n\n    void SetUp() override\n    {\n        meta_test_base::SetUp();\n        _ms->_backup_handler =\n            std::make_shared<backup_service>(_ms.get(), _policy_root, _backup_root, nullptr);\n        _backup_service = _ms->_backup_handler;\n\n        // create an app with 8 partitions.\n        create_app(_app_name);\n    }\n\n    start_backup_app_response\n    start_backup(int32_t app_id, const std::string &provider, const std::string &backup_path = \"\")\n    {\n        auto request = dsn::make_unique<start_backup_app_request>();\n        
request->app_id = app_id;\n        request->backup_provider_type = provider;\n        if (!backup_path.empty()) {\n            request->__set_backup_path(backup_path);\n        }\n\n        start_backup_app_rpc rpc(std::move(request), RPC_CM_START_BACKUP_APP);\n        _backup_service->start_backup_app(rpc);\n        wait_all();\n        return rpc.response();\n    }\n\n    query_backup_status_response query_backup(int32_t app_id, int64_t backup_id)\n    {\n        auto request = dsn::make_unique<query_backup_status_request>();\n        request->app_id = app_id;\n        request->__isset.backup_id = true;\n        request->backup_id = backup_id;\n\n        query_backup_status_rpc rpc(std::move(request), RPC_CM_QUERY_BACKUP_STATUS);\n        _backup_service->query_backup_status(rpc);\n        wait_all();\n        return rpc.response();\n    }\n\n    bool write_metadata_succeed(int32_t app_id,\n                                int64_t backup_id,\n                                const std::string &user_specified_path)\n    {\n        std::string backup_root = dsn::utils::filesystem::path_combine(\n            user_specified_path, _backup_service->backup_root());\n        auto app = _ms->_state->get_app(app_id);\n        std::string metadata_file =\n            cold_backup::get_app_metadata_file(backup_root, app->app_name, app_id, backup_id);\n\n        int64_t metadata_file_size = 0;\n        if (!dsn::utils::filesystem::file_size(metadata_file, metadata_file_size)) {\n            return false;\n        }\n        return metadata_file_size > 0;\n    }\n\n    void test_specific_backup_path(int32_t test_app_id, const std::string &user_specified_path = \"\")\n    {\n        auto resp = start_backup(test_app_id, \"local_service_empty_root\", user_specified_path);\n        ASSERT_EQ(ERR_OK, resp.err);\n        ASSERT_TRUE(resp.__isset.backup_id);\n        ASSERT_EQ(1, _backup_service->_backup_states.size());\n\n        auto backup_engine = 
_backup_service->_backup_states[0];\n        if (user_specified_path.empty()) {\n            ASSERT_TRUE(backup_engine->_backup_path.empty());\n        } else {\n            ASSERT_EQ(user_specified_path, backup_engine->_backup_path);\n        }\n\n        int64_t backup_id = resp.backup_id;\n        ASSERT_TRUE(write_metadata_succeed(test_app_id, backup_id, user_specified_path));\n    }\n\nprotected:\n    const std::string _policy_root;\n    const std::string _backup_root;\n    const std::string _app_name;\n    std::shared_ptr<backup_service> _backup_service;\n};\n\nTEST_F(backup_service_test, test_invalid_backup_request)\n{\n    // invalid app id.\n    int32_t test_app_id = _ss->next_app_id();\n    auto resp = start_backup(test_app_id, \"local_service\");\n    ASSERT_EQ(ERR_INVALID_STATE, resp.err);\n\n    // invalid provider.\n    resp = start_backup(1, \"invalid_provider\");\n    ASSERT_EQ(ERR_INVALID_PARAMETERS, resp.err);\n}\n\nTEST_F(backup_service_test, test_init_backup)\n{\n    int64_t now = dsn_now_ms();\n    auto resp = start_backup(1, \"local_service\");\n    ASSERT_EQ(ERR_OK, resp.err);\n    ASSERT_LE(now, resp.backup_id);\n    ASSERT_EQ(1, _backup_service->_backup_states.size());\n\n    // backup for app 1 is running, couldn't backup it again.\n    resp = start_backup(1, \"local_service\");\n    ASSERT_EQ(ERR_INVALID_STATE, resp.err);\n\n    resp = start_backup(2, \"local_service\");\n    ASSERT_EQ(ERR_OK, resp.err);\n}\n\nTEST_F(backup_service_test, test_write_backup_metadata_failed)\n{\n    fail::setup();\n    fail::cfg(\"mock_local_service_write_failed\", \"100%1*return(ERR_FS_INTERNAL)\");\n\n    // we couldn't start backup an app if write backup metadata failed.\n    auto resp = start_backup(1, \"local_service\");\n    ASSERT_EQ(ERR_FS_INTERNAL, resp.err);\n\n    fail::teardown();\n}\n\nTEST_F(backup_service_test, test_backup_app_with_no_specific_path) { test_specific_backup_path(1); }\n\nTEST_F(backup_service_test, 
test_backup_app_with_user_specified_path)\n{\n    test_specific_backup_path(1, \"test/backup\");\n}\n\nTEST_F(backup_service_test, test_query_backup_status)\n{\n    // query a backup that does not exist\n    auto resp = query_backup(1, 1);\n    ASSERT_EQ(ERR_INVALID_PARAMETERS, resp.err);\n\n    auto start_backup_resp = start_backup(1, \"local_service\");\n    ASSERT_EQ(ERR_OK, start_backup_resp.err);\n    ASSERT_EQ(1, _backup_service->_backup_states.size());\n\n    // query backup succeed\n    int64_t backup_id = start_backup_resp.backup_id;\n    resp = query_backup(1, backup_id);\n    ASSERT_EQ(ERR_OK, resp.err);\n    ASSERT_TRUE(resp.__isset.backup_items);\n    ASSERT_EQ(1, resp.backup_items.size());\n}\n\nclass backup_engine_test : public meta_test_base\n{\npublic:\n    backup_engine_test()\n        : _policy_root(\"test_policy_root\"),\n          _backup_root(\"test_backup_root\"),\n          _app_name(\"test_app\"),\n          _app_id(1),\n          _partition_count(8),\n          _backup_engine(nullptr)\n    {\n    }\n\n    void SetUp() override\n    {\n        meta_test_base::SetUp();\n        _ms->_backup_handler =\n            std::make_shared<backup_service>(_ms.get(), _policy_root, _backup_root, nullptr);\n        _backup_engine = std::make_shared<backup_engine>(_ms->_backup_handler.get());\n        _backup_engine->set_block_service(\"local_service\");\n\n        zauto_lock lock(_backup_engine->_lock);\n        _backup_engine->_backup_status.clear();\n        for (int i = 0; i < _partition_count; ++i) {\n            _backup_engine->_backup_status.emplace(i, backup_status::UNALIVE);\n        }\n        _backup_engine->_cur_backup.app_id = _app_id;\n        _backup_engine->_cur_backup.app_name = _app_name;\n        _backup_engine->_cur_backup.backup_id = static_cast<int64_t>(dsn_now_ms());\n        _backup_engine->_cur_backup.start_time_ms = _backup_engine->_cur_backup.backup_id;\n    }\n\n    void mock_backup_app_partitions()\n    {\n        zauto_lock 
l(_backup_engine->_lock);\n        for (int i = 0; i < _partition_count; ++i) {\n            _backup_engine->_backup_status[i] = backup_status::ALIVE;\n        }\n    }\n\n    void mock_on_backup_reply(int32_t partition_index,\n                              error_code rpc_err,\n                              error_code resp_err,\n                              int32_t progress)\n    {\n        gpid pid = gpid(_app_id, partition_index);\n        rpc_address mock_primary_address = rpc_address(\"127.0.0.1\", 10000 + partition_index);\n\n        backup_response resp;\n        resp.backup_id = _backup_engine->_cur_backup.backup_id;\n        resp.pid = pid;\n        resp.err = resp_err;\n        resp.progress = progress;\n\n        _backup_engine->on_backup_reply(rpc_err, resp, pid, mock_primary_address);\n    }\n\n    void mock_on_backup_reply_when_timeout(int32_t partition_index, error_code rpc_err)\n    {\n        gpid pid = gpid(_app_id, partition_index);\n        rpc_address mock_primary_address = rpc_address(\"127.0.0.1\", 10000 + partition_index);\n        backup_response resp;\n        _backup_engine->on_backup_reply(rpc_err, resp, pid, mock_primary_address);\n    }\n\n    bool is_backup_failed() const\n    {\n        zauto_lock l(_backup_engine->_lock);\n        return _backup_engine->_is_backup_failed;\n    }\n\n    void reset_backup_engine()\n    {\n        zauto_lock l(_backup_engine->_lock);\n        _backup_engine->_is_backup_failed = false;\n    }\n\nprotected:\n    const std::string _policy_root;\n    const std::string _backup_root;\n    const std::string _app_name;\n    const int32_t _app_id;\n    const int32_t _partition_count;\n    std::shared_ptr<backup_engine> _backup_engine;\n};\n\nTEST_F(backup_engine_test, test_on_backup_reply)\n{\n    mock_backup_app_partitions();\n\n    // receive an rpc error\n    mock_on_backup_reply(/*partition_index=*/0, ERR_NETWORK_FAILURE, ERR_BUSY, /*progress=*/0);\n    ASSERT_TRUE(_backup_engine->is_in_progress());\n\n    
// receive a backup finished response\n    reset_backup_engine();\n    mock_on_backup_reply(/*partition_index=*/1,\n                         ERR_OK,\n                         ERR_OK,\n                         /*progress=*/cold_backup_constant::PROGRESS_FINISHED);\n    ASSERT_TRUE(_backup_engine->is_in_progress());\n\n    // receive a backup in-progress response\n    reset_backup_engine();\n    mock_on_backup_reply(/*partition_index=*/2, ERR_OK, ERR_BUSY, /*progress=*/0);\n    ASSERT_TRUE(_backup_engine->is_in_progress());\n    ASSERT_EQ(_backup_engine->_backup_status[2], backup_status::ALIVE);\n\n    // if one partition fail, all backup plan will fail\n    {\n        // receive a backup failed response\n        reset_backup_engine();\n        mock_on_backup_reply(/*partition_index=*/3, ERR_OK, ERR_LOCAL_APP_FAILURE, /*progress=*/0);\n        ASSERT_TRUE(is_backup_failed());\n\n        // this backup is still a failure even received non-failure response\n        mock_on_backup_reply(/*partition_index=*/4, ERR_OK, ERR_BUSY, /*progress=*/0);\n        ASSERT_TRUE(is_backup_failed());\n\n        mock_on_backup_reply(/*partition_index=*/5,\n                             ERR_OK,\n                             ERR_OK,\n                             /*progress=*/cold_backup_constant::PROGRESS_FINISHED);\n        ASSERT_TRUE(is_backup_failed());\n    }\n\n    // meta request is timeout\n    reset_backup_engine();\n    mock_on_backup_reply_when_timeout(/*partition_index=*/5, ERR_TIMEOUT);\n    ASSERT_FALSE(is_backup_failed());\n}\n\nTEST_F(backup_engine_test, test_backup_completed)\n{\n    mock_backup_app_partitions();\n    for (int i = 0; i < _partition_count; ++i) {\n        mock_on_backup_reply(/*partition_index=*/i,\n                             ERR_OK,\n                             ERR_OK,\n                             /*progress=*/cold_backup_constant::PROGRESS_FINISHED);\n    }\n    ASSERT_FALSE(is_backup_failed());\n    
ASSERT_LE(_backup_engine->_cur_backup.start_time_ms, _backup_engine->_cur_backup.end_time_ms);\n}\n\nTEST_F(backup_engine_test, test_write_backup_info_failed)\n{\n    fail::setup();\n    fail::cfg(\"mock_local_service_write_failed\", \"100%1*return(ERR_FS_INTERNAL)\");\n\n    // finish all partitions backup but write backup info failed.\n    mock_backup_app_partitions();\n    for (int i = 0; i < _partition_count; ++i) {\n        mock_on_backup_reply(/*partition_index=*/i,\n                             ERR_OK,\n                             ERR_OK,\n                             /*progress=*/cold_backup_constant::PROGRESS_FINISHED);\n    }\n    ASSERT_TRUE(is_backup_failed());\n    ASSERT_EQ(0, _backup_engine->_cur_backup.end_time_ms);\n\n    fail::teardown();\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/meta_bulk_load_ingestion_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/fail_point.h>\n\n#include \"meta_test_base.h\"\n#include \"meta_service_test_app.h\"\n#include \"meta/meta_bulk_load_ingestion_context.h\"\n#include \"meta/meta_data.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass node_context_test : public meta_test_base\n{\npublic:\n    void SetUp()\n    {\n        _context = ingestion_context::node_context();\n        _context.node_ingesting_count = 0;\n        _context.address = NODE;\n        FLAGS_bulk_load_node_max_ingesting_count = 1;\n        FLAGS_bulk_load_node_min_disk_count = 1;\n    }\n\n    void TearDown()\n    {\n        _context.disk_ingesting_counts.clear();\n        _context.node_ingesting_count = 0;\n    }\n\n    void mock_context(const std::string &disk_tag,\n                      const uint32_t disk_count = 0,\n                      const uint32_t total_count = 0)\n    {\n        _context.node_ingesting_count = total_count;\n        _context.disk_ingesting_counts[disk_tag] = disk_count;\n    }\n\n    void init_disk(const std::string &disk_tag) { _context.init_disk(disk_tag); }\n\n    uint32_t 
get_disk_count(const std::string &disk_tag)\n    {\n        if (_context.disk_ingesting_counts.find(disk_tag) == _context.disk_ingesting_counts.end()) {\n            return -1;\n        }\n        return _context.disk_ingesting_counts[disk_tag];\n    }\n\n    void mock_get_max_disk_ingestion_count(const uint32_t node_min_disk_count,\n                                           const uint32_t current_disk_count)\n    {\n        FLAGS_bulk_load_node_min_disk_count = node_min_disk_count;\n        _context.disk_ingesting_counts.clear();\n        for (auto i = 0; i < current_disk_count; i++) {\n            _context.init_disk(std::to_string(i));\n        }\n    }\n\n    uint32_t get_max_disk_ingestion_count(const uint32_t max_node_count) const\n    {\n        return _context.get_max_disk_ingestion_count(max_node_count);\n    }\n\n    bool check_if_add() { return _context.check_if_add(TAG); }\n\npublic:\n    ingestion_context::node_context _context;\n    const rpc_address NODE = rpc_address(\"127.0.0.1\", 10086);\n    const std::string TAG = \"default\";\n    const std::string TAG2 = \"tag2\";\n};\n\nTEST_F(node_context_test, init_disk_test)\n{\n    mock_context(TAG, 1, 1);\n    struct init_disk_test\n    {\n        std::string disk_tag;\n        uint32_t expected_disk_count;\n    } tests[] = {{TAG, 1}, {TAG2, 0}};\n    for (const auto &test : tests) {\n        init_disk(test.disk_tag);\n        ASSERT_EQ(get_disk_count(test.disk_tag), test.expected_disk_count);\n    }\n}\n\nTEST_F(node_context_test, get_max_disk_ingestion_count_test)\n{\n    struct get_max_disk_ingestion_count_test\n    {\n        uint32_t max_node_count;\n        uint32_t min_disk_count;\n        uint32_t current_disk_count;\n        uint32_t expected_count;\n    } tests[] = {// min_disk_count = 1\n                 {1, 1, 1, 1},\n                 {2, 1, 1, 2},\n                 // min_disk_count = 3\n                 {1, 3, 1, 1},\n                 {4, 3, 3, 2},\n                 // min_disk_count = 7\n  
               {1, 7, 1, 1},\n                 {1, 7, 11, 1},\n                 {7, 7, 1, 1},\n                 {7, 7, 11, 1},\n                 {8, 7, 3, 2},\n                 {8, 7, 8, 1},\n                 {8, 7, 11, 1}};\n    for (const auto &test : tests) {\n        mock_get_max_disk_ingestion_count(test.min_disk_count, test.current_disk_count);\n        ASSERT_EQ(get_max_disk_ingestion_count(test.max_node_count), test.expected_count);\n    }\n}\n\nTEST_F(node_context_test, check_if_add_test)\n{\n    fail::setup();\n    struct check_if_add_test\n    {\n        const uint32_t max_node_count;\n        const uint32_t current_node_count;\n        std::string max_disk_count_str;\n        const uint32_t current_disk_count;\n        bool expected_result;\n    } tests[] = {{1, 1, \"1\", 1, false}, {3, 2, \"2\", 2, false}, {1, 0, \"7\", 0, true}};\n    for (const auto &test : tests) {\n        FLAGS_bulk_load_node_max_ingesting_count = test.max_node_count;\n        mock_context(TAG, test.current_disk_count, test.current_node_count);\n        auto str = \"return(\" + test.max_disk_count_str + \")\";\n        fail::cfg(\"ingestion_node_context_disk_count\", str);\n        ASSERT_EQ(check_if_add(), test.expected_result);\n    }\n    fail::teardown();\n}\n\nclass ingestion_context_test : public meta_test_base\n{\npublic:\n    /// mock app and node info context\n    ///  node1    node2    node3    node4\n    /// p0(tag1) s0(tag1) s0(tag2)\n    /// s1(tag1) s1(tag2)          p1(tag2)\n    /// s2(tag2)          p2(tag1) s2(tag1)\n    ///          p3(tag1) s3(tag1) s3(tag2)\n    void SetUp()\n    {\n        _context = make_unique<ingestion_context>();\n        add_node_context({NODE1, NODE2, NODE3, NODE4});\n        mock_app();\n        FLAGS_bulk_load_node_min_disk_count = MIN_DISK_COUNT;\n        FLAGS_bulk_load_node_max_ingesting_count = MAX_NODE_COUNT;\n    }\n\n    void TearDown() { _context->reset_all(); }\n\n    void update_max_node_count(const uint32_t 
max_node_count)\n    {\n        FLAGS_bulk_load_node_max_ingesting_count = max_node_count;\n    }\n\n    bool check_node_ingestion(const uint32_t max_node_count,\n                              const rpc_address &node,\n                              const std::string &tag)\n    {\n        _context->reset_all();\n        update_max_node_count(max_node_count);\n        _context->_nodes_context[NODE1] = ingestion_context::node_context(NODE1, TAG1);\n        _context->_nodes_context[NODE1].add(TAG1);\n        return _context->check_node_ingestion(node, tag);\n    }\n\n    void mock_app()\n    {\n        app_info ainfo;\n        ainfo.app_id = APP_ID;\n        ainfo.partition_count = PARTITION_COUNT;\n        _app = std::make_shared<app_state>(ainfo);\n        _app->partitions.reserve(PARTITION_COUNT);\n        _app->helpers->contexts.reserve(PARTITION_COUNT);\n        mock_partition(0,\n                       {NODE1, NODE2, NODE3},\n                       {TAG1, TAG1, TAG2},\n                       _app->partitions[0],\n                       _app->helpers->contexts[0]);\n        mock_partition(1,\n                       {NODE4, NODE1, NODE2},\n                       {TAG2, TAG1, TAG2},\n                       _app->partitions[1],\n                       _app->helpers->contexts[1]);\n        mock_partition(2,\n                       {NODE3, NODE1, NODE4},\n                       {TAG1, TAG2, TAG1},\n                       _app->partitions[2],\n                       _app->helpers->contexts[2]);\n        mock_partition(3,\n                       {NODE2, NODE3, NODE4},\n                       {TAG1, TAG1, TAG2},\n                       _app->partitions[3],\n                       _app->helpers->contexts[3]);\n    }\n\n    void mock_partition(const uint32_t pidx,\n                        std::vector<rpc_address> nodes,\n                        const std::vector<std::string> tags,\n                        partition_configuration &config,\n                        
config_context &cc)\n    {\n        config.pid = gpid(APP_ID, pidx);\n        config.primary = nodes[0];\n        config.secondaries.emplace_back(nodes[1]);\n        config.secondaries.emplace_back(nodes[2]);\n\n        auto count = nodes.size();\n        for (auto i = 0; i < count; i++) {\n            serving_replica r;\n            r.node = nodes[i];\n            r.disk_tag = tags[i];\n            cc.serving.emplace_back(r);\n        }\n    }\n\n    void add_node_context(std::vector<rpc_address> nodes)\n    {\n        for (const auto &address : nodes) {\n            ingestion_context::node_context node(address, TAG1);\n            node.init_disk(TAG2);\n            _context->_nodes_context[address] = node;\n        }\n    }\n\n    bool try_partition_ingestion(const uint32_t pidx)\n    {\n        return _context->try_partition_ingestion(_app->partitions[pidx],\n                                                 _app->helpers->contexts[pidx]);\n    }\n\n    void add_partition(const uint32_t pidx)\n    {\n        auto pinfo = ingestion_context::partition_node_info(_app->partitions[pidx],\n                                                            _app->helpers->contexts[pidx]);\n        _context->add_partition(pinfo);\n    }\n\n    void remove_partition(const uint32_t pidx) { _context->remove_partition(gpid(APP_ID, pidx)); }\n\n    bool is_partition_ingesting(const uint32_t pidx) const\n    {\n        return _context->_running_partitions.find(gpid(APP_ID, pidx)) !=\n               _context->_running_partitions.end();\n    }\n\n    uint32_t get_app_ingesting_count() const { return _context->get_app_ingesting_count(APP_ID); }\n\n    void reset_app() { return _context->reset_app(APP_ID); }\n\n    int32_t get_node_running_count(const rpc_address &node)\n    {\n        if (_context->_nodes_context.find(node) == _context->_nodes_context.end()) {\n            return 0;\n        }\n        return _context->_nodes_context[node].node_ingesting_count;\n    }\n\n    uint32_t 
get_disk_running_count(const rpc_address &node, const std::string &disk_tag)\n    {\n        if (_context->_nodes_context.find(node) == _context->_nodes_context.end()) {\n            return 0;\n        }\n        auto node_cc = _context->_nodes_context[node];\n        if (node_cc.disk_ingesting_counts.find(disk_tag) == node_cc.disk_ingesting_counts.end()) {\n            return 0;\n        }\n        return node_cc.disk_ingesting_counts[disk_tag];\n    }\n\n    bool validate_count(const rpc_address &node,\n                        const uint32_t expected_node_count,\n                        const uint32_t expected_disk1_count,\n                        const uint32_t expected_disk2_count)\n    {\n        return get_node_running_count(node) == expected_node_count &&\n               get_disk_running_count(node, TAG1) == expected_disk1_count &&\n               get_disk_running_count(node, TAG2) == expected_disk2_count;\n    }\n\npublic:\n    std::unique_ptr<ingestion_context> _context;\n    std::shared_ptr<app_state> _app;\n    const uint32_t APP_ID = 1;\n    const uint32_t PARTITION_COUNT = 4;\n    const uint32_t MAX_NODE_COUNT = 2;\n    const uint32_t MIN_DISK_COUNT = 2;\n    const rpc_address NODE1 = rpc_address(\"127.0.0.1\", 10086);\n    const rpc_address NODE2 = rpc_address(\"127.0.0.1\", 10085);\n    const rpc_address NODE3 = rpc_address(\"127.0.0.1\", 10087);\n    const rpc_address NODE4 = rpc_address(\"127.0.0.1\", 10088);\n    const std::string TAG1 = \"tag1\";\n    const std::string TAG2 = \"tag2\";\n};\n\nTEST_F(ingestion_context_test, check_node_ingestion_test)\n{\n    struct check_node_ingestion_test\n    {\n        rpc_address node;\n        std::string tag;\n        uint32_t max_node_count;\n        bool expected_result;\n    } tests[] = {{NODE2, TAG1, 1, true}, {NODE1, TAG2, 2, true}, {NODE1, TAG2, 1, false}};\n    for (const auto &test : tests) {\n        ASSERT_EQ(check_node_ingestion(test.max_node_count, test.node, test.tag),\n                  
test.expected_result);\n    }\n}\n\nTEST_F(ingestion_context_test, try_partition_ingestion_test)\n{\n    update_max_node_count(1);\n    ASSERT_EQ(try_partition_ingestion(0), true);\n    ASSERT_EQ(try_partition_ingestion(1), false);\n\n    update_max_node_count(2);\n    ASSERT_EQ(try_partition_ingestion(1), false);\n    ASSERT_EQ(try_partition_ingestion(2), true);\n    ASSERT_EQ(try_partition_ingestion(3), false);\n\n    update_max_node_count(3);\n    ASSERT_EQ(try_partition_ingestion(1), true);\n    ASSERT_EQ(try_partition_ingestion(3), true);\n\n    ASSERT_EQ(get_app_ingesting_count(), 4);\n}\n\nTEST_F(ingestion_context_test, operation_test)\n{\n    ASSERT_FALSE(is_partition_ingesting(0));\n    add_partition(0);\n    ASSERT_TRUE(is_partition_ingesting(0));\n    ASSERT_TRUE(validate_count(NODE1, 1, 1, 0));\n    ASSERT_TRUE(validate_count(NODE2, 1, 1, 0));\n    ASSERT_TRUE(validate_count(NODE3, 1, 0, 1));\n    ASSERT_TRUE(validate_count(NODE4, 0, 0, 0));\n    ASSERT_EQ(get_app_ingesting_count(), 1);\n\n    ASSERT_FALSE(is_partition_ingesting(1));\n    add_partition(1);\n    ASSERT_TRUE(is_partition_ingesting(1));\n    ASSERT_TRUE(validate_count(NODE1, 2, 2, 0));\n    ASSERT_TRUE(validate_count(NODE2, 2, 1, 1));\n    ASSERT_TRUE(validate_count(NODE3, 1, 0, 1));\n    ASSERT_TRUE(validate_count(NODE4, 1, 0, 1));\n    ASSERT_EQ(get_app_ingesting_count(), 2);\n\n    add_partition(2);\n    remove_partition(0);\n    ASSERT_TRUE(is_partition_ingesting(2));\n    ASSERT_FALSE(is_partition_ingesting(0));\n    ASSERT_TRUE(validate_count(NODE1, 2, 1, 1));\n    ASSERT_TRUE(validate_count(NODE2, 1, 0, 1));\n    ASSERT_TRUE(validate_count(NODE3, 1, 1, 0));\n    ASSERT_TRUE(validate_count(NODE4, 2, 1, 1));\n    ASSERT_EQ(get_app_ingesting_count(), 2);\n\n    reset_app();\n    ASSERT_TRUE(validate_count(NODE1, 0, 0, 0));\n    ASSERT_TRUE(validate_count(NODE2, 0, 0, 0));\n    ASSERT_TRUE(validate_count(NODE3, 0, 0, 0));\n    ASSERT_TRUE(validate_count(NODE4, 0, 0, 0));\n    
ASSERT_EQ(get_app_ingesting_count(), 0);\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/meta_bulk_load_service_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication/replica_envs.h>\n#include <dsn/utility/fail_point.h>\n\n#include \"meta_test_base.h\"\n#include \"meta_service_test_app.h\"\n#include \"meta/meta_bulk_load_service.h\"\n#include \"meta/meta_data.h\"\n#include \"meta/meta_server_failure_detector.h\"\n\nnamespace dsn {\nnamespace replication {\nclass bulk_load_service_test : public meta_test_base\n{\npublic:\n    bulk_load_service_test() {}\n\n    /// bulk load functions\n\n    start_bulk_load_response start_bulk_load(const std::string &app_name)\n    {\n        auto request = dsn::make_unique<start_bulk_load_request>();\n        request->app_name = app_name;\n        request->cluster_name = CLUSTER;\n        request->file_provider_type = PROVIDER;\n        request->remote_root_path = ROOT_PATH;\n\n        start_bulk_load_rpc rpc(std::move(request), RPC_CM_START_BULK_LOAD);\n        bulk_svc().on_start_bulk_load(rpc);\n        wait_all();\n        return rpc.response();\n    }\n\n    error_code check_start_bulk_load_request_params(const std::string provider,\n                                              
      int32_t app_id,\n                                                    int32_t partition_count)\n    {\n        start_bulk_load_request request;\n        request.app_name = APP_NAME;\n        request.cluster_name = CLUSTER;\n        request.file_provider_type = provider;\n        request.remote_root_path = ROOT_PATH;\n\n        std::map<std::string, std::string> envs;\n        std::string hint_msg;\n        return bulk_svc().check_bulk_load_request_params(\n            request, app_id, partition_count, envs, hint_msg);\n    }\n\n    bool validate_ingest_behind(bool mock_value, const std::string &app_value, bool request_value)\n    {\n        std::map<std::string, std::string> envs;\n        if (mock_value) {\n            envs[replica_envs::ROCKSDB_ALLOW_INGEST_BEHIND] = app_value;\n        }\n        return bulk_svc().validate_ingest_behind(envs, request_value);\n    }\n\n    error_code control_bulk_load(int32_t app_id,\n                                 bulk_load_control_type::type type,\n                                 bulk_load_status::type app_status)\n    {\n        bulk_svc()._app_bulk_load_info[app_id].status = app_status;\n\n        auto request = dsn::make_unique<control_bulk_load_request>();\n        request->app_name = APP_NAME;\n        request->type = type;\n\n        control_bulk_load_rpc rpc(std::move(request), RPC_CM_CONTROL_BULK_LOAD);\n        bulk_svc().on_control_bulk_load(rpc);\n        wait_all();\n        return rpc.response().err;\n    }\n\n    error_code query_bulk_load(const std::string &app_name)\n    {\n        auto request = dsn::make_unique<query_bulk_load_request>();\n        request->app_name = app_name;\n\n        query_bulk_load_rpc rpc(std::move(request), RPC_CM_QUERY_BULK_LOAD_STATUS);\n        bulk_svc().on_query_bulk_load_status(rpc);\n        wait_all();\n        return rpc.response().err;\n    }\n\n    error_code\n    clear_bulk_load(int32_t app_id, const std::string &app_name, bulk_load_status::type app_status)\n    {\n  
      bulk_svc()._app_bulk_load_info[app_id].status = app_status;\n\n        auto request = dsn::make_unique<clear_bulk_load_state_request>();\n        request->app_name = app_name;\n\n        clear_bulk_load_rpc rpc(std::move(request), RPC_CM_CLEAR_BULK_LOAD);\n        bulk_svc().on_clear_bulk_load(rpc);\n        wait_all();\n        return rpc.response().err;\n    }\n\n    void mock_meta_bulk_load_context(int32_t app_id,\n                                     int32_t in_progress_partition_count,\n                                     bulk_load_status::type status,\n                                     bool mock_rollback_count = false)\n    {\n        bulk_svc()._bulk_load_app_id.insert(app_id);\n        bulk_svc()._apps_in_progress_count[app_id] = in_progress_partition_count;\n        bulk_svc()._app_bulk_load_info[app_id].status = status;\n        for (int i = 0; i < in_progress_partition_count; ++i) {\n            gpid pid = gpid(app_id, i);\n            bulk_svc()._partition_bulk_load_info[pid].status = status;\n        }\n        if (mock_rollback_count) {\n            bulk_svc()._apps_rollback_count[app_id] = FLAGS_bulk_load_max_rollback_times;\n        }\n    }\n\n    void mock_partition_bulk_load(const std::string &app_name, const gpid &pid)\n    {\n        ddebug_f(\"mock function, app({}), pid({})\", app_name, pid);\n    }\n\n    gpid before_check_partition_status(bulk_load_status::type status)\n    {\n        std::shared_ptr<app_state> app = find_app(APP_NAME);\n        partition_configuration config;\n        config.pid = gpid(app->app_id, 0);\n        config.max_replica_count = 3;\n        config.ballot = BALLOT;\n        config.primary = PRIMARY;\n        config.secondaries.emplace_back(SECONDARY1);\n        config.secondaries.emplace_back(SECONDARY2);\n        app->partitions.clear();\n        app->partitions.emplace_back(config);\n        mock_meta_bulk_load_context(app->app_id, app->partition_count, status);\n        return config.pid;\n    }\n\n    
bool check_partition_status(const std::string name,\n                                bool mock_primary_invalid,\n                                bool mock_lack_secondary,\n                                gpid pid,\n                                bool always_unhealthy_check)\n    {\n        std::shared_ptr<app_state> app = find_app(name);\n        if (mock_primary_invalid) {\n            app->partitions[pid.get_partition_index()].primary.set_invalid();\n        }\n        if (mock_lack_secondary) {\n            app->partitions[pid.get_partition_index()].secondaries.clear();\n        }\n        partition_configuration pconfig;\n        bool flag = bulk_svc().check_partition_status(\n            name,\n            pid,\n            always_unhealthy_check,\n            std::bind(&bulk_load_service_test::mock_partition_bulk_load, this, name, pid),\n            pconfig);\n        wait_all();\n        return flag;\n    }\n\n    void set_partition_bulk_load_info(const gpid &pid,\n                                      bool ever_ingest_succeed,\n                                      bool use_secondary3 = false)\n    {\n        partition_bulk_load_info &pinfo = bulk_svc()._partition_bulk_load_info[pid];\n        pinfo.status = bulk_load_status::BLS_INGESTING;\n        pinfo.addresses.clear();\n        pinfo.addresses.emplace_back(PRIMARY);\n        pinfo.addresses.emplace_back(SECONDARY1);\n        if (use_secondary3) {\n            pinfo.addresses.emplace_back(SECONDARY3);\n        } else {\n            pinfo.addresses.emplace_back(SECONDARY2);\n        }\n        pinfo.ever_ingest_succeed = ever_ingest_succeed;\n    }\n\n    bool test_check_ever_ingestion(const gpid &pid,\n                                   bool ever_ingest_succeed,\n                                   int32_t secondary_count,\n                                   bool same)\n    {\n        set_partition_bulk_load_info(pid, ever_ingest_succeed);\n        partition_configuration config;\n        config.pid = 
pid;\n        config.primary = PRIMARY;\n        if (same) {\n            config.secondaries.emplace_back(SECONDARY1);\n            config.secondaries.emplace_back(SECONDARY2);\n        } else {\n            config.secondaries.emplace_back(SECONDARY1);\n            if (secondary_count == 2) {\n                config.secondaries.emplace_back(SECONDARY3);\n            } else if (secondary_count >= 3) {\n                config.secondaries.emplace_back(SECONDARY2);\n                config.secondaries.emplace_back(SECONDARY3);\n            }\n        }\n        auto flag = bulk_svc().check_ever_ingestion_succeed(config, APP_NAME, pid);\n        wait_all();\n        return flag;\n    }\n\n    void on_partition_bulk_load_reply(error_code err,\n                                      const bulk_load_request &request,\n                                      const bulk_load_response &response)\n    {\n        bulk_svc().on_partition_bulk_load_reply(err, request, response);\n    }\n\n    bool app_is_bulk_loading(const std::string &app_name)\n    {\n        return find_app(app_name)->is_bulk_loading;\n    }\n\n    bool need_update_metadata(gpid pid)\n    {\n        return bulk_svc().is_partition_metadata_not_updated(pid);\n    }\n\n    bulk_load_status::type get_app_bulk_load_status(int32_t app_id)\n    {\n        return bulk_svc().get_app_bulk_load_status_unlocked(app_id);\n    }\n\n    const partition_bulk_load_info &get_partition_bulk_load_info(const gpid &pid)\n    {\n        return bulk_svc()._partition_bulk_load_info[pid];\n    }\n\n    bulk_load_status::type get_partition_bulk_load_status(const gpid &pid)\n    {\n        return bulk_svc().get_partition_bulk_load_status_unlocked(pid);\n    }\n\n    error_code get_app_bulk_load_err(int32_t app_id)\n    {\n        return bulk_svc().get_app_bulk_load_err_unlocked(app_id);\n    }\n\n    void test_on_partition_ingestion_reply(ingestion_response &resp,\n                                           const gpid &pid,\n                 
                          error_code rpc_err = ERR_OK)\n    {\n        bulk_svc().on_partition_ingestion_reply(rpc_err, std::move(resp), APP_NAME, pid, PRIMARY);\n        wait_all();\n    }\n\n    void reset_local_bulk_load_states(int32_t app_id, const std::string &app_name)\n    {\n        bulk_svc().reset_local_bulk_load_states(app_id, app_name, true);\n    }\n\n    int32_t get_app_in_process_count(int32_t app_id)\n    {\n        return bulk_svc()._apps_in_progress_count[app_id];\n    }\n\n    // should call fail::setup() before calling this function\n    void set_app_ingesting_count(int32_t app_id, int32_t count)\n    {\n        fail::cfg(\"ingestion_try_partition_ingestion\", \"return()\");\n        config_context cc;\n        for (auto i = 0; i < count; i++) {\n            partition_configuration config;\n            config.pid = gpid(app_id, i);\n            bulk_svc().try_partition_ingestion(config, cc);\n        }\n    }\n\n    int32_t get_app_ingesting_count(int32_t app_id)\n    {\n        return bulk_svc().get_app_ingesting_count(app_id);\n    }\n\n    /// Used for bulk_load_failover_test\n\n    void initialize_meta_server_with_mock_bulk_load(\n        const std::unordered_set<int32_t> &app_id_set,\n        const std::unordered_map<app_id, app_bulk_load_info> &app_bulk_load_info_map,\n        const std::unordered_map<app_id, std::unordered_map<int32_t, partition_bulk_load_info>>\n            &partition_bulk_load_info_map,\n        const std::vector<app_info> &app_list)\n    {\n        // initialize meta service\n        auto meta_svc = new fake_receiver_meta_service();\n        meta_svc->remote_storage_initialize();\n\n        // initialize server_state\n        auto state = meta_svc->_state;\n        state->initialize(meta_svc, meta_svc->_cluster_root + \"/apps\");\n        _app_root = state->_apps_root;\n        meta_svc->_started = true;\n        _ms.reset(meta_svc);\n\n        // initialize bulk load service\n        _ms->_bulk_load_svc = 
make_unique<bulk_load_service>(\n            _ms.get(), meta_options::concat_path_unix_style(_ms->_cluster_root, \"bulk_load\"));\n        mock_bulk_load_on_remote_storage(\n            app_id_set, app_bulk_load_info_map, partition_bulk_load_info_map);\n\n        // mock app\n        for (auto &info : app_list) {\n            mock_app_on_remote_storage(info);\n        }\n        state->initialize_data_structure();\n\n        _ms->set_function_level(meta_function_level::fl_steady);\n        _ms->_failure_detector.reset(new meta_server_failure_detector(_ms.get()));\n        _ss = _ms->_state;\n    }\n\n    void mock_bulk_load_on_remote_storage(\n        const std::unordered_set<int32_t> &app_id_set,\n        const std::unordered_map<app_id, app_bulk_load_info> &app_bulk_load_info_map,\n        const std::unordered_map<app_id, std::unordered_map<int32_t, partition_bulk_load_info>>\n            &partition_bulk_load_info_map)\n    {\n        std::string path = bulk_svc()._bulk_load_root;\n        blob value = blob();\n        std::unordered_map<int32_t, partition_bulk_load_info> pinfo_map;\n        // create bulk_load_root\n        _ms->get_meta_storage()->create_node(\n            std::move(path),\n            std::move(value),\n            [this,\n             &app_id_set,\n             &app_bulk_load_info_map,\n             &partition_bulk_load_info_map,\n             &pinfo_map]() {\n                for (const auto app_id : app_id_set) {\n                    auto app_iter = app_bulk_load_info_map.find(app_id);\n                    auto partition_iter = partition_bulk_load_info_map.find(app_id);\n                    if (app_iter != app_bulk_load_info_map.end()) {\n                        mock_app_bulk_load_info_on_remote_storage(\n                            app_iter->second,\n                            partition_iter == partition_bulk_load_info_map.end()\n                                ? 
pinfo_map\n                                : partition_iter->second);\n                    }\n                }\n            });\n        wait_all();\n    }\n\n    void mock_app_bulk_load_info_on_remote_storage(\n        const app_bulk_load_info &ainfo,\n        const std::unordered_map<int32_t, partition_bulk_load_info> &partition_bulk_load_info_map)\n    {\n        std::string app_path = bulk_svc().get_app_bulk_load_path(ainfo.app_id);\n        blob value = json::json_forwarder<app_bulk_load_info>::encode(ainfo);\n        // create app_bulk_load_info\n        _ms->get_meta_storage()->create_node(\n            std::move(app_path),\n            std::move(value),\n            [this, app_path, &ainfo, &partition_bulk_load_info_map]() {\n                ddebug_f(\"create app({}) app_id={} bulk load dir({}), bulk_load_status={}\",\n                         ainfo.app_name,\n                         ainfo.app_id,\n                         app_path,\n                         dsn::enum_to_string(ainfo.status));\n                for (const auto &kv : partition_bulk_load_info_map) {\n                    mock_partition_bulk_load_info_on_remote_storage(gpid(ainfo.app_id, kv.first),\n                                                                    kv.second);\n                }\n            });\n    }\n\n    void mock_partition_bulk_load_info_on_remote_storage(const gpid &pid,\n                                                         const partition_bulk_load_info &pinfo)\n    {\n        std::string partition_path = bulk_svc().get_partition_bulk_load_path(pid);\n        blob value = json::json_forwarder<partition_bulk_load_info>::encode(pinfo);\n        _ms->get_meta_storage()->create_node(\n            std::move(partition_path), std::move(value), [partition_path, pid, &pinfo]() {\n                ddebug_f(\"create partition[{}] bulk load dir({}), bulk_load_status={}\",\n                         pid,\n                         partition_path,\n                         
dsn::enum_to_string(pinfo.status));\n            });\n    }\n\n    void mock_app_on_remote_storage(const app_info &info)\n    {\n        static const char *lock_state = \"lock\";\n        static const char *unlock_state = \"unlock\";\n        std::string path = _app_root;\n\n        _ms->get_meta_storage()->create_node(\n            std::move(path), blob(lock_state, 0, strlen(lock_state)), [this]() {\n                ddebug_f(\"create app root {}\", _app_root);\n            });\n        wait_all();\n\n        blob value = json::json_forwarder<app_info>::encode(info);\n        _ms->get_meta_storage()->create_node(\n            _app_root + \"/\" + boost::lexical_cast<std::string>(info.app_id),\n            std::move(value),\n            [this, &info]() {\n                ddebug_f(\"create app({}) app_id={}, dir succeed\", info.app_name, info.app_id);\n                for (int i = 0; i < info.partition_count; ++i) {\n                    partition_configuration config;\n                    config.max_replica_count = 3;\n                    config.pid = gpid(info.app_id, i);\n                    config.ballot = BALLOT;\n                    blob v = json::json_forwarder<partition_configuration>::encode(config);\n                    _ms->get_meta_storage()->create_node(\n                        _app_root + \"/\" + boost::lexical_cast<std::string>(info.app_id) + \"/\" +\n                            boost::lexical_cast<std::string>(i),\n                        std::move(v),\n                        [info, i]() {\n                            ddebug_f(\"create app({}), partition({}.{}) dir succeed\",\n                                     info.app_name,\n                                     info.app_id,\n                                     i);\n                        });\n                }\n            });\n        wait_all();\n\n        std::string app_root = _app_root;\n        _ms->get_meta_storage()->set_data(\n            std::move(app_root), blob(unlock_state, 0, 
strlen(unlock_state)), []() {});\n        wait_all();\n    }\n\n    int32_t get_app_id_set_size() { return bulk_svc()._bulk_load_app_id.size(); }\n\n    int32_t get_partition_bulk_load_info_size(int32_t app_id)\n    {\n        int count = 0;\n        for (const auto &kv : bulk_svc()._partition_bulk_load_info) {\n            if (kv.first.get_app_id() == app_id) {\n                ++count;\n            }\n        }\n        return count;\n    }\n\n    bool is_app_bulk_load_states_reset(int32_t app_id)\n    {\n        return bulk_svc()._bulk_load_app_id.find(app_id) == bulk_svc()._bulk_load_app_id.end();\n    }\n\n    meta_op_status get_op_status() { return _ms->get_op_status(); }\n\n    void unlock_meta_op_status() { return _ms->unlock_meta_op_status(); }\npublic:\n    int32_t APP_ID = 1;\n    std::string APP_NAME = \"bulk_load_test\";\n    int32_t PARTITION_COUNT = 8;\n    std::string CLUSTER = \"cluster\";\n    std::string PROVIDER = \"local_service\";\n    std::string ROOT_PATH = \"bulk_load_root\";\n    int64_t BALLOT = 4;\n    const rpc_address PRIMARY = rpc_address(\"127.0.0.1\", 10086);\n    const rpc_address SECONDARY1 = rpc_address(\"127.0.0.1\", 10085);\n    const rpc_address SECONDARY2 = rpc_address(\"127.0.0.1\", 10087);\n    const rpc_address SECONDARY3 = rpc_address(\"127.0.0.1\", 10080);\n};\n\n/// start bulk load unit tests\nTEST_F(bulk_load_service_test, start_bulk_load_with_not_existed_app)\n{\n    auto resp = start_bulk_load(\"table_not_exist\");\n    ASSERT_EQ(resp.err, ERR_APP_NOT_EXIST);\n    meta_op_status st = get_op_status();\n    ASSERT_EQ(st, meta_op_status::FREE);\n}\n\nTEST_F(bulk_load_service_test, start_bulk_load_with_wrong_provider)\n{\n    create_app(APP_NAME);\n    error_code err = check_start_bulk_load_request_params(\"wrong_provider\", 1, PARTITION_COUNT);\n    ASSERT_EQ(err, ERR_INVALID_PARAMETERS);\n    meta_op_status st = get_op_status();\n    ASSERT_EQ(st, meta_op_status::FREE);\n}\n\nTEST_F(bulk_load_service_test, 
start_bulk_load_succeed)\n{\n    create_app(APP_NAME);\n    fail::setup();\n    fail::cfg(\"meta_check_bulk_load_request_params\", \"return()\");\n    fail::cfg(\"meta_bulk_load_partition_bulk_load\", \"return()\");\n    FLAGS_enable_concurrent_bulk_load = false;\n\n    auto resp = start_bulk_load(APP_NAME);\n    ASSERT_EQ(resp.err, ERR_OK);\n    ASSERT_TRUE(app_is_bulk_loading(APP_NAME));\n    meta_op_status st = get_op_status();\n    ASSERT_EQ(st, meta_op_status::BULKLOAD);\n    unlock_meta_op_status();\n    fail::teardown();\n}\n\n/// check partition status unit tests\nTEST_F(bulk_load_service_test, check_partition_status_app_wrong_test)\n{\n    std::string table_name = \"dropped_table\";\n    create_app(table_name);\n    fail::setup();\n    fail::cfg(\"meta_check_bulk_load_request_params\", \"return()\");\n    fail::cfg(\"meta_bulk_load_partition_bulk_load\", \"return()\");\n    fail::cfg(\"meta_bulk_load_resend_request\", \"return()\");\n    auto resp = start_bulk_load(table_name);\n    ASSERT_EQ(resp.err, ERR_OK);\n    std::shared_ptr<app_state> app = find_app(table_name);\n    app->status = app_status::AS_DROPPED;\n    ASSERT_FALSE(check_partition_status(table_name, false, false, gpid(app->app_id, 0), false));\n    ASSERT_TRUE(is_app_bulk_load_states_reset(app->app_id));\n}\n\nTEST_F(bulk_load_service_test, check_partition_status_test)\n{\n    create_app(APP_NAME);\n    struct status_test\n    {\n        bulk_load_status::type status;\n        bool always_check;\n        bool mock_primary_invalid;\n        bool mock_lack_secondary;\n        bool expected_val;\n    } tests[] = {\n        // mock primary invalid\n        {bulk_load_status::BLS_DOWNLOADING, false, true, false, false},\n        // mock secondary invalid with always_check=false\n        {bulk_load_status::BLS_DOWNLOADING, false, false, true, true},\n        {bulk_load_status::BLS_DOWNLOADED, false, false, true, false},\n        {bulk_load_status::BLS_INGESTING, false, false, true, false},\n       
 {bulk_load_status::BLS_SUCCEED, false, false, true, false},\n        {bulk_load_status::BLS_PAUSING, false, false, true, true},\n        {bulk_load_status::BLS_PAUSED, false, false, true, false},\n        {bulk_load_status::BLS_CANCELED, false, false, true, true},\n        {bulk_load_status::BLS_FAILED, false, false, true, true},\n        {bulk_load_status::BLS_INVALID, false, false, true, false},\n        // mock secondary invalid with always_check=true\n        {bulk_load_status::BLS_INGESTING, true, false, true, false},\n        // normal case\n        {bulk_load_status::BLS_INGESTING, false, false, false, true},\n    };\n    for (auto test : tests) {\n        auto pid = before_check_partition_status(test.status);\n        ASSERT_EQ(check_partition_status(APP_NAME,\n                                         test.mock_primary_invalid,\n                                         test.mock_lack_secondary,\n                                         pid,\n                                         test.always_check),\n                  test.expected_val);\n    }\n    drop_app(APP_NAME);\n}\n\n/// validate ingest behind unit tests\nTEST_F(bulk_load_service_test, validate_ingest_behind_test)\n{\n    struct validate_test\n    {\n        bool mock_value;\n        std::string app_value;\n        bool request_value;\n        bool expected_result;\n    } tests[] = {{true, \"true\", true, true},\n                 {true, \"true\", false, true},\n                 {true, \"false\", true, false},\n                 {true, \"false\", false, true},\n                 {true, \"invalid\", true, false},\n                 {true, \"invalid\", false, true},\n                 {false, \"false\", true, false},\n                 {false, \"false\", false, true}};\n    for (const auto &test : tests) {\n        ASSERT_EQ(validate_ingest_behind(test.mock_value, test.app_value, test.request_value),\n                  test.expected_result);\n    }\n}\n\n/// check_ever_ingestion_succeed unit 
tests\nTEST_F(bulk_load_service_test, check_ever_ingestion_test)\n{\n    create_app(APP_NAME);\n    const auto &app = find_app(APP_NAME);\n    auto pid = gpid(app->app_id, 0);\n    start_bulk_load(APP_NAME);\n    mock_meta_bulk_load_context(app->app_id, app->partition_count, bulk_load_status::BLS_INGESTING);\n    // Test cases:\n    // - ever_ingest_succeed=false\n    // - ever_ingest_succeed=true, secondary address same\n    // - ever_ingest_succeed=true, secondary address different\n    // - ever_ingest_succeed=true, secondary address count is 1\n    // - ever_ingest_succeed=true, secondary address count is 3\n    struct ever_ingestion_test\n    {\n        bool ever_ingest_succeed;\n        int32_t secondary_count;\n        bool same;\n        bool expected_value;\n        bulk_load_status::type expected_bulk_load_status;\n    } tests[]{{false, 2, true, false, bulk_load_status::BLS_INGESTING},\n              {true, 2, true, true, bulk_load_status::BLS_SUCCEED},\n              {true, 2, false, false, bulk_load_status::BLS_INGESTING},\n              {true, 1, false, false, bulk_load_status::BLS_INGESTING},\n              {true, 3, false, false, bulk_load_status::BLS_INGESTING}};\n    for (const auto &test : tests) {\n        ASSERT_EQ(test_check_ever_ingestion(\n                      pid, test.ever_ingest_succeed, test.secondary_count, test.same),\n                  test.expected_value);\n        ASSERT_EQ(get_partition_bulk_load_status(pid), test.expected_bulk_load_status);\n    }\n    drop_app(APP_NAME);\n}\n\n/// control bulk load unit tests\nTEST_F(bulk_load_service_test, control_bulk_load_test)\n{\n    create_app(APP_NAME);\n    std::shared_ptr<app_state> app = find_app(APP_NAME);\n    app->is_bulk_loading = true;\n    mock_meta_bulk_load_context(app->app_id, app->partition_count, bulk_load_status::BLS_INVALID);\n    fail::setup();\n    fail::cfg(\"meta_update_app_status_on_remote_storage_unlocked\", \"return()\");\n\n    struct control_test\n    {\n        
bulk_load_control_type::type type;\n        bulk_load_status::type app_status;\n        error_code expected_err;\n    } tests[] = {\n        {bulk_load_control_type::BLC_PAUSE, bulk_load_status::BLS_DOWNLOADING, ERR_OK},\n        {bulk_load_control_type::BLC_PAUSE, bulk_load_status::BLS_DOWNLOADED, ERR_INVALID_STATE},\n        {bulk_load_control_type::BLC_RESTART, bulk_load_status::BLS_PAUSED, ERR_OK},\n        {bulk_load_control_type::BLC_RESTART, bulk_load_status::BLS_PAUSING, ERR_INVALID_STATE},\n        {bulk_load_control_type::BLC_CANCEL, bulk_load_status::BLS_DOWNLOADING, ERR_OK},\n        {bulk_load_control_type::BLC_CANCEL, bulk_load_status::BLS_PAUSED, ERR_OK},\n        {bulk_load_control_type::BLC_CANCEL, bulk_load_status::BLS_INGESTING, ERR_INVALID_STATE},\n        {bulk_load_control_type::BLC_FORCE_CANCEL, bulk_load_status::BLS_SUCCEED, ERR_OK}};\n\n    for (auto test : tests) {\n        ASSERT_EQ(control_bulk_load(app->app_id, test.type, test.app_status), test.expected_err);\n    }\n    reset_local_bulk_load_states(app->app_id, APP_NAME);\n    fail::teardown();\n}\n\n/// query bulk load status unit tests\nTEST_F(bulk_load_service_test, query_bulk_load_status_with_wrong_state)\n{\n    create_app(APP_NAME);\n    ASSERT_EQ(query_bulk_load(APP_NAME), ERR_OK);\n}\n\nTEST_F(bulk_load_service_test, query_bulk_load_status_success)\n{\n    create_app(APP_NAME);\n    auto app = find_app(APP_NAME);\n    app->is_bulk_loading = true;\n    ASSERT_EQ(query_bulk_load(APP_NAME), ERR_OK);\n}\n\n/// clear bulk load unit tests\nTEST_F(bulk_load_service_test, clear_bulk_load_test)\n{\n    create_app(APP_NAME);\n    std::shared_ptr<app_state> app = find_app(APP_NAME);\n    mock_meta_bulk_load_context(app->app_id, app->partition_count, bulk_load_status::BLS_INVALID);\n    fail::setup();\n    fail::cfg(\"meta_do_clear_app_bulk_load_result\", \"return()\");\n\n    struct clear_test\n    {\n        std::string app_name;\n        bool is_bulk_loading;\n        
bulk_load_status::type app_status;\n        error_code expected_err;\n    } tests[] = {{\"not_exist_app\", false, bulk_load_status::BLS_INVALID, ERR_APP_NOT_EXIST},\n                 {APP_NAME, true, bulk_load_status::BLS_DOWNLOADING, ERR_INVALID_STATE},\n                 {APP_NAME, false, bulk_load_status::BLS_SUCCEED, ERR_OK},\n                 {APP_NAME, false, bulk_load_status::BLS_FAILED, ERR_OK},\n                 {APP_NAME, false, bulk_load_status::BLS_CANCELED, ERR_OK}};\n\n    for (auto test : tests) {\n        app->is_bulk_loading = test.is_bulk_loading;\n        ASSERT_EQ(clear_bulk_load(app->app_id, test.app_name, test.app_status), test.expected_err);\n    }\n    reset_local_bulk_load_states(app->app_id, APP_NAME);\n    fail::teardown();\n}\n\n/// bulk load process unit tests\nclass bulk_load_process_test : public bulk_load_service_test\n{\npublic:\n    void SetUp()\n    {\n        bulk_load_service_test::SetUp();\n        create_app(APP_NAME);\n\n        fail::setup();\n        fail::cfg(\"meta_check_bulk_load_request_params\", \"return()\");\n        fail::cfg(\"meta_bulk_load_partition_bulk_load\", \"return()\");\n        fail::cfg(\"meta_bulk_load_resend_request\", \"return()\");\n\n        auto resp = start_bulk_load(APP_NAME);\n        ASSERT_EQ(resp.err, ERR_OK);\n        std::shared_ptr<app_state> app = find_app(APP_NAME);\n        _app_id = app->app_id;\n        _partition_count = app->partition_count;\n        ASSERT_EQ(app->is_bulk_loading, true);\n    }\n\n    void TearDown()\n    {\n        unlock_meta_op_status();\n        fail::teardown();\n        bulk_load_service_test::TearDown();\n    }\n\n    void create_request(bulk_load_status::type status)\n    {\n        _req.app_name = APP_NAME;\n        _req.ballot = BALLOT;\n        _req.cluster_name = CLUSTER;\n        _req.pid = gpid(_app_id, _pidx);\n        _req.primary_addr = PRIMARY;\n        _req.meta_bulk_load_status = status;\n    }\n\n    void create_basic_response(error_code err, 
bulk_load_status::type status)\n    {\n        _resp.app_name = APP_NAME;\n        _resp.pid = gpid(_app_id, _pidx);\n        _resp.err = err;\n        _resp.primary_bulk_load_status = status;\n    }\n\n    void mock_response_progress(error_code progress_err, bool finish_download)\n    {\n        create_basic_response(ERR_OK, bulk_load_status::BLS_DOWNLOADING);\n\n        partition_bulk_load_state state, state2;\n        int32_t secondary2_progress = finish_download ? 100 : 0;\n        int32_t total_progress = finish_download ? 100 : 66;\n        state.__set_download_status(ERR_OK);\n        state.__set_download_progress(100);\n        state2.__set_download_status(progress_err);\n        state2.__set_download_progress(secondary2_progress);\n\n        _resp.group_bulk_load_state[PRIMARY] = state;\n        _resp.group_bulk_load_state[SECONDARY1] = state;\n        _resp.group_bulk_load_state[SECONDARY2] = state2;\n        _resp.__set_total_download_progress(total_progress);\n    }\n\n    void mock_response_bulk_load_metadata()\n    {\n        mock_response_progress(ERR_OK, false);\n\n        file_meta f_meta;\n        f_meta.name = \"mock_remote_file\";\n        f_meta.size = 100;\n        f_meta.md5 = \"mock_md5\";\n\n        bulk_load_metadata metadata;\n        metadata.files.emplace_back(f_meta);\n        metadata.file_total_size = 100;\n\n        _resp.__set_metadata(metadata);\n    }\n\n    void mock_response_ingestion_status(ingestion_status::type secondary_istatus,\n                                        int32_t ingestion_count)\n    {\n        create_basic_response(ERR_OK, bulk_load_status::BLS_INGESTING);\n\n        partition_bulk_load_state state, state2;\n        state.__set_ingest_status(ingestion_status::IS_SUCCEED);\n        state2.__set_ingest_status(secondary_istatus);\n\n        _resp.group_bulk_load_state[PRIMARY] = state;\n        _resp.group_bulk_load_state[SECONDARY1] = state;\n        _resp.group_bulk_load_state[SECONDARY2] = state2;\n        
_resp.__set_is_group_ingestion_finished(secondary_istatus == ingestion_status::IS_SUCCEED);\n        set_app_ingesting_count(_app_id, ingestion_count);\n    }\n\n    void mock_response_cleaned_up_flag(bool all_cleaned_up, bulk_load_status::type status)\n    {\n        create_basic_response(ERR_OK, status);\n\n        partition_bulk_load_state state, state2;\n        state.__set_is_cleaned_up(true);\n        _resp.group_bulk_load_state[PRIMARY] = state;\n        _resp.group_bulk_load_state[SECONDARY1] = state;\n\n        state2.__set_is_cleaned_up(all_cleaned_up);\n        _resp.group_bulk_load_state[SECONDARY2] = state2;\n        _resp.__set_is_group_bulk_load_context_cleaned_up(all_cleaned_up);\n    }\n\n    void mock_response_paused(bool is_group_paused)\n    {\n        create_basic_response(ERR_OK, bulk_load_status::BLS_PAUSED);\n\n        partition_bulk_load_state state, state2;\n        state.__set_is_paused(true);\n        state2.__set_is_paused(is_group_paused);\n\n        _resp.group_bulk_load_state[PRIMARY] = state;\n        _resp.group_bulk_load_state[SECONDARY1] = state;\n        _resp.group_bulk_load_state[SECONDARY2] = state2;\n        _resp.__set_is_group_bulk_load_paused(is_group_paused);\n    }\n\n    void test_on_partition_bulk_load_reply(int32_t in_progress_count,\n                                           bulk_load_status::type status,\n                                           error_code resp_err = ERR_OK,\n                                           bool mock_rollback_count = false)\n    {\n        mock_meta_bulk_load_context(_app_id, in_progress_count, status, mock_rollback_count);\n        create_request(status);\n        auto response = _resp;\n        response.err = resp_err;\n        on_partition_bulk_load_reply(ERR_OK, _req, response);\n        wait_all();\n    }\n\n    void mock_ingestion_context(error_code err,\n                                int32_t rocksdb_err,\n                                int32_t in_progress_count,\n            
                    int32_t ingestion_count)\n    {\n        mock_meta_bulk_load_context(_app_id, in_progress_count, bulk_load_status::BLS_INGESTING);\n        set_app_ingesting_count(_app_id, ingestion_count);\n        _ingestion_resp.err = err;\n        _ingestion_resp.rocksdb_error = rocksdb_err;\n    }\n\npublic:\n    const int32_t _pidx = 0;\n\n    int32_t _app_id;\n    int32_t _partition_count;\n    bulk_load_request _req;\n    bulk_load_response _resp;\n    ingestion_response _ingestion_resp;\n};\n\n/// on_partition_bulk_load_reply unit tests\n\nTEST_F(bulk_load_process_test, downloading_fs_error)\n{\n    test_on_partition_bulk_load_reply(\n        _partition_count, bulk_load_status::BLS_DOWNLOADING, ERR_FS_INTERNAL);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_FAILED);\n    ASSERT_EQ(get_app_bulk_load_err(_app_id), ERR_FS_INTERNAL);\n}\n\nTEST_F(bulk_load_process_test, downloading_busy)\n{\n    test_on_partition_bulk_load_reply(\n        _partition_count, bulk_load_status::BLS_DOWNLOADING, ERR_BUSY);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_DOWNLOADING);\n}\n\nTEST_F(bulk_load_process_test, downloading_corrupt)\n{\n    mock_response_progress(ERR_CORRUPTION, false);\n    test_on_partition_bulk_load_reply(_partition_count, bulk_load_status::BLS_DOWNLOADING);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_FAILED);\n    ASSERT_EQ(get_app_bulk_load_err(_app_id), ERR_CORRUPTION);\n}\n\nTEST_F(bulk_load_process_test, downloading_report_metadata)\n{\n    mock_response_bulk_load_metadata();\n    test_on_partition_bulk_load_reply(_partition_count, bulk_load_status::BLS_DOWNLOADING);\n\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_DOWNLOADING);\n    ASSERT_FALSE(need_update_metadata(gpid(_app_id, _pidx)));\n}\n\nTEST_F(bulk_load_process_test, normal_downloading)\n{\n    mock_response_progress(ERR_OK, false);\n    
test_on_partition_bulk_load_reply(_partition_count, bulk_load_status::BLS_DOWNLOADING);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_DOWNLOADING);\n}\n\nTEST_F(bulk_load_process_test, downloaded_succeed)\n{\n    mock_response_progress(ERR_OK, true);\n    test_on_partition_bulk_load_reply(1, bulk_load_status::BLS_DOWNLOADING);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_DOWNLOADED);\n}\n\nTEST_F(bulk_load_process_test, start_ingesting)\n{\n    fail::cfg(\"meta_bulk_load_partition_ingestion\", \"return()\");\n    mock_response_progress(ERR_OK, true);\n    test_on_partition_bulk_load_reply(1, bulk_load_status::BLS_DOWNLOADED);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_INGESTING);\n}\n\nTEST_F(bulk_load_process_test, ingestion_running)\n{\n    mock_response_ingestion_status(ingestion_status::IS_RUNNING, 4);\n    test_on_partition_bulk_load_reply(_partition_count, bulk_load_status::BLS_INGESTING);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_INGESTING);\n    ASSERT_EQ(get_app_ingesting_count(_app_id), 4);\n}\n\nTEST_F(bulk_load_process_test, ingestion_error)\n{\n    mock_response_ingestion_status(ingestion_status::IS_FAILED, 3);\n    test_on_partition_bulk_load_reply(_partition_count, bulk_load_status::BLS_INGESTING);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_FAILED);\n    ASSERT_EQ(get_app_ingesting_count(_app_id), 2);\n    ASSERT_EQ(get_app_bulk_load_err(_app_id), ERR_INGESTION_FAILED);\n}\n\nTEST_F(bulk_load_process_test, ingestion_one_succeed)\n{\n    mock_response_ingestion_status(ingestion_status::IS_SUCCEED, 4);\n    test_on_partition_bulk_load_reply(_partition_count, bulk_load_status::BLS_INGESTING);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_INGESTING);\n    ASSERT_EQ(get_app_ingesting_count(_app_id), 3);\n    const auto &pinfo = get_partition_bulk_load_info(gpid(_app_id, _pidx));\n    
ASSERT_EQ(pinfo.status, bulk_load_status::BLS_SUCCEED);\n    ASSERT_TRUE(pinfo.ever_ingest_succeed);\n    ASSERT_EQ(pinfo.addresses.size(), 3);\n}\n\nTEST_F(bulk_load_process_test, ingestion_one_succeed_update)\n{\n    const auto pid = gpid(_app_id, _pidx);\n    mock_response_ingestion_status(ingestion_status::IS_SUCCEED, 4);\n    set_partition_bulk_load_info(pid, true, true);\n    test_on_partition_bulk_load_reply(_partition_count, bulk_load_status::BLS_INGESTING);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_INGESTING);\n    ASSERT_EQ(get_app_ingesting_count(_app_id), 3);\n    const auto &pinfo = get_partition_bulk_load_info(pid);\n    ASSERT_EQ(pinfo.status, bulk_load_status::BLS_SUCCEED);\n    ASSERT_TRUE(pinfo.ever_ingest_succeed);\n    ASSERT_EQ(pinfo.addresses.size(), 3);\n    ASSERT_EQ(std::find(pinfo.addresses.begin(), pinfo.addresses.end(), SECONDARY3),\n              pinfo.addresses.end());\n}\n\nTEST_F(bulk_load_process_test, normal_succeed)\n{\n    mock_response_ingestion_status(ingestion_status::IS_SUCCEED, 1);\n    test_on_partition_bulk_load_reply(1, bulk_load_status::BLS_INGESTING);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_SUCCEED);\n    ASSERT_EQ(get_app_ingesting_count(_app_id), 0);\n    ASSERT_EQ(get_app_bulk_load_err(_app_id), ERR_OK);\n}\n\nTEST_F(bulk_load_process_test, succeed_not_all_finished)\n{\n    mock_response_cleaned_up_flag(false, bulk_load_status::BLS_SUCCEED);\n    test_on_partition_bulk_load_reply(_partition_count, bulk_load_status::BLS_SUCCEED);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_SUCCEED);\n    ASSERT_EQ(get_app_bulk_load_err(_app_id), ERR_OK);\n}\n\nTEST_F(bulk_load_process_test, succeed_all_finished)\n{\n    mock_response_cleaned_up_flag(true, bulk_load_status::BLS_SUCCEED);\n    test_on_partition_bulk_load_reply(1, bulk_load_status::BLS_SUCCEED);\n    ASSERT_FALSE(app_is_bulk_loading(APP_NAME));\n    
ASSERT_EQ(get_app_bulk_load_err(_app_id), ERR_OK);\n}\n\nTEST_F(bulk_load_process_test, cancel_not_all_finished)\n{\n    mock_response_cleaned_up_flag(false, bulk_load_status::BLS_CANCELED);\n    test_on_partition_bulk_load_reply(_partition_count, bulk_load_status::BLS_CANCELED);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_CANCELED);\n}\n\nTEST_F(bulk_load_process_test, cancel_all_finished)\n{\n    mock_response_cleaned_up_flag(true, bulk_load_status::BLS_CANCELED);\n    test_on_partition_bulk_load_reply(1, bulk_load_status::BLS_CANCELED);\n    ASSERT_FALSE(app_is_bulk_loading(APP_NAME));\n}\n\nTEST_F(bulk_load_process_test, failed_not_all_finished)\n{\n    mock_response_cleaned_up_flag(false, bulk_load_status::BLS_FAILED);\n    test_on_partition_bulk_load_reply(_partition_count, bulk_load_status::BLS_FAILED);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_FAILED);\n    ASSERT_EQ(get_app_bulk_load_err(_app_id), ERR_OK);\n}\n\nTEST_F(bulk_load_process_test, failed_all_finished)\n{\n    mock_response_cleaned_up_flag(true, bulk_load_status::BLS_FAILED);\n    test_on_partition_bulk_load_reply(1, bulk_load_status::BLS_FAILED);\n    ASSERT_FALSE(app_is_bulk_loading(APP_NAME));\n    ASSERT_EQ(get_app_bulk_load_err(_app_id), ERR_OK);\n}\n\nTEST_F(bulk_load_process_test, pausing)\n{\n    mock_response_paused(false);\n    test_on_partition_bulk_load_reply(1, bulk_load_status::BLS_PAUSING);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_PAUSING);\n}\n\nTEST_F(bulk_load_process_test, pause_succeed)\n{\n    mock_response_paused(true);\n    test_on_partition_bulk_load_reply(1, bulk_load_status::BLS_PAUSING);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_PAUSED);\n}\n\nTEST_F(bulk_load_process_test, rpc_error)\n{\n    mock_meta_bulk_load_context(_app_id, _partition_count, bulk_load_status::BLS_DOWNLOADED);\n    create_request(bulk_load_status::BLS_DOWNLOADED);\n    
on_partition_bulk_load_reply(ERR_TIMEOUT, _req, _resp);\n    wait_all();\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_DOWNLOADING);\n    ASSERT_EQ(get_app_in_process_count(_app_id), _partition_count);\n}\n\nTEST_F(bulk_load_process_test, response_invalid_state)\n{\n    test_on_partition_bulk_load_reply(\n        _partition_count, bulk_load_status::BLS_INGESTING, ERR_INVALID_STATE);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_DOWNLOADING);\n    ASSERT_EQ(get_app_in_process_count(_app_id), _partition_count);\n}\n\nTEST_F(bulk_load_process_test, response_object_not_found)\n{\n    test_on_partition_bulk_load_reply(\n        _partition_count, bulk_load_status::BLS_CANCELED, ERR_OBJECT_NOT_FOUND);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_CANCELED);\n    ASSERT_EQ(get_app_in_process_count(_app_id), _partition_count);\n}\n\nTEST_F(bulk_load_process_test, rollback_count_exceed)\n{\n    test_on_partition_bulk_load_reply(\n        _partition_count, bulk_load_status::BLS_DOWNLOADING, ERR_INVALID_STATE, true);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_FAILED);\n    ASSERT_EQ(get_app_in_process_count(_app_id), _partition_count);\n    ASSERT_EQ(get_app_bulk_load_err(_app_id), ERR_RETRY_EXHAUSTED);\n}\n\nTEST_F(bulk_load_process_test, response_ingestion_error)\n{\n    set_app_ingesting_count(_app_id, 3);\n    test_on_partition_bulk_load_reply(\n        _partition_count, bulk_load_status::BLS_INGESTING, ERR_INVALID_STATE);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_DOWNLOADING);\n    ASSERT_EQ(get_app_in_process_count(_app_id), _partition_count);\n    ASSERT_EQ(get_app_ingesting_count(_app_id), 0);\n}\n\n/// on_partition_ingestion_reply unit tests\nTEST_F(bulk_load_process_test, ingest_rpc_error)\n{\n    mock_ingestion_context(ERR_OK, 1, _partition_count, 1);\n    test_on_partition_ingestion_reply(_ingestion_resp, gpid(_app_id, _pidx), 
ERR_TIMEOUT);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_DOWNLOADING);\n    ASSERT_EQ(get_app_ingesting_count(_app_id), 0);\n}\n\nTEST_F(bulk_load_process_test, repeated_ingest_rpc)\n{\n    mock_ingestion_context(ERR_OK, 1, _partition_count, 2);\n    test_on_partition_ingestion_reply(_ingestion_resp, gpid(_app_id, _pidx), ERR_NO_NEED_OPERATE);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_INGESTING);\n    ASSERT_EQ(get_app_ingesting_count(_app_id), 1);\n}\n\nTEST_F(bulk_load_process_test, ingest_wrong_state)\n{\n    mock_ingestion_context(ERR_OK, 1, _partition_count, 3);\n    test_on_partition_ingestion_reply(_ingestion_resp, gpid(_app_id, _pidx), ERR_INVALID_STATE);\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_DOWNLOADING);\n    ASSERT_EQ(get_app_ingesting_count(_app_id), 0);\n}\n\nTEST_F(bulk_load_process_test, ingest_empty_write_error)\n{\n    fail::cfg(\"meta_bulk_load_partition_ingestion\", \"return()\");\n    mock_ingestion_context(ERR_TRY_AGAIN, 11, _partition_count, 4);\n    test_on_partition_ingestion_reply(_ingestion_resp, gpid(_app_id, _pidx));\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_INGESTING);\n    ASSERT_EQ(get_app_ingesting_count(_app_id), 3);\n}\n\nTEST_F(bulk_load_process_test, ingest_wrong)\n{\n    mock_ingestion_context(ERR_OK, 1, _partition_count, 4);\n    test_on_partition_ingestion_reply(_ingestion_resp, gpid(_app_id, _pidx));\n    wait_all();\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_FAILED);\n    ASSERT_EQ(get_app_ingesting_count(_app_id), 3);\n    ASSERT_EQ(get_app_bulk_load_err(_app_id), ERR_INGESTION_FAILED);\n}\n\nTEST_F(bulk_load_process_test, ingest_succeed)\n{\n    mock_ingestion_context(ERR_OK, 0, 1, 3);\n    test_on_partition_ingestion_reply(_ingestion_resp, gpid(_app_id, _pidx));\n    ASSERT_EQ(get_app_bulk_load_status(_app_id), bulk_load_status::BLS_INGESTING);\n    
ASSERT_EQ(get_app_ingesting_count(_app_id), 3);\n}\n\nclass bulk_load_failover_test : public bulk_load_service_test\n{\npublic:\n    bulk_load_failover_test() {}\n\n    void SetUp()\n    {\n        fail::setup();\n        fail::cfg(\"meta_bulk_load_partition_bulk_load\", \"return()\");\n        fail::cfg(\"meta_bulk_load_partition_ingestion\", \"return()\");\n    }\n\n    void TearDown()\n    {\n        clean_up();\n        fail::teardown();\n        bulk_load_service_test::TearDown();\n    }\n\n    void try_to_continue_bulk_load(bulk_load_status::type app_status, bool is_bulk_loading = true)\n    {\n        prepare_bulk_load_structures(SYNC_APP_ID,\n                                     SYNC_PARTITION_COUNT,\n                                     SYNC_APP_NAME,\n                                     app_status,\n                                     _pstatus_map,\n                                     is_bulk_loading);\n        initialize_meta_server_with_mock_bulk_load(\n            _app_id_set, _app_bulk_load_info_map, _partition_bulk_load_info_map, _app_info_list);\n        bulk_svc().initialize_bulk_load_service();\n        wait_all();\n    }\n\n    void\n    prepare_bulk_load_structures(int32_t app_id,\n                                 int32_t partition_count,\n                                 std::string &app_name,\n                                 bulk_load_status::type app_status,\n                                 std::unordered_map<int32_t, bulk_load_status::type> &pstatus_map,\n                                 bool is_bulk_loading)\n    {\n        _app_id_set.insert(app_id);\n        mock_app_bulk_load_info(app_id, partition_count, app_name, app_status);\n        mock_partition_bulk_load_info(app_id, pstatus_map);\n        add_to_app_info_list(app_id, partition_count, app_name, is_bulk_loading);\n    }\n\n    void mock_app_bulk_load_info(int32_t app_id,\n                                 int32_t partition_count,\n                                 std::string 
&app_name,\n                                 bulk_load_status::type status)\n    {\n        app_bulk_load_info ainfo;\n        ainfo.app_id = app_id;\n        ainfo.app_name = app_name;\n        ainfo.cluster_name = CLUSTER;\n        ainfo.file_provider_type = PROVIDER;\n        ainfo.remote_root_path = ROOT_PATH;\n        ainfo.partition_count = partition_count;\n        ainfo.status = status;\n        ainfo.ingest_behind = false;\n        ainfo.is_ever_ingesting = false;\n        ainfo.bulk_load_err = ERR_OK;\n        _app_bulk_load_info_map[app_id] = ainfo;\n    }\n\n    void\n    mock_partition_bulk_load_info(int32_t app_id,\n                                  std::unordered_map<int32_t, bulk_load_status::type> &pstatus_map)\n    {\n        if (pstatus_map.size() <= 0) {\n            return;\n        }\n        std::unordered_map<int32_t, partition_bulk_load_info> pinfo_map;\n        for (auto iter = pstatus_map.begin(); iter != pstatus_map.end(); ++iter) {\n            partition_bulk_load_info pinfo;\n            pinfo.status = iter->second;\n            pinfo_map[iter->first] = pinfo;\n        }\n        _partition_bulk_load_info_map[app_id] = pinfo_map;\n    }\n\n    void add_to_app_info_list(int32_t app_id,\n                              int32_t partition_count,\n                              std::string &app_name,\n                              bool is_bulk_loading)\n    {\n        app_info ainfo;\n        ainfo.app_id = app_id;\n        ainfo.app_name = app_name;\n        ainfo.app_type = \"pegasus\";\n        ainfo.is_stateful = true;\n        ainfo.is_bulk_loading = is_bulk_loading;\n        ainfo.max_replica_count = 3;\n        ainfo.partition_count = partition_count;\n        ainfo.status = app_status::AS_AVAILABLE;\n        _app_info_list.emplace_back(ainfo);\n    }\n\n    void mock_pstatus_map(bulk_load_status::type status, int32_t end_index, int32_t start_index = 0)\n    {\n        for (auto i = start_index; i <= end_index; ++i) {\n            
_pstatus_map[i] = status;\n        }\n    }\n\n    void clean_up()\n    {\n        _app_info_list.clear();\n        _app_bulk_load_info_map.clear();\n        _partition_bulk_load_info_map.clear();\n        _pstatus_map.clear();\n        _app_id_set.clear();\n    }\n\n    std::string SYNC_APP_NAME = \"bulk_load_failover_table\";\n    int32_t SYNC_APP_ID = 2;\n    int32_t SYNC_PARTITION_COUNT = 4;\n\n    std::vector<app_info> _app_info_list;\n    std::unordered_set<int32_t> _app_id_set;\n    std::unordered_map<app_id, app_bulk_load_info> _app_bulk_load_info_map;\n    std::unordered_map<app_id, std::unordered_map<int32_t, partition_bulk_load_info>>\n        _partition_bulk_load_info_map;\n    std::unordered_map<int32_t, bulk_load_status::type> _pstatus_map;\n};\n\nTEST_F(bulk_load_failover_test, sync_bulk_load)\n{\n    fail::cfg(\"meta_try_to_continue_bulk_load\", \"return()\");\n\n    // mock app downloading with partition[0~1] downloading\n    std::unordered_map<int32_t, bulk_load_status::type> partition_bulk_load_status_map;\n    partition_bulk_load_status_map[0] = bulk_load_status::BLS_DOWNLOADING;\n    partition_bulk_load_status_map[1] = bulk_load_status::BLS_DOWNLOADING;\n    prepare_bulk_load_structures(SYNC_APP_ID,\n                                 SYNC_PARTITION_COUNT,\n                                 SYNC_APP_NAME,\n                                 bulk_load_status::BLS_DOWNLOADING,\n                                 partition_bulk_load_status_map,\n                                 true);\n\n    // mock app failed with no partition existed\n    partition_bulk_load_status_map.clear();\n    partition_bulk_load_status_map[0] = bulk_load_status::BLS_FAILED;\n    prepare_bulk_load_structures(APP_ID,\n                                 PARTITION_COUNT,\n                                 APP_NAME,\n                                 bulk_load_status::type::BLS_FAILED,\n                                 partition_bulk_load_status_map,\n                                 
true);\n\n    initialize_meta_server_with_mock_bulk_load(\n        _app_id_set, _app_bulk_load_info_map, _partition_bulk_load_info_map, _app_info_list);\n    bulk_svc().initialize_bulk_load_service();\n    wait_all();\n\n    ASSERT_EQ(get_app_id_set_size(), 2);\n\n    ASSERT_TRUE(app_is_bulk_loading(SYNC_APP_NAME));\n    ASSERT_EQ(get_app_bulk_load_status(SYNC_APP_ID), bulk_load_status::BLS_DOWNLOADING);\n    ASSERT_EQ(get_partition_bulk_load_info_size(SYNC_APP_ID), 2);\n\n    ASSERT_TRUE(app_is_bulk_loading(APP_NAME));\n    ASSERT_EQ(get_app_bulk_load_status(APP_ID), bulk_load_status::BLS_FAILED);\n    ASSERT_EQ(get_partition_bulk_load_info_size(APP_ID), 1);\n}\n\n/// try_to_continue_bulk_load unit test\n// partition_count from bulk load is SYNC_PARTITION_COUNT, app partition_count is\n// PARTITION_COUNT\nTEST_F(bulk_load_failover_test, app_info_inconsistency)\n{\n    prepare_bulk_load_structures(SYNC_APP_ID,\n                                 PARTITION_COUNT,\n                                 SYNC_APP_NAME,\n                                 bulk_load_status::BLS_DOWNLOADED,\n                                 _pstatus_map,\n                                 true);\n    _app_bulk_load_info_map[SYNC_APP_ID].partition_count = SYNC_PARTITION_COUNT;\n    initialize_meta_server_with_mock_bulk_load(\n        _app_id_set, _app_bulk_load_info_map, _partition_bulk_load_info_map, _app_info_list);\n    bulk_svc().initialize_bulk_load_service();\n    wait_all();\n\n    ASSERT_FALSE(app_is_bulk_loading(SYNC_APP_NAME));\n}\n\nTEST_F(bulk_load_failover_test, app_downloading_test)\n{\n    // Test cases:\n    // - partition[0,1]=downloading, partition[2,3] not existed\n    // - partition[0,1]=downloading, partition[2]=downloaded, partition[3] not exist\n    // - partition[0~3]=downloading\n    // - partition[0~3]=downloaded\n    // - partition[0]=downloaded, partition[1~3]=downloading\n    // - partition[0-3]=succeed\n    struct app_downloading_test\n    {\n        int32_t 
start_index;\n        int32_t end_index;\n        bulk_load_status::type pstatus;\n        int32_t downloaded_pidx;\n        bool expected_is_bulk_loading;\n        int32_t expected_in_process_count;\n    } tests[] = {{0, 1, bulk_load_status::BLS_DOWNLOADING, -1, true, SYNC_PARTITION_COUNT},\n                 {0, 1, bulk_load_status::BLS_DOWNLOADING, 2, false, 0},\n                 {0, 3, bulk_load_status::BLS_DOWNLOADING, -1, true, SYNC_PARTITION_COUNT},\n                 {0, 3, bulk_load_status::BLS_DOWNLOADED, -1, true, SYNC_PARTITION_COUNT},\n                 {1, 3, bulk_load_status::BLS_DOWNLOADING, 0, true, SYNC_PARTITION_COUNT},\n                 {0, 3, bulk_load_status::BLS_SUCCEED, -1, true, SYNC_PARTITION_COUNT}};\n\n    for (const auto &test : tests) {\n        SetUp();\n        mock_pstatus_map(test.pstatus, test.end_index, test.start_index);\n        if (test.downloaded_pidx > 0) {\n            _pstatus_map[test.downloaded_pidx] = bulk_load_status::BLS_DOWNLOADED;\n        }\n        try_to_continue_bulk_load(bulk_load_status::BLS_DOWNLOADING);\n        ASSERT_EQ(app_is_bulk_loading(SYNC_APP_NAME), test.expected_is_bulk_loading);\n        if (test.expected_is_bulk_loading) {\n            ASSERT_EQ(get_app_bulk_load_status(SYNC_APP_ID), bulk_load_status::BLS_DOWNLOADING);\n            ASSERT_EQ(get_app_in_process_count(SYNC_APP_ID), test.expected_in_process_count);\n        }\n        TearDown();\n    }\n}\n\nTEST_F(bulk_load_failover_test, app_downloaded_test)\n{\n    // Test cases:\n    // - partition[0]=downloaded, partition[1~3] not existed\n    // - partition[0]=ingesting, partition[1~3]=succeed\n    // - partition[0~3]=downloaded\n    // - partition[0~3]=ingesting\n    // - partition[0~2]=downloaded, partition[3]=ingesting\n    struct app_downloaded_test\n    {\n        int32_t start_index;\n        int32_t end_index;\n        bulk_load_status::type pstatus;\n        int32_t ingesting_pidx;\n        bool expected_is_bulk_loading;\n        int32_t 
expected_in_process_count;\n    } tests[] = {{0, 0, bulk_load_status::BLS_DOWNLOADED, -1, false, 0},\n                 {1, 3, bulk_load_status::BLS_SUCCEED, 0, false, 0},\n                 {0, 3, bulk_load_status::BLS_DOWNLOADED, -1, true, SYNC_PARTITION_COUNT},\n                 {0, 3, bulk_load_status::BLS_INGESTING, -1, true, 0},\n                 {0, 2, bulk_load_status::BLS_DOWNLOADED, 3, true, 3}};\n\n    for (const auto &test : tests) {\n        SetUp();\n        mock_pstatus_map(test.pstatus, test.end_index, test.start_index);\n        if (test.ingesting_pidx > 0) {\n            _pstatus_map[test.ingesting_pidx] = bulk_load_status::BLS_INGESTING;\n        }\n        try_to_continue_bulk_load(bulk_load_status::BLS_DOWNLOADED);\n        ASSERT_EQ(app_is_bulk_loading(SYNC_APP_NAME), test.expected_is_bulk_loading);\n        if (test.expected_is_bulk_loading) {\n            ASSERT_EQ(get_app_bulk_load_status(SYNC_APP_ID), bulk_load_status::BLS_DOWNLOADED);\n            ASSERT_EQ(get_app_in_process_count(SYNC_APP_ID), test.expected_in_process_count);\n        }\n        TearDown();\n    }\n}\n\nTEST_F(bulk_load_failover_test, app_ingesting_test)\n{\n    // Test cases:\n    // - all partition not exist\n    // - partition[0~2]=ingesting, partition[3]=downloading\n    // - partition[0~3]=ingesting\n    // - partition[0~3]=succeed\n    // - partition[0~2]=succeed, partition[3]=ingesting\n    struct app_ingesting_test\n    {\n        int32_t end_index;\n        bulk_load_status::type pstatus;\n        bulk_load_status::type p3_status;\n        bool expected_is_bulk_loading;\n        int32_t expected_in_process_count;\n    } tests[] = {{-1, bulk_load_status::BLS_INVALID, bulk_load_status::BLS_INVALID, false, 0},\n                 {2, bulk_load_status::BLS_INGESTING, bulk_load_status::BLS_DOWNLOADING, false, 0},\n                 {3,\n                  bulk_load_status::BLS_INGESTING,\n                  bulk_load_status::BLS_INVALID,\n                  true,\n          
        SYNC_PARTITION_COUNT},\n                 {3, bulk_load_status::BLS_SUCCEED, bulk_load_status::BLS_INVALID, true, 0},\n                 {2, bulk_load_status::BLS_SUCCEED, bulk_load_status::BLS_INGESTING, true, 1}};\n\n    for (const auto &test : tests) {\n        SetUp();\n        mock_pstatus_map(test.pstatus, test.end_index, 0);\n        if (test.p3_status != bulk_load_status::BLS_INVALID) {\n            _pstatus_map[3] = test.p3_status;\n        }\n        try_to_continue_bulk_load(bulk_load_status::BLS_INGESTING);\n        ASSERT_EQ(app_is_bulk_loading(SYNC_APP_NAME), test.expected_is_bulk_loading);\n        if (test.expected_is_bulk_loading) {\n            ASSERT_EQ(get_app_bulk_load_status(SYNC_APP_ID), bulk_load_status::BLS_INGESTING);\n            ASSERT_EQ(get_app_in_process_count(SYNC_APP_ID), test.expected_in_process_count);\n        }\n        TearDown();\n    }\n}\n\nTEST_F(bulk_load_failover_test, app_succeed_test)\n{\n    // Test cases:\n    // - partition[0~2]=succeed, partition[3] not exist\n    // - partition[0~2]=succeed, partition[3]=failed\n    // - partition[0~3]=succeed\n    struct app_succeed_test\n    {\n        bulk_load_status::type p3_status;\n        bool expected_is_bulk_loading;\n    } tests[] = {{bulk_load_status::BLS_INVALID, false},\n                 {bulk_load_status::BLS_FAILED, false},\n                 {bulk_load_status::BLS_SUCCEED, true}};\n\n    for (const auto &test : tests) {\n        SetUp();\n        mock_pstatus_map(bulk_load_status::BLS_SUCCEED, 2, 0);\n        if (test.p3_status != bulk_load_status::BLS_INVALID) {\n            _pstatus_map[3] = test.p3_status;\n        }\n        try_to_continue_bulk_load(bulk_load_status::BLS_SUCCEED);\n        ASSERT_EQ(app_is_bulk_loading(SYNC_APP_NAME), test.expected_is_bulk_loading);\n        if (test.expected_is_bulk_loading) {\n            ASSERT_EQ(get_app_bulk_load_status(SYNC_APP_ID), bulk_load_status::BLS_SUCCEED);\n            
ASSERT_EQ(get_app_in_process_count(SYNC_APP_ID), SYNC_PARTITION_COUNT);\n        }\n        TearDown();\n    }\n}\n\nTEST_F(bulk_load_failover_test, app_pausing_test)\n{\n    // Test cases:\n    // - partition[0]=pausing, partition[1~3] not existed\n    // - partition[0]=downloading, partition[1]=downloaded, partition[2]=pausing,\n    // partition[3]=paused\n    // - partition[0~3]=pausing\n    // - partition[0]=pausing, partition[1~3]=paused\n    struct app_pausing_test\n    {\n        bool mixed_status;\n        int32_t start_index;\n        bulk_load_status::type pstatus;\n        bool expected_is_bulk_loading;\n    } tests[] = {{false, -1, bulk_load_status::type::BLS_PAUSING, false},\n                 {true, -1, bulk_load_status::type::BLS_PAUSING, true},\n                 {false, 1, bulk_load_status::type::BLS_PAUSING, true},\n                 {false, 1, bulk_load_status::type::BLS_PAUSED, true}};\n    for (const auto &test : tests) {\n        SetUp();\n        if (test.mixed_status) {\n            _pstatus_map[0] = bulk_load_status::BLS_DOWNLOADING;\n            _pstatus_map[1] = bulk_load_status::BLS_DOWNLOADED;\n            _pstatus_map[2] = bulk_load_status::BLS_PAUSING;\n            _pstatus_map[3] = bulk_load_status::BLS_PAUSED;\n        } else {\n            _pstatus_map[0] = bulk_load_status::BLS_PAUSING;\n            if (test.start_index > 0) {\n                mock_pstatus_map(test.pstatus, 3, test.start_index);\n            }\n        }\n        try_to_continue_bulk_load(bulk_load_status::BLS_PAUSING);\n        ASSERT_EQ(app_is_bulk_loading(SYNC_APP_NAME), test.expected_is_bulk_loading);\n        if (test.expected_is_bulk_loading) {\n            ASSERT_EQ(get_app_bulk_load_status(SYNC_APP_ID), bulk_load_status::BLS_PAUSING);\n            ASSERT_EQ(get_app_in_process_count(SYNC_APP_ID), SYNC_PARTITION_COUNT);\n        }\n        TearDown();\n    }\n}\n\nTEST_F(bulk_load_failover_test, app_paused_test)\n{\n    // Test cases:\n    // - 
partition[0~2]=paused, partition[3] not existed\n    // - partition[0~2]=paused, partition[3]=pausing\n    // - partition[0~3]=paused\n    struct app_paused_test\n    {\n        bulk_load_status::type p3_status;\n        bool expected_is_bulk_loading;\n        int32_t expected_in_process_count;\n    } tests[] = {{bulk_load_status::BLS_INVALID, false},\n                 {bulk_load_status::BLS_PAUSING, false},\n                 {bulk_load_status::BLS_PAUSED, true}};\n\n    for (const auto &test : tests) {\n        SetUp();\n        mock_pstatus_map(bulk_load_status::BLS_PAUSED, 2, 0);\n        if (test.p3_status != bulk_load_status::BLS_INVALID) {\n            _pstatus_map[3] = test.p3_status;\n        }\n        try_to_continue_bulk_load(bulk_load_status::BLS_PAUSED);\n        ASSERT_EQ(app_is_bulk_loading(SYNC_APP_NAME), test.expected_is_bulk_loading);\n        if (test.expected_is_bulk_loading) {\n            ASSERT_EQ(get_app_bulk_load_status(SYNC_APP_ID), bulk_load_status::BLS_PAUSED);\n            ASSERT_EQ(get_app_in_process_count(SYNC_APP_ID), SYNC_PARTITION_COUNT);\n        }\n        TearDown();\n    }\n}\n\nTEST_F(bulk_load_failover_test, app_failed_test)\n{\n    // Test cases:\n    // - partition[0~2]=failed, partition[3] not existed\n    // - partition[0~3]=failed\n    // - partition[0,1]=downloading, partition[2]=downloaded, partition[3]=failed\n    struct app_failed_test\n    {\n        bool mixed_status;\n        int32_t end_index;\n        bool expected_is_bulk_loading;\n    } tests[] = {{false, 2, false}, {false, 3, true}, {true, -1, true}};\n    for (const auto &test : tests) {\n        SetUp();\n        if (test.mixed_status) {\n            _pstatus_map[0] = bulk_load_status::BLS_DOWNLOADING;\n            _pstatus_map[1] = bulk_load_status::BLS_DOWNLOADING;\n            _pstatus_map[2] = bulk_load_status::BLS_DOWNLOADED;\n            _pstatus_map[3] = bulk_load_status::BLS_FAILED;\n        } else {\n            
mock_pstatus_map(bulk_load_status::BLS_FAILED, test.end_index, 0);\n        }\n        try_to_continue_bulk_load(bulk_load_status::BLS_FAILED);\n        ASSERT_EQ(app_is_bulk_loading(SYNC_APP_NAME), test.expected_is_bulk_loading);\n        if (test.expected_is_bulk_loading) {\n            ASSERT_EQ(get_app_bulk_load_status(SYNC_APP_ID), bulk_load_status::BLS_FAILED);\n            ASSERT_EQ(get_app_in_process_count(SYNC_APP_ID), SYNC_PARTITION_COUNT);\n        }\n        TearDown();\n    }\n}\n\nTEST_F(bulk_load_failover_test, app_cancel_test)\n{\n    // Test cases:\n    // - partition[0~2]=pausing, partition[3] not existed\n    // - partition[0~3]=cancel\n    // - partition[0~2]=ingestion, partition[3]=downloaded\n    struct app_cancel_test\n    {\n        bulk_load_status::type pstatus;\n        bulk_load_status::type p3_status;\n        bool expected_is_bulk_loading;\n    } tests[] = {\n        {bulk_load_status::type::BLS_PAUSING, bulk_load_status::type::BLS_INVALID, false},\n        {bulk_load_status::type::BLS_CANCELED, bulk_load_status::type::BLS_CANCELED, true},\n        {bulk_load_status::type::BLS_INGESTING, bulk_load_status::type::BLS_DOWNLOADED, true}};\n    for (const auto &test : tests) {\n        SetUp();\n        mock_pstatus_map(test.pstatus, 2, 0);\n        if (test.p3_status != bulk_load_status::type::BLS_INVALID) {\n            _pstatus_map[3] = test.p3_status;\n        }\n        try_to_continue_bulk_load(bulk_load_status::BLS_CANCELED);\n        ASSERT_EQ(app_is_bulk_loading(SYNC_APP_NAME), test.expected_is_bulk_loading);\n        if (test.expected_is_bulk_loading) {\n            ASSERT_EQ(get_app_bulk_load_status(SYNC_APP_ID), bulk_load_status::BLS_CANCELED);\n            ASSERT_EQ(get_app_in_process_count(SYNC_APP_ID), SYNC_PARTITION_COUNT);\n        }\n        TearDown();\n    }\n}\n\n/// check_app_bulk_load_states unit test\n// create app(is_bulk_loading=true), but no bulk load info on remote storage\nTEST_F(bulk_load_failover_test, 
status_inconsistency_wrong_app_flag)\n{\n    add_to_app_info_list(SYNC_APP_ID, SYNC_PARTITION_COUNT, SYNC_APP_NAME, true);\n    initialize_meta_server_with_mock_bulk_load(\n        _app_id_set, _app_bulk_load_info_map, _partition_bulk_load_info_map, _app_info_list);\n    bulk_svc().initialize_bulk_load_service();\n    wait_all();\n\n    ASSERT_FALSE(app_is_bulk_loading(SYNC_APP_NAME));\n}\n\n// create app bulk load info on remote storage, but this app not existed\nTEST_F(bulk_load_failover_test, status_inconsistency_wrong_bulk_load_dir)\n{\n    std::unordered_map<int32_t, bulk_load_status::type> partition_bulk_load_status_map;\n    partition_bulk_load_status_map[0] = bulk_load_status::BLS_DOWNLOADING;\n    partition_bulk_load_status_map[1] = bulk_load_status::BLS_DOWNLOADING;\n    prepare_bulk_load_structures(SYNC_APP_ID,\n                                 PARTITION_COUNT,\n                                 SYNC_APP_NAME,\n                                 bulk_load_status::BLS_DOWNLOADING,\n                                 partition_bulk_load_status_map,\n                                 true);\n    _app_info_list.clear();\n    add_to_app_info_list(APP_ID, PARTITION_COUNT, APP_NAME, false);\n\n    initialize_meta_server_with_mock_bulk_load(\n        _app_id_set, _app_bulk_load_info_map, _partition_bulk_load_info_map, _app_info_list);\n    bulk_svc().initialize_bulk_load_service();\n    wait_all();\n\n    ASSERT_TRUE(is_app_bulk_load_states_reset(SYNC_APP_ID));\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/meta_data.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <gtest/gtest.h>\n#include \"misc/misc.h\"\n#include \"meta/meta_data.h\"\n\nusing namespace dsn::replication;\n\nTEST(meta_data, dropped_cmp)\n{\n    dsn::rpc_address n;\n\n    dropped_replica d1, d2;\n    // time not equal\n    {\n        d1 = {n, 10, 5, 5, 5};\n        d2 = {n, 9, 20, 5, 5};\n        ASSERT_TRUE(dropped_cmp(d1, d2) > 0);\n        ASSERT_TRUE(dropped_cmp(d2, d1) < 0);\n    }\n    // ballot not equal\n    {\n        d1 = {n, 0, 4, 4, 4};\n        d2 = {n, 0, 5, 3, 3};\n\n        ASSERT_TRUE(dropped_cmp(d1, d2) < 0);\n        ASSERT_TRUE(dropped_cmp(d2, d1) > 0);\n    }\n    // last_committed_decree not equal\n    {\n        d1 = {n, 0, 4, 4, 4};\n        d2 = {n, 0, 4, 6, 3};\n\n        
ASSERT_TRUE(dropped_cmp(d1, d2) < 0);\n        ASSERT_TRUE(dropped_cmp(d2, d1) > 0);\n    }\n    // last_prepared_decree not equal\n    {\n        d1 = {n, 0, 7, 8, 9};\n        d2 = {n, 0, 7, 8, 10};\n\n        ASSERT_TRUE(dropped_cmp(d1, d2) < 0);\n        ASSERT_TRUE(dropped_cmp(d2, d1) > 0);\n    }\n    // the same\n    {\n        d1 = {n, 0, 6, 6, 7};\n        d2 = {n, 0, 6, 6, 7};\n\n        ASSERT_TRUE(dropped_cmp(d1, d2) == 0);\n        ASSERT_TRUE(dropped_cmp(d2, d1) == 0);\n    }\n}\n\nstatic bool vec_equal(const std::vector<dropped_replica> &vec1,\n                      const std::vector<dropped_replica> &vec2)\n{\n    if (vec1.size() != vec2.size())\n        return false;\n    for (unsigned int i = 0; i != vec1.size(); ++i) {\n        const dropped_replica &ds1 = vec1[i];\n        const dropped_replica &ds2 = vec2[i];\n        if (ds1.ballot != ds2.ballot)\n            return false;\n        if (ds1.last_prepared_decree != ds2.last_prepared_decree)\n            return false;\n        if (ds1.node != ds2.node)\n            return false;\n        if (ds1.time != ds2.time)\n            return false;\n    }\n    return true;\n}\n\nTEST(meta_data, collect_replica)\n{\n    app_mapper app;\n    node_mapper nodes;\n\n    dsn::app_info info;\n    info.app_id = 1;\n    info.is_stateful = true;\n    info.status = dsn::app_status::AS_AVAILABLE;\n    info.app_name = \"test\";\n    info.app_type = \"test\";\n    info.max_replica_count = 3;\n    info.partition_count = 1024;\n    std::shared_ptr<app_state> the_app = app_state::create(info);\n    app.emplace(the_app->app_id, the_app);\n    meta_view view = {&app, &nodes};\n\n    replica_info rep;\n    rep.app_type = \"test\";\n    rep.pid = dsn::gpid(1, 0);\n\n    dsn::partition_configuration &pc = *get_config(app, rep.pid);\n    config_context &cc = *get_config_context(app, rep.pid);\n\n    std::vector<dsn::rpc_address> node_list;\n    generate_node_list(node_list, 10, 10);\n\n#define CLEAR_REPLICA                       
                                                       \\\n    do {                                                                                           \\\n        pc.primary.set_invalid();                                                                  \\\n        pc.secondaries.clear();                                                                    \\\n        pc.last_drops.clear();                                                                     \\\n    } while (false)\n\n#define CLEAR_DROP_LIST                                                                            \\\n    do {                                                                                           \\\n        cc.dropped.clear();                                                                        \\\n    } while (false)\n\n#define CLEAR_ALL                                                                                  \\\n    CLEAR_REPLICA;                                                                                 \\\n    CLEAR_DROP_LIST\n\n    {\n        // replica is primary of partition\n        CLEAR_ALL;\n        rep.ballot = 10;\n        pc.ballot = 9;\n        pc.primary = node_list[0];\n        ASSERT_TRUE(collect_replica(view, node_list[0], rep));\n    }\n\n    {\n        // replica is secondary of partition\n        CLEAR_ALL;\n        pc.secondaries.push_back(node_list[0]);\n        ASSERT_TRUE(collect_replica(view, node_list[0], rep));\n    }\n\n    {\n        // replica has been in the drop_list\n        CLEAR_ALL;\n        cc.dropped.push_back({node_list[0], 5, 0, 0});\n        ASSERT_TRUE(collect_replica(view, node_list[0], rep));\n    }\n\n    {\n        // drop_list all have timestamp, full\n        CLEAR_ALL;\n        cc.dropped = {\n            dropped_replica{node_list[0], 5, 1, 1, 2},\n            dropped_replica{node_list[1], 6, 1, 1, 2},\n            dropped_replica{node_list[2], 7, 1, 1, 2},\n            dropped_replica{node_list[3], 8, 1, 1, 
2},\n        };\n        rep.ballot = 10;\n        rep.last_prepared_decree = 10;\n        ASSERT_FALSE(collect_replica(view, node_list[5], rep));\n    }\n\n    {\n        // drop_list all have timestamp, not full\n        CLEAR_ALL;\n        cc.dropped = {\n            dropped_replica{node_list[0], 5, 1, 1, 2},\n            dropped_replica{node_list[1], 6, 1, 1, 2},\n            dropped_replica{node_list[2], 7, 1, 1, 2},\n        };\n        rep.ballot = 10;\n        rep.last_durable_decree = 6;\n        rep.last_committed_decree = 8;\n        rep.last_prepared_decree = 10;\n\n        ASSERT_TRUE(collect_replica(view, node_list[4], rep));\n        dropped_replica &d = cc.dropped.front();\n        ASSERT_EQ(d.ballot, rep.ballot);\n        ASSERT_EQ(d.last_prepared_decree, rep.last_prepared_decree);\n    }\n\n    {\n        // drop_list mixed, full, minimal position\n        CLEAR_ALL;\n        cc.dropped = {\n            dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 3, 5},\n            dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 5},\n            dropped_replica{node_list[2], 7, 1, 1, 5},\n            dropped_replica{node_list[3], 8, 1, 1, 5},\n        };\n\n        rep.ballot = 1;\n        rep.last_committed_decree = 3;\n        rep.last_prepared_decree = 5;\n        ASSERT_FALSE(collect_replica(view, node_list[5], rep));\n    }\n\n    {\n        // drop_list mixed, not full, minimal position\n        CLEAR_ALL;\n        cc.dropped = {\n            dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 3, 5},\n            dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 5},\n            dropped_replica{node_list[2], 7, 1, 1, 6},\n        };\n\n        rep.ballot = 1;\n        rep.last_committed_decree = 3;\n        rep.last_prepared_decree = 5;\n        ASSERT_TRUE(collect_replica(view, node_list[5], rep));\n        dropped_replica &d = cc.dropped.front();\n        
ASSERT_EQ(d.node, node_list[5]);\n        ASSERT_EQ(d.ballot, rep.ballot);\n        ASSERT_EQ(d.last_prepared_decree, rep.last_prepared_decree);\n    }\n\n    {\n        // drop_list mixed, full, not minimal position\n        CLEAR_ALL;\n        cc.dropped = {\n            dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 2, 6},\n            dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 6},\n            dropped_replica{node_list[2], 7, 1, 1, 6},\n            dropped_replica{node_list[3], 8, 1, 1, 6},\n        };\n\n        rep.ballot = 2;\n        rep.last_committed_decree = 3;\n        rep.last_prepared_decree = 6;\n        ASSERT_TRUE(collect_replica(view, node_list[5], rep));\n        dropped_replica &d = cc.dropped.front();\n        ASSERT_EQ(rep.ballot, d.ballot);\n        ASSERT_EQ(rep.last_committed_decree, rep.last_committed_decree);\n\n        ASSERT_EQ(4, cc.dropped[1].last_committed_decree);\n    }\n\n    {\n        // drop_list mixed, not full, not minimal position\n        CLEAR_ALL;\n        cc.dropped = {dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 2, 6},\n                      dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 6},\n                      dropped_replica{node_list[2], 7, 1, 1, 6}};\n\n        rep.ballot = 3;\n        rep.last_committed_decree = 1;\n        rep.last_prepared_decree = 6;\n        ASSERT_TRUE(collect_replica(view, node_list[5], rep));\n\n        std::vector<dropped_replica> result_dropped = {\n            dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 2, 6},\n            dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 6},\n            dropped_replica{node_list[5], dropped_replica::INVALID_TIMESTAMP, 3, 1, 6},\n            dropped_replica{node_list[2], 7, 1, 1, 6}};\n\n        ASSERT_TRUE(vec_equal(result_dropped, cc.dropped));\n    }\n\n    {\n        // drop_list no timestamp, full, 
minimal position\n        CLEAR_ALL;\n        cc.dropped = {\n            dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 2, 8},\n            dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 8},\n            dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 2, 6, 8},\n            dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 4, 2, 8},\n        };\n\n        rep.ballot = 1;\n        rep.last_committed_decree = 7;\n        rep.last_prepared_decree = 10;\n        ASSERT_FALSE(collect_replica(view, node_list[5], rep));\n    }\n\n    {\n        // drop_list no timestamp, full, middle position\n        CLEAR_ALL;\n        cc.dropped = {\n            dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 2, 2, 8},\n            dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 8},\n            dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 2, 6, 8},\n            dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 4, 2, 8},\n        };\n\n        rep.ballot = 3;\n        rep.last_committed_decree = 6;\n        rep.last_prepared_decree = 8;\n        ASSERT_TRUE(collect_replica(view, node_list[5], rep));\n\n        std::vector<dropped_replica> result_dropped = {\n            dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 8},\n            dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 2, 6, 8},\n            dropped_replica{node_list[5], dropped_replica::INVALID_TIMESTAMP, 3, 6, 8},\n            dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 4, 2, 8},\n        };\n\n        ASSERT_TRUE(vec_equal(result_dropped, cc.dropped));\n    }\n\n    {\n        // drop_list no timestamp, full, largest position\n        CLEAR_ALL;\n        cc.dropped = {dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 2, 4, 8},\n                      dropped_replica{node_list[2], 
dropped_replica::INVALID_TIMESTAMP, 2, 6, 8},\n                      dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 4, 2, 8},\n                      dropped_replica{node_list[4], dropped_replica::INVALID_TIMESTAMP, 4, 6, 8}};\n\n        rep.ballot = 4;\n        rep.last_committed_decree = 8;\n        rep.last_prepared_decree = 8;\n        ASSERT_TRUE(collect_replica(view, node_list[5], rep));\n\n        std::vector<dropped_replica> result_dropped = {\n            dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 2, 6, 8},\n            dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 4, 2, 8},\n            dropped_replica{node_list[4], dropped_replica::INVALID_TIMESTAMP, 4, 6, 8},\n            dropped_replica{node_list[5], dropped_replica::INVALID_TIMESTAMP, 4, 8, 8}};\n\n        ASSERT_TRUE(vec_equal(result_dropped, cc.dropped));\n    }\n#undef CLEAR_ALL\n#undef CLEAR_REPLICA\n#undef CLEAR_DROP_LIST\n}\n\nTEST(meta_data, construct_replica)\n{\n    app_mapper app;\n    node_mapper nodes;\n\n    dsn::app_info info;\n    info.app_id = 1;\n    info.is_stateful = true;\n    info.status = dsn::app_status::AS_AVAILABLE;\n    info.app_name = \"test\";\n    info.app_type = \"test\";\n    info.max_replica_count = 3;\n    info.partition_count = 1024;\n    std::shared_ptr<app_state> the_app = app_state::create(info);\n    app.emplace(the_app->app_id, the_app);\n    meta_view view = {&app, &nodes};\n\n    replica_info rep;\n    rep.app_type = \"test\";\n    rep.pid = dsn::gpid(1, 0);\n\n    dsn::partition_configuration &pc = *get_config(app, rep.pid);\n    config_context &cc = *get_config_context(app, rep.pid);\n\n    std::vector<dsn::rpc_address> node_list;\n    generate_node_list(node_list, 10, 10);\n\n#define CLEAR_REPLICA                                                                              \\\n    do {                                                                                           \\\n        
pc.primary.set_invalid();                                                                  \\\n        pc.secondaries.clear();                                                                    \\\n        pc.last_drops.clear();                                                                     \\\n    } while (false)\n\n#define CLEAR_DROP_LIST                                                                            \\\n    do {                                                                                           \\\n        cc.dropped.clear();                                                                        \\\n    } while (false)\n\n#define CLEAR_ALL                                                                                  \\\n    CLEAR_REPLICA;                                                                                 \\\n    CLEAR_DROP_LIST\n\n    // drop_list is empty, can't construct replica\n    {\n        CLEAR_ALL;\n        ASSERT_FALSE(construct_replica(view, rep.pid, 3));\n        ASSERT_EQ(0, replica_count(pc));\n    }\n\n    // only have one node in drop_list\n    {\n        CLEAR_ALL;\n        cc.dropped = {dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 5, 10, 12}};\n        ASSERT_TRUE(construct_replica(view, rep.pid, 3));\n        ASSERT_EQ(node_list[0], pc.primary);\n        ASSERT_TRUE(pc.secondaries.empty());\n        ASSERT_TRUE(cc.dropped.empty());\n        ASSERT_EQ(-1, cc.prefered_dropped);\n    }\n\n    // have multiple nodes, ballots are not same\n    {\n        CLEAR_ALL;\n        cc.dropped = {dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 6, 10, 12},\n                      dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 7, 10, 12},\n                      dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 8, 10, 12},\n                      dropped_replica{node_list[4], dropped_replica::INVALID_TIMESTAMP, 9, 11, 12}};\n        
ASSERT_TRUE(construct_replica(view, rep.pid, 3));\n        ASSERT_EQ(node_list[4], pc.primary);\n        ASSERT_TRUE(pc.secondaries.empty());\n\n        std::vector<dsn::rpc_address> nodes = {node_list[2], node_list[3]};\n        ASSERT_EQ(nodes, pc.last_drops);\n        ASSERT_EQ(3, cc.dropped.size());\n        ASSERT_EQ(2, cc.prefered_dropped);\n    }\n\n    // have multiple node, two have same ballots\n    {\n        CLEAR_ALL;\n        cc.dropped = {dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 5, 10, 12},\n                      dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 7, 11, 12},\n                      dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 7, 12, 12}};\n\n        ASSERT_TRUE(construct_replica(view, rep.pid, 3));\n        ASSERT_EQ(node_list[2], pc.primary);\n        ASSERT_TRUE(pc.secondaries.empty());\n\n        std::vector<dsn::rpc_address> nodes = {node_list[0], node_list[1]};\n        ASSERT_EQ(nodes, pc.last_drops);\n        ASSERT_EQ(2, cc.dropped.size());\n        ASSERT_EQ(1, cc.prefered_dropped);\n    }\n\n    // have multiple nodes, all have same ballots\n    {\n        CLEAR_ALL;\n        cc.dropped = {dropped_replica{node_list[0], dropped_replica::INVALID_TIMESTAMP, 7, 11, 14},\n                      dropped_replica{node_list[1], dropped_replica::INVALID_TIMESTAMP, 7, 12, 14},\n                      dropped_replica{node_list[2], dropped_replica::INVALID_TIMESTAMP, 7, 13, 14},\n                      dropped_replica{node_list[3], dropped_replica::INVALID_TIMESTAMP, 7, 14, 14}};\n\n        ASSERT_TRUE(construct_replica(view, rep.pid, 3));\n        ASSERT_EQ(node_list[3], pc.primary);\n        ASSERT_TRUE(pc.secondaries.empty());\n\n        std::vector<dsn::rpc_address> nodes = {node_list[1], node_list[2]};\n        ASSERT_EQ(nodes, pc.last_drops);\n\n        ASSERT_EQ(3, cc.dropped.size());\n        ASSERT_EQ(2, cc.prefered_dropped);\n    }\n}\n"
  },
  {
    "path": "src/meta/test/meta_duplication_service_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <gtest/gtest.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/common.h>\n#include <dsn/utils/time_utils.h>\n\n#include \"meta/server_load_balancer.h\"\n#include \"meta/meta_server_failure_detector.h\"\n#include \"meta/meta_http_service.h\"\n#include \"meta/duplication/meta_duplication_service.h\"\n#include \"meta/test/misc/misc.h\"\n\n#include \"meta_service_test_app.h\"\n#include \"meta_test_base.h\"\n#include \"dsn/utility/fail_point.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass meta_duplication_service_test : public meta_test_base\n{\npublic:\n    meta_duplication_service_test() {}\n\n    duplication_add_response create_dup(const std::string &app_name,\n                                        const 
std::string &remote_cluster = \"slave-cluster\",\n                                        bool freezed = false)\n    {\n        auto req = make_unique<duplication_add_request>();\n        req->app_name = app_name;\n        req->remote_cluster_name = remote_cluster;\n\n        duplication_add_rpc rpc(std::move(req), RPC_CM_ADD_DUPLICATION);\n        dup_svc().add_duplication(rpc);\n        wait_all();\n        return rpc.response();\n    }\n\n    duplication_query_response query_dup_info(const std::string &app_name)\n    {\n        auto req = make_unique<duplication_query_request>();\n        req->app_name = app_name;\n\n        duplication_query_rpc rpc(std::move(req), RPC_CM_QUERY_DUPLICATION);\n        dup_svc().query_duplication_info(rpc.request(), rpc.response());\n\n        return rpc.response();\n    }\n\n    duplication_modify_response\n    change_dup_status(const std::string &app_name, dupid_t dupid, duplication_status::type status)\n    {\n        auto req = make_unique<duplication_modify_request>();\n        req->dupid = dupid;\n        req->app_name = app_name;\n        req->__set_status(status);\n\n        duplication_modify_rpc rpc(std::move(req), RPC_CM_MODIFY_DUPLICATION);\n        dup_svc().modify_duplication(rpc);\n        wait_all();\n\n        return rpc.response();\n    }\n\n    duplication_modify_response\n    update_fail_mode(const std::string &app_name, dupid_t dupid, duplication_fail_mode::type fmode)\n    {\n        auto req = make_unique<duplication_modify_request>();\n        req->dupid = dupid;\n        req->app_name = app_name;\n        req->__set_fail_mode(fmode);\n\n        duplication_modify_rpc rpc(std::move(req), RPC_CM_MODIFY_DUPLICATION);\n        dup_svc().modify_duplication(rpc);\n        wait_all();\n\n        return rpc.response();\n    }\n\n    duplication_sync_response\n    duplication_sync(const rpc_address &node,\n                     std::map<gpid, std::vector<duplication_confirm_entry>> confirm_list)\n    {\n        
auto req = make_unique<duplication_sync_request>();\n        req->node = node;\n        req->confirm_list = confirm_list;\n\n        duplication_sync_rpc rpc(std::move(req), RPC_CM_DUPLICATION_SYNC);\n        dup_svc().duplication_sync(rpc);\n        wait_all();\n\n        return rpc.response();\n    }\n\n    void recover_from_meta_state()\n    {\n        dup_svc().recover_from_meta_state();\n        wait_all();\n    }\n\n    void create_follower_app_for_duplication(const std::shared_ptr<duplication_info> &dup,\n                                             const std::shared_ptr<app_state> &app)\n    {\n        dup_svc().create_follower_app_for_duplication(dup, app);\n    }\n\n    void check_follower_app_if_create_completed(const std::shared_ptr<duplication_info> &dup)\n    {\n        dup_svc().check_follower_app_if_create_completed(dup);\n    }\n\n    duplication_status::type next_status(const std::shared_ptr<duplication_info> &dup) const\n    {\n        return dup->_next_status;\n    }\n\n    void force_update_dup_status(const std::shared_ptr<duplication_info> &dup,\n                                 duplication_status::type to_status)\n    {\n        dup->_status = to_status;\n    }\n\n    /// === Tests ===\n\n    void test_new_dup_from_init()\n    {\n        std::string test_app = \"test-app\";\n        create_app(test_app);\n        auto app = find_app(test_app);\n        std::string remote_cluster_address = \"dsn://slave-cluster/temp\";\n\n        int last_dup = 0;\n        for (int i = 0; i < 1000; i++) {\n            auto dup = dup_svc().new_dup_from_init(\n                remote_cluster_address, std::vector<rpc_address>(), app);\n\n            ASSERT_GT(dup->id, 0);\n            ASSERT_FALSE(dup->is_altering());\n            ASSERT_EQ(dup->_status, duplication_status::DS_INIT);\n            ASSERT_EQ(dup->_next_status, duplication_status::DS_INIT);\n\n            auto ent = dup->to_duplication_entry();\n            for (int j = 0; j < app->partition_count; 
j++) {\n                ASSERT_EQ(ent.progress[j], invalid_decree);\n            }\n\n            if (last_dup != 0) {\n                ASSERT_GT(dup->id, last_dup);\n            }\n            last_dup = dup->id;\n        }\n    }\n\n    void test_recover_from_meta_state()\n    {\n        size_t total_apps_num = 2;\n        std::vector<std::string> test_apps(total_apps_num);\n\n        // app -> <dupid -> dup>\n        std::map<std::string, std::map<dupid_t, duplication_info_s_ptr>> app_to_duplications;\n\n        for (int i = 0; i < total_apps_num; i++) {\n            test_apps[i] = \"test_app_\" + std::to_string(i);\n            create_app(test_apps[i]);\n\n            auto resp = create_dup(test_apps[i]);\n            ASSERT_EQ(ERR_OK, resp.err);\n\n            auto app = find_app(test_apps[i]);\n            app_to_duplications[test_apps[i]] = app->duplications;\n\n            // update progress\n            auto dup = app->duplications[resp.dupid];\n            duplication_sync_rpc rpc(make_unique<duplication_sync_request>(),\n                                     RPC_CM_DUPLICATION_SYNC);\n            duplication_confirm_entry entry;\n            entry.confirmed_decree = 1000;\n            dup_svc().do_update_partition_confirmed(dup, rpc, 1, entry);\n            wait_all();\n\n            entry.confirmed_decree = 2000;\n            dup_svc().do_update_partition_confirmed(dup, rpc, 2, entry);\n            wait_all();\n\n            entry.confirmed_decree = 1000;\n            dup_svc().do_update_partition_confirmed(dup, rpc, 4, entry);\n            wait_all();\n        }\n\n        // reset meta server states\n        SetUp();\n\n        recover_from_meta_state();\n\n        for (int i = 0; i < test_apps.size(); i++) {\n            auto app = find_app(test_apps[i]);\n            ASSERT_EQ(app->duplicating, true);\n\n            auto &before = app_to_duplications[test_apps[i]];\n            auto &after = app->duplications;\n            ASSERT_EQ(before.size(), 
after.size());\n\n            for (auto &kv : before) {\n                dupid_t dupid = kv.first;\n                auto &dup = kv.second;\n\n                ASSERT_TRUE(after.find(dupid) != after.end());\n                ASSERT_TRUE(dup->equals_to(*after[dupid])) << dup->to_string() << std::endl\n                                                           << after[dupid]->to_string();\n            }\n        }\n    }\n\n    std::shared_ptr<app_state> mock_test_case_and_recover(std::vector<std::string> nodes,\n                                                          std::string value)\n    {\n        TearDown();\n        SetUp();\n\n        std::string test_app = \"test-app\";\n        create_app(test_app);\n        auto app = find_app(test_app);\n        std::string remote_cluster_address = \"dsn://slave-cluster/temp\";\n\n        std::queue<std::string> q_nodes;\n        for (auto n : nodes) {\n            q_nodes.push(std::move(n));\n        }\n        _ms->get_meta_storage()->create_node_recursively(\n            std::move(q_nodes), blob::create_from_bytes(std::move(value)), []() mutable {});\n        wait_all();\n\n        SetUp();\n        recover_from_meta_state();\n\n        return find_app(test_app);\n    }\n\n    // Corrupted meta data may result from bad write to meta-store.\n    // This test ensures meta-server is still able to recover when\n    // meta data is corrupted.\n    void test_recover_from_corrupted_meta_data()\n    {\n        std::string test_app = \"test-app\";\n        create_app(test_app);\n        auto app = find_app(test_app);\n\n        // recover from /<app>/dup\n        app = mock_test_case_and_recover({_ss->get_app_path(*app), std::string(\"dup\")}, \"\");\n        ASSERT_FALSE(app->duplicating);\n        ASSERT_TRUE(app->duplications.empty());\n\n        // recover from /<app>/duplication/xxx/\n        app = mock_test_case_and_recover({dup_svc().get_duplication_path(*app), std::string(\"xxx\")},\n                                     
    \"\");\n        ASSERT_FALSE(app->duplicating);\n        ASSERT_TRUE(app->duplications.empty());\n\n        // recover from /<app>/duplication/123/, but its value is empty\n        app = mock_test_case_and_recover({dup_svc().get_duplication_path(*app), std::string(\"123\")},\n                                         \"\");\n        ASSERT_FALSE(app->duplicating);\n        ASSERT_TRUE(app->duplications.empty());\n\n        // recover from /<app>/duplication/<dup_id>/0, but its confirmed_decree is not valid integer\n        TearDown();\n        SetUp();\n        create_app(test_app);\n        app = find_app(test_app);\n        auto test_dup = create_dup(test_app, \"slave-cluster\", true);\n        ASSERT_EQ(test_dup.err, ERR_OK);\n        duplication_info_s_ptr dup = app->duplications[test_dup.dupid];\n        _ms->get_meta_storage()->create_node(meta_duplication_service::get_partition_path(dup, \"0\"),\n                                             blob::create_from_bytes(\"xxx\"),\n                                             []() mutable {});\n        wait_all();\n        SetUp();\n        recover_from_meta_state();\n        app = find_app(test_app);\n        ASSERT_TRUE(app->duplicating);\n        ASSERT_EQ(app->duplications.size(), 1);\n        for (int i = 0; i < app->partition_count; i++) {\n            ASSERT_EQ(app->duplications[test_dup.dupid]->_progress[i].is_inited, i != 0);\n        }\n\n        // recover from /<app>/duplication/<dup_id>/x, its pid is not valid integer\n        TearDown();\n        SetUp();\n        create_app(test_app);\n        app = find_app(test_app);\n        test_dup = create_dup(test_app, \"slave-cluster\", true);\n        ASSERT_EQ(test_dup.err, ERR_OK);\n        dup = app->duplications[test_dup.dupid];\n        _ms->get_meta_storage()->create_node(meta_duplication_service::get_partition_path(dup, \"x\"),\n                                             blob::create_from_bytes(\"xxx\"),\n                                          
   []() mutable {});\n        wait_all();\n        SetUp();\n        recover_from_meta_state();\n        ASSERT_TRUE(app->duplicating);\n        ASSERT_EQ(app->duplications.size(), 1);\n        for (int i = 0; i < app->partition_count; i++) {\n            ASSERT_EQ(app->duplications[test_dup.dupid]->_progress[i].is_inited, true);\n        }\n    }\n\n    void test_add_duplication()\n    {\n        std::string test_app = \"test-app\";\n        std::string test_app_invalid_ver = \"test-app-invalid-ver\";\n\n        std::string invalid_remote = \"test-invalid-remote\";\n        std::string ok_remote = \"slave-cluster\";\n\n        std::string cluster_without_address = \"cluster_without_address_for_test\";\n\n        create_app(test_app);\n\n        create_app(test_app_invalid_ver);\n        find_app(test_app_invalid_ver)->envs[\"value_version\"] = \"0\";\n\n        struct TestData\n        {\n            std::string app;\n            std::string remote;\n\n            error_code wec;\n        } tests[] = {\n            //        {test_app_invalid_ver, ok_remote, ERR_INVALID_VERSION},\n\n            {test_app, ok_remote, ERR_OK},\n\n            {test_app, invalid_remote, ERR_INVALID_PARAMETERS},\n\n            {test_app, get_current_cluster_name(), ERR_INVALID_PARAMETERS},\n\n            {test_app, cluster_without_address, ERR_INVALID_PARAMETERS},\n        };\n\n        for (auto tt : tests) {\n            auto resp = create_dup(tt.app, tt.remote);\n            ASSERT_EQ(tt.wec, resp.err);\n\n            if (tt.wec == ERR_OK) {\n                auto app = find_app(test_app);\n                auto dup = app->duplications[resp.dupid];\n                ASSERT_TRUE(dup != nullptr);\n                ASSERT_EQ(dup->app_id, app->app_id);\n                ASSERT_EQ(dup->_status, duplication_status::DS_PREPARE);\n                ASSERT_EQ(dup->follower_cluster_name, ok_remote);\n                ASSERT_EQ(resp.dupid, dup->id);\n                ASSERT_EQ(app->duplicating, 
true);\n            }\n        }\n    }\n};\n\n// This test ensures that duplication upon an unavailable app will\n// be rejected with ERR_APP_NOT_EXIST.\nTEST_F(meta_duplication_service_test, dup_op_upon_unavail_app)\n{\n    std::string test_app = \"test-app\";\n    std::string test_app_not_exist = \"test-app-not-exists\";\n    std::string test_app_unavail = \"test-app-unavail\";\n\n    create_app(test_app);\n    auto app = find_app(test_app);\n\n    create_app(test_app_unavail);\n    find_app(test_app_unavail)->status = app_status::AS_DROPPED;\n\n    dupid_t test_dup = create_dup(test_app).dupid;\n\n    struct TestData\n    {\n        std::string app;\n\n        error_code wec;\n    } tests[] = {\n        {test_app_not_exist, ERR_APP_NOT_EXIST},\n        {test_app_unavail, ERR_APP_NOT_EXIST},\n\n        {test_app, ERR_OK},\n    };\n\n    for (auto tt : tests) {\n        ASSERT_EQ(query_dup_info(tt.app).err, tt.wec);\n        ASSERT_EQ(create_dup(tt.app).err, tt.wec);\n        ASSERT_EQ(change_dup_status(tt.app, test_dup, duplication_status::DS_REMOVED).err, tt.wec);\n    }\n}\n\nTEST_F(meta_duplication_service_test, add_duplication) { test_add_duplication(); }\n\n// Ensure meta server never creates another dup to the same remote cluster and app,\n// if there's already one existed.\nTEST_F(meta_duplication_service_test, dont_create_if_existed)\n{\n    std::string test_app = \"test-app\";\n\n    create_app(test_app);\n    auto app = find_app(test_app);\n\n    create_dup(test_app);\n    create_dup(test_app);\n    dupid_t dupid = create_dup(test_app).dupid;\n\n    {\n        auto resp = query_dup_info(test_app);\n        ASSERT_EQ(resp.err, ERR_OK);\n        ASSERT_EQ(resp.entry_list.size(), 1);\n\n        const auto &duplication_entry = resp.entry_list.back();\n        ASSERT_EQ(duplication_entry.status, duplication_status::DS_PREPARE);\n        ASSERT_EQ(duplication_entry.dupid, dupid);\n    }\n}\n\nTEST_F(meta_duplication_service_test, 
change_duplication_status)\n{\n    std::string test_app = \"test-app\";\n\n    create_app(test_app);\n    auto app = find_app(test_app);\n    dupid_t test_dup = create_dup(test_app).dupid;\n    change_dup_status(test_app, test_dup, duplication_status::DS_APP);\n    change_dup_status(test_app, test_dup, duplication_status::DS_LOG);\n\n    struct TestData\n    {\n        std::string app;\n        dupid_t dupid;\n        duplication_status::type status;\n\n        error_code wec;\n    } tests[] = {\n        {test_app, test_dup + 1, duplication_status::DS_REMOVED, ERR_OBJECT_NOT_FOUND},\n\n        // ok test\n        {test_app, test_dup, duplication_status::DS_PAUSE, ERR_OK}, // start->pause\n        {test_app, test_dup, duplication_status::DS_PAUSE, ERR_OK}, // pause->pause\n        {test_app, test_dup, duplication_status::DS_LOG, ERR_OK},   // pause->start\n        {test_app, test_dup, duplication_status::DS_LOG, ERR_OK},   // start->start\n    };\n\n    for (auto tt : tests) {\n        auto resp = change_dup_status(tt.app, tt.dupid, tt.status);\n        ASSERT_EQ(resp.err, tt.wec);\n    }\n}\n\n// this test ensures that dupid is always increment and larger than zero.\nTEST_F(meta_duplication_service_test, new_dup_from_init) { test_new_dup_from_init(); }\n\nTEST_F(meta_duplication_service_test, remove_dup)\n{\n    std::string test_app = \"test-app\";\n    create_app(test_app);\n    auto app = find_app(test_app);\n\n    auto resp = create_dup(test_app);\n    ASSERT_EQ(ERR_OK, resp.err);\n    dupid_t dupid1 = resp.dupid;\n\n    ASSERT_EQ(app->duplicating, true);\n    auto dup = app->duplications.find(dupid1)->second;\n\n    auto resp2 = change_dup_status(test_app, dupid1, duplication_status::DS_REMOVED);\n    ASSERT_EQ(ERR_OK, resp2.err);\n    // though this duplication is unreferenced, its status still updated correctly\n    ASSERT_EQ(dup->status(), duplication_status::DS_REMOVED);\n\n    ASSERT_EQ(app->duplicating, false);\n    // ensure duplication removed\n    
ASSERT_EQ(app->duplications.find(dupid1), app->duplications.end());\n    _ms->get_meta_storage()->get_children(std::string(dup->store_path),\n                                          [](bool node_exists, const std::vector<std::string> &) {\n                                              // ensure node cleaned up\n                                              ASSERT_FALSE(node_exists);\n                                          });\n\n    // reset meta server states\n    SetUp();\n    recover_from_meta_state();\n\n    ASSERT_EQ(app->duplicating, false);\n    ASSERT_EQ(app->duplications.find(dupid1), app->duplications.end());\n}\n\nTEST_F(meta_duplication_service_test, duplication_sync)\n{\n    std::vector<rpc_address> server_nodes = ensure_enough_alive_nodes(3);\n    rpc_address node = server_nodes[0];\n\n    std::string test_app = \"test_app_0\";\n    create_app(test_app);\n    auto app = find_app(test_app);\n\n    // generate all primaries on node[0]\n    for (partition_configuration &pc : app->partitions) {\n        pc.ballot = random32(1, 10000);\n        pc.primary = server_nodes[0];\n        pc.secondaries.push_back(server_nodes[1]);\n        pc.secondaries.push_back(server_nodes[2]);\n    }\n\n    initialize_node_state();\n\n    dupid_t dupid = create_dup(test_app).dupid;\n    auto dup = app->duplications[dupid];\n    for (int i = 0; i < app->partition_count; i++) {\n        dup->init_progress(i, invalid_decree);\n    }\n    {\n        std::map<gpid, std::vector<duplication_confirm_entry>> confirm_list;\n\n        duplication_confirm_entry ce;\n        ce.dupid = dupid;\n\n        ce.confirmed_decree = 5;\n        confirm_list[gpid(app->app_id, 1)].push_back(ce);\n\n        ce.confirmed_decree = 6;\n        confirm_list[gpid(app->app_id, 2)].push_back(ce);\n\n        ce.confirmed_decree = 7;\n        confirm_list[gpid(app->app_id, 3)].push_back(ce);\n\n        duplication_sync_response resp = duplication_sync(node, confirm_list);\n        ASSERT_EQ(resp.err, 
ERR_OK);\n        ASSERT_EQ(resp.dup_map.size(), 1);\n        ASSERT_EQ(resp.dup_map[app->app_id].size(), 1);\n        ASSERT_EQ(resp.dup_map[app->app_id][dupid].dupid, dupid);\n        ASSERT_EQ(resp.dup_map[app->app_id][dupid].status, duplication_status::DS_PREPARE);\n        ASSERT_EQ(resp.dup_map[app->app_id][dupid].create_ts, dup->create_timestamp_ms);\n        ASSERT_EQ(resp.dup_map[app->app_id][dupid].remote, dup->follower_cluster_name);\n        ASSERT_EQ(resp.dup_map[app->app_id][dupid].fail_mode, dup->fail_mode());\n\n        auto progress_map = resp.dup_map[app->app_id][dupid].progress;\n        ASSERT_EQ(progress_map.size(), 8);\n        ASSERT_EQ(progress_map[1], 5);\n        ASSERT_EQ(progress_map[2], 6);\n        ASSERT_EQ(progress_map[3], 7);\n\n        // ensure no updated progresses will also be included in response\n        for (int p = 4; p < 8; p++) {\n            ASSERT_EQ(progress_map[p], invalid_decree);\n        }\n        ASSERT_EQ(progress_map[0], invalid_decree);\n    }\n\n    { // duplication not existed will be ignored\n        std::map<gpid, std::vector<duplication_confirm_entry>> confirm_list;\n\n        duplication_confirm_entry ce;\n        ce.dupid = dupid + 1; // not created\n        ce.confirmed_decree = 5;\n        confirm_list[gpid(app->app_id, 1)].push_back(ce);\n\n        duplication_sync_response resp = duplication_sync(node, confirm_list);\n        ASSERT_EQ(resp.err, ERR_OK);\n        ASSERT_EQ(resp.dup_map.size(), 1);\n        ASSERT_TRUE(resp.dup_map[app->app_id].find(dupid + 1) == resp.dup_map[app->app_id].end());\n    }\n\n    { // app not existed will be ignored\n        std::map<gpid, std::vector<duplication_confirm_entry>> confirm_list;\n\n        duplication_confirm_entry ce;\n        ce.dupid = dupid;\n        ce.confirmed_decree = 5;\n        confirm_list[gpid(app->app_id + 1, 1)].push_back(ce);\n\n        duplication_sync_response resp = duplication_sync(node, confirm_list);\n        ASSERT_EQ(resp.err, 
ERR_OK);\n        ASSERT_EQ(resp.dup_map.size(), 1);\n        ASSERT_TRUE(resp.dup_map.find(app->app_id + 1) == resp.dup_map.end());\n    }\n\n    { // duplication removed will be ignored\n        change_dup_status(test_app, dupid, duplication_status::DS_REMOVED);\n\n        std::map<gpid, std::vector<duplication_confirm_entry>> confirm_list;\n\n        duplication_confirm_entry ce;\n        ce.dupid = dupid;\n        ce.confirmed_decree = 5;\n        confirm_list[gpid(app->app_id, 1)].push_back(ce);\n\n        duplication_sync_response resp = duplication_sync(node, confirm_list);\n        ASSERT_EQ(resp.err, ERR_OK);\n        ASSERT_EQ(resp.dup_map.size(), 0);\n    }\n}\n\n// This test ensures that duplications persisted on meta storage can be\n// correctly restored.\nTEST_F(meta_duplication_service_test, recover_from_meta_state) { test_recover_from_meta_state(); }\n\nTEST_F(meta_duplication_service_test, query_duplication_info)\n{\n    std::string test_app = \"test-app\";\n\n    create_app(test_app);\n    auto app = find_app(test_app);\n\n    dupid_t test_dup = create_dup(test_app).dupid;\n    change_dup_status(test_app, test_dup, duplication_status::DS_PAUSE);\n\n    auto resp = query_dup_info(test_app);\n    ASSERT_EQ(resp.err, ERR_OK);\n    ASSERT_EQ(resp.entry_list.size(), 1);\n    ASSERT_EQ(resp.entry_list.back().status, duplication_status::DS_PREPARE);\n    ASSERT_EQ(resp.entry_list.back().dupid, test_dup);\n    ASSERT_EQ(resp.appid, app->app_id);\n\n    change_dup_status(test_app, test_dup, duplication_status::DS_REMOVED);\n    resp = query_dup_info(test_app);\n    ASSERT_EQ(resp.err, ERR_OK);\n    ASSERT_EQ(resp.entry_list.size(), 0);\n}\n\nTEST_F(meta_duplication_service_test, re_add_duplication)\n{\n    std::string test_app = \"test-app\";\n\n    create_app(test_app);\n    auto app = find_app(test_app);\n\n    auto test_dup = create_dup(test_app);\n    ASSERT_EQ(test_dup.err, ERR_OK);\n    ASSERT_TRUE(app->duplications[test_dup.dupid] != nullptr);\n    
auto resp = change_dup_status(test_app, test_dup.dupid, duplication_status::DS_REMOVED);\n    ASSERT_EQ(resp.err, ERR_OK);\n    ASSERT_TRUE(app->duplications.find(test_dup.dupid) == app->duplications.end());\n\n    sleep(1);\n\n    auto test_dup_2 = create_dup(test_app);\n    ASSERT_EQ(test_dup_2.appid, app->app_id);\n    ASSERT_EQ(test_dup_2.err, ERR_OK);\n\n    // once duplication is removed, all its state is not valid anymore.\n    ASSERT_EQ(app->duplications.size(), 1);\n    ASSERT_NE(test_dup.dupid, test_dup_2.dupid);\n\n    auto dup_list = query_dup_info(test_app).entry_list;\n    ASSERT_EQ(dup_list.size(), 1);\n    ASSERT_EQ(dup_list.begin()->status, duplication_status::DS_PREPARE);\n    ASSERT_EQ(dup_list.begin()->dupid, test_dup_2.dupid);\n\n    // reset meta server states\n    SetUp();\n\n    recover_from_meta_state();\n    app = find_app(test_app);\n    ASSERT_TRUE(app->duplications.find(test_dup.dupid) == app->duplications.end());\n    ASSERT_EQ(app->duplications.size(), 1);\n}\n\nTEST_F(meta_duplication_service_test, recover_from_corrupted_meta_data)\n{\n    test_recover_from_corrupted_meta_data();\n}\n\nTEST_F(meta_duplication_service_test, query_duplication_handler)\n{\n    std::string test_app = \"test-app\";\n    create_app(test_app);\n    create_dup(test_app);\n    meta_http_service mhs(_ms.get());\n\n    http_request fake_req;\n    http_response fake_resp;\n    fake_req.query_args[\"name\"] = test_app + \"not-found\";\n    mhs.query_duplication_handler(fake_req, fake_resp);\n    ASSERT_EQ(fake_resp.status_code, http_status_code::not_found);\n\n    const auto &duplications = find_app(test_app)->duplications;\n    ASSERT_EQ(duplications.size(), 1);\n    auto dup = duplications.begin()->second;\n\n    fake_req.query_args[\"name\"] = test_app;\n    mhs.query_duplication_handler(fake_req, fake_resp);\n    ASSERT_EQ(fake_resp.status_code, http_status_code::ok);\n    char ts_buf[32];\n    utils::time_ms_to_date_time(\n        
static_cast<uint64_t>(dup->create_timestamp_ms), ts_buf, sizeof(ts_buf));\n    ASSERT_EQ(fake_resp.body,\n              std::string() + R\"({\"1\":{\"create_ts\":\")\" + ts_buf + R\"(\",\"dupid\":)\" +\n                  std::to_string(dup->id) +\n                  R\"(,\"fail_mode\":\"FAIL_SLOW\")\"\n                  R\"(,\"remote\":\"slave-cluster\",\"status\":\"DS_PREPARE\"},\"appid\":2})\");\n}\n\nTEST_F(meta_duplication_service_test, fail_mode)\n{\n    std::string test_app = \"test-app\";\n    create_app(test_app);\n    auto app = find_app(test_app);\n\n    auto dup_add_resp = create_dup(test_app);\n    auto dup = app->duplications[dup_add_resp.dupid];\n    ASSERT_EQ(dup->fail_mode(), duplication_fail_mode::FAIL_SLOW);\n    ASSERT_EQ(dup->status(), duplication_status::DS_PREPARE);\n\n    auto resp = update_fail_mode(test_app, dup->id, duplication_fail_mode::FAIL_SKIP);\n    ASSERT_EQ(resp.err, ERR_OK);\n    ASSERT_EQ(dup->fail_mode(), duplication_fail_mode::FAIL_SKIP);\n    ASSERT_EQ(dup->status(), duplication_status::DS_PREPARE);\n\n    // change nothing\n    resp = update_fail_mode(test_app, dup->id, duplication_fail_mode::FAIL_SKIP);\n    ASSERT_EQ(resp.err, ERR_OK);\n    ASSERT_EQ(dup->fail_mode(), duplication_fail_mode::FAIL_SKIP);\n    ASSERT_EQ(dup->status(), duplication_status::DS_PREPARE);\n\n    // change status but fail mode not changed\n    change_dup_status(test_app, dup->id, duplication_status::DS_APP);\n    change_dup_status(test_app, dup->id, duplication_status::DS_LOG);\n    resp = change_dup_status(test_app, dup->id, duplication_status::DS_PAUSE);\n    ASSERT_EQ(resp.err, ERR_OK);\n    ASSERT_EQ(dup->fail_mode(), duplication_fail_mode::FAIL_SKIP);\n    ASSERT_EQ(dup->status(), duplication_status::DS_PAUSE);\n\n    // ensure dup_sync will synchronize fail_mode\n    std::vector<rpc_address> server_nodes = generate_node_list(3);\n    rpc_address node = server_nodes[0];\n    for (partition_configuration &pc : app->partitions) {\n        
pc.primary = server_nodes[0];\n    }\n    initialize_node_state();\n    duplication_sync_response sync_resp = duplication_sync(node, {});\n    ASSERT_TRUE(sync_resp.dup_map[app->app_id][dup->id].__isset.fail_mode);\n    ASSERT_EQ(sync_resp.dup_map[app->app_id][dup->id].fail_mode, duplication_fail_mode::FAIL_SKIP);\n\n    // ensure recovery will not lose fail_mode.\n    SetUp();\n    recover_from_meta_state();\n    app = find_app(test_app);\n    dup = app->duplications[dup->id];\n    ASSERT_EQ(dup->fail_mode(), duplication_fail_mode::FAIL_SKIP);\n}\n\nTEST_F(meta_duplication_service_test, create_follower_app_for_duplication)\n{\n    struct test_case\n    {\n        std::string fail_cfg_name;\n        std::string fail_cfg_action;\n        bool is_altering;\n        duplication_status::type cur_status;\n        duplication_status::type next_status;\n    } test_cases[] = {{\"update_app_request_ok\",\n                       \"void()\",\n                       false,\n                       duplication_status::DS_APP,\n                       duplication_status::DS_INIT},\n                      // the case just `palace holder`, actually\n                      // `trigger_follower_duplicate_checkpoint` is failed by default in unit test\n                      {\"update_dup_status_failed\",\n                       \"off()\",\n                       false,\n                       duplication_status::DS_PREPARE,\n                       duplication_status::DS_INIT},\n                      {\"persist_dup_status_failed\",\n                       \"return()\",\n                       true,\n                       duplication_status::DS_PREPARE,\n                       duplication_status::DS_APP}};\n\n    for (const auto &test : test_cases) {\n        std::string test_app = test.fail_cfg_name;\n        create_app(test_app);\n        auto app = find_app(test_app);\n\n        auto dup_add_resp = create_dup(test_app);\n        auto dup = app->duplications[dup_add_resp.dupid];\n\n      
  fail::setup();\n        fail::cfg(test.fail_cfg_name, test.fail_cfg_action);\n        create_follower_app_for_duplication(dup, app);\n        wait_all();\n        fail::teardown();\n        ASSERT_EQ(dup->is_altering(), test.is_altering);\n        ASSERT_EQ(next_status(dup), test.next_status);\n        ASSERT_EQ(dup->status(), test.cur_status);\n    }\n}\n\nTEST_F(meta_duplication_service_test, check_follower_app_if_create_completed)\n{\n    struct test_case\n    {\n        std::vector<std::string> fail_cfg_name;\n        std::vector<std::string> fail_cfg_action;\n        bool is_altering;\n        duplication_status::type cur_status;\n        duplication_status::type next_status;\n    } test_cases[] = {{{\"create_app_ok\"},\n                       {\"void()\"},\n                       false,\n                       duplication_status::DS_LOG,\n                       duplication_status::DS_INIT},\n                      // the case just `palace holder`, actually\n                      // `check_follower_app_if_create_completed` is failed by default in unit test\n                      {{\"create_app_failed\"},\n                       {\"off()\"},\n                       false,\n                       duplication_status::DS_APP,\n                       duplication_status::DS_INIT},\n                      {{\"create_app_ok\", \"persist_dup_status_failed\"},\n                       {\"void()\", \"return()\"},\n                       true,\n                       duplication_status::DS_APP,\n                       duplication_status::DS_LOG}};\n\n    for (const auto &test : test_cases) {\n        std::string test_app =\n            fmt::format(\"{}{}\", test.fail_cfg_name[0], test.fail_cfg_name.size());\n        create_app(test_app);\n        auto app = find_app(test_app);\n\n        auto dup_add_resp = create_dup(test_app);\n        auto dup = app->duplications[dup_add_resp.dupid];\n        // 'check_follower_app_if_create_completed' must execute under 
duplication_status::DS_APP,\n        // so force update it\n        force_update_dup_status(dup, duplication_status::DS_APP);\n        fail::setup();\n        for (int i = 0; i < test.fail_cfg_name.size(); i++) {\n            fail::cfg(test.fail_cfg_name[i], test.fail_cfg_action[i]);\n        }\n        check_follower_app_if_create_completed(dup);\n        wait_all();\n        fail::teardown();\n        ASSERT_EQ(dup->is_altering(), test.is_altering);\n        ASSERT_EQ(next_status(dup), test.next_status);\n        ASSERT_EQ(dup->status(), test.cur_status);\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/meta_http_service_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <iostream>\n#include <gtest/gtest.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/http/http_server.h>\n#include <dsn/utility/fail_point.h>\n\n#include \"meta/meta_http_service.h\"\n#include \"meta/meta_service.h\"\n#include \"meta_test_base.h\"\n#include \"meta_service_test_app.h\"\n#include \"meta/meta_bulk_load_service.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass meta_http_service_test : public meta_test_base\n{\npublic:\n    void SetUp() override\n    {\n        meta_test_base::SetUp();\n        FLAGS_enable_http_server = false; // disable http server\n        _mhs = dsn::make_unique<meta_http_service>(_ms.get());\n        create_app(test_app);\n    }\n\n    /// === Tests ===\n\n    void test_get_app_from_primary()\n    {\n        http_request fake_req;\n        http_response fake_resp;\n        fake_req.query_args.emplace(\"name\", test_app);\n        _mhs->get_app_handler(fake_req, fake_resp);\n\n        ASSERT_EQ(fake_resp.status_code, http_status_code::ok)\n            << http_status_code_to_string(fake_resp.status_code);\n        std::string fake_json = R\"({\"general\":{\"app_name\":\")\" + test_app + 
R\"(\",\"app_id\":\"2)\" +\n                                R\"(\",\"partition_count\":\"8\",\"max_replica_count\":\"3\"}})\" + \"\\n\";\n        ASSERT_EQ(fake_resp.body, fake_json);\n    }\n\n    void test_get_app_envs()\n    {\n        // set app env\n        std::string env_key = \"replica.slow_query_threshold\";\n        std::string env_value = \"100\";\n        update_app_envs(test_app, {env_key}, {env_value});\n\n        // http get app envs\n        http_request fake_req;\n        http_response fake_resp;\n        fake_req.query_args.emplace(\"name\", test_app);\n        _mhs->get_app_envs_handler(fake_req, fake_resp);\n\n        // env (value_version, 1) was set by create_app\n        std::string fake_json = R\"({\")\" + env_key + R\"(\":)\" + R\"(\")\" + env_value + R\"(\",)\" +\n                                R\"(\"value_version\":\"1\"})\" + \"\\n\";\n        ASSERT_EQ(fake_resp.status_code, http_status_code::ok)\n            << http_status_code_to_string(fake_resp.status_code);\n        ASSERT_EQ(fake_resp.body, fake_json);\n    }\n\n    std::unique_ptr<meta_http_service> _mhs;\n    std::string test_app = \"test_meta_http\";\n};\n\nclass meta_backup_test_base : public meta_test_base\n{\npublic:\n    void SetUp() override\n    {\n        meta_test_base::SetUp();\n\n        _ms->_backup_handler = std::make_shared<backup_service>(\n            _ms.get(),\n            _ms->_cluster_root + \"/backup_meta\",\n            _ms->_cluster_root + \"/backup\",\n            [](backup_service *bs) { return std::make_shared<policy_context>(bs); });\n        _ms->_backup_handler->start();\n        _ms->_backup_handler->backup_option().app_dropped_retry_delay_ms = 500_ms;\n        _ms->_backup_handler->backup_option().request_backup_period_ms = 20_ms;\n        _ms->_backup_handler->backup_option().issue_backup_interval_ms = 1000_ms;\n        const std::string policy_root = \"/test\";\n        dsn::error_code ec;\n        _ms->_storage\n            ->create_node(\n     
           _policy_root, dsn::TASK_CODE_EXEC_INLINED, [&ec](dsn::error_code err) { ec = err; })\n            ->wait();\n        _mhs = dsn::make_unique<meta_http_service>(_ms.get());\n        create_app(test_app);\n    }\n\n    void add_backup_policy(const std::string &policy_name)\n    {\n        static const std::string test_policy_name = policy_name;\n        const std::string policy_root = \"/test\";\n\n        configuration_add_backup_policy_request request;\n        configuration_add_backup_policy_response response;\n\n        request.policy_name = policy_name;\n        request.backup_provider_type = \"local_service\";\n        request.backup_interval_seconds = 24 * 60 * 60;\n        request.backup_history_count_to_keep = 1;\n        request.start_time = \"12:00\";\n        request.app_ids.clear();\n        request.app_ids.push_back(2);\n\n        auto result = fake_create_policy(_ms->_backup_handler.get(), request);\n\n        fake_wait_rpc(result, response);\n        // need to fix\n        ASSERT_EQ(response.err, ERR_OK);\n    }\n\n    void test_get_backup_policy(const std::string &name,\n                                const std::string &expected_json,\n                                const http_status_code &http_status)\n    {\n        http_request req;\n        http_response resp;\n        if (!name.empty())\n            req.query_args.emplace(\"name\", name);\n        _mhs->query_backup_policy_handler(req, resp);\n        ASSERT_EQ(resp.status_code, http_status) << http_status_code_to_string(resp.status_code);\n        ASSERT_EQ(resp.body, expected_json);\n    }\n\nprotected:\n    const std::string _policy_root = \"/test\";\n\n    std::unique_ptr<meta_http_service> _mhs;\n    std::string test_app = \"test_meta_http\";\n};\n\nTEST_F(meta_http_service_test, get_app_from_primary) { test_get_app_from_primary(); }\n\nTEST_F(meta_http_service_test, get_app_envs) { test_get_app_envs(); }\n\nTEST_F(meta_backup_test_base, get_backup_policy)\n{\n    struct 
http_backup_policy_test\n    {\n        std::string name;\n        std::string expected_json;\n        http_status_code http_status;\n    } tests[5] = {\n        {\"\", \"{}\\n\", http_status_code::ok},\n        {\"TEST1\",\n         \"{\\\"TEST1\\\":{\\\"name\\\":\\\"TEST1\\\",\\\"backup_provider_type\\\":\\\"local_service\\\",\"\n         \"\\\"backup_interval\\\":\\\"86400\\\",\\\"app_ids\\\":\\\"[2]\\\",\\\"start_time\\\":\\\"12:00\\\",\"\n         \"\\\"status\\\":\\\"enabled\\\",\\\"backup_history_count\\\":\\\"1\\\"}}\\n\",\n         http_status_code::ok},\n        {\"TEST2\",\n         \"{\\\"TEST2\\\":{\\\"name\\\":\\\"TEST2\\\",\\\"backup_provider_type\\\":\\\"local_service\\\",\"\n         \"\\\"backup_interval\\\":\\\"86400\\\",\\\"app_ids\\\":\\\"[2]\\\",\\\"start_time\\\":\\\"12:00\\\",\"\n         \"\\\"status\\\":\\\"enabled\\\",\\\"backup_history_count\\\":\\\"1\\\"}}\\n\",\n         http_status_code::ok},\n        {\"\",\n         \"{\\\"TEST1\\\":{\\\"name\\\":\\\"TEST1\\\",\\\"backup_provider_type\\\":\\\"local_service\\\",\\\"backup_\"\n         \"interval\\\":\\\"86400\\\",\\\"app_ids\\\":\\\"[2]\\\",\\\"start_time\\\":\\\"12:00\\\",\\\"status\\\":\\\"enabled\\\",\"\n         \"\\\"backup_history_count\\\":\\\"1\\\"},\\\"TEST2\\\":{\\\"name\\\":\\\"TEST2\\\",\\\"backup_provider_\"\n         \"type\\\":\\\"local_service\\\",\\\"backup_interval\\\":\\\"86400\\\",\\\"app_ids\\\":\\\"[2]\\\",\\\"start_\"\n         \"time\\\":\\\"12:00\\\",\\\"status\\\":\\\"enabled\\\",\\\"backup_history_count\\\":\\\"1\\\"}}\\n\",\n         http_status_code::ok},\n        {\"TEST3\", \"{}\\n\", http_status_code::ok},\n    };\n    test_get_backup_policy(tests[0].name, tests[0].expected_json, tests[0].http_status);\n    add_backup_policy(\"TEST1\");\n    test_get_backup_policy(tests[1].name, tests[1].expected_json, tests[1].http_status);\n    add_backup_policy(\"TEST2\");\n    test_get_backup_policy(tests[2].name, tests[2].expected_json, tests[2].http_status);\n    
test_get_backup_policy(tests[3].name, tests[3].expected_json, tests[3].http_status);\n    test_get_backup_policy(tests[4].name, tests[4].expected_json, tests[4].http_status);\n}\n\nclass meta_bulk_load_http_test : public meta_test_base\n{\npublic:\n    void SetUp() override\n    {\n        meta_test_base::SetUp();\n        FLAGS_enable_http_server = false;\n        _mhs = dsn::make_unique<meta_http_service>(_ms.get());\n        create_app(APP_NAME);\n    }\n\n    void TearDown() override\n    {\n        drop_app(APP_NAME);\n        _mhs = nullptr;\n        meta_test_base::TearDown();\n    }\n\n    http_response test_start_bulk_load(std::string req_body_json)\n    {\n        http_request req;\n        http_response resp;\n        req.body = blob::create_from_bytes(std::move(req_body_json));\n        _mhs->start_bulk_load_handler(req, resp);\n        return resp;\n    }\n\n    std::string test_query_bulk_load(const std::string &app_name)\n    {\n        http_request req;\n        http_response resp;\n        req.query_args.emplace(\"name\", app_name);\n        _mhs->query_bulk_load_handler(req, resp);\n        return resp.body;\n    }\n\n    http_response test_start_compaction(std::string req_body_json)\n    {\n        http_request req;\n        http_response resp;\n        req.body = blob::create_from_bytes(std::move(req_body_json));\n        _mhs->start_compaction_handler(req, resp);\n        return resp;\n    }\n\n    http_response test_update_scenario(std::string req_body_json)\n    {\n        http_request req;\n        http_response resp;\n        req.body = blob::create_from_bytes(std::move(req_body_json));\n        _mhs->update_scenario_handler(req, resp);\n        return resp;\n    }\n\n    void mock_bulk_load_context(const bulk_load_status::type &status)\n    {\n        auto app = find_app(APP_NAME);\n        app->is_bulk_loading = true;\n        const auto app_id = app->app_id;\n        bulk_svc()._bulk_load_app_id.insert(app_id);\n        
bulk_svc()._apps_in_progress_count[app_id] = app->partition_count;\n        bulk_svc()._app_bulk_load_info[app_id].status = status;\n        for (int i = 0; i < app->partition_count; ++i) {\n            gpid pid = gpid(app_id, i);\n            bulk_svc()._partition_bulk_load_info[pid].status = status;\n        }\n    }\n\n    void reset_local_bulk_load_states()\n    {\n        auto app = find_app(APP_NAME);\n        bulk_svc().reset_local_bulk_load_states(app->app_id, APP_NAME, true);\n        app->is_bulk_loading = false;\n    }\n\nprotected:\n    std::unique_ptr<meta_http_service> _mhs;\n    std::string APP_NAME = \"test_bulk_load\";\n};\n\nTEST_F(meta_bulk_load_http_test, start_bulk_load_request)\n{\n    fail::setup();\n    fail::cfg(\"meta_on_start_bulk_load\", \"return()\");\n    struct start_bulk_load_test\n    {\n        std::string request_json;\n        http_status_code expected_code;\n        std::string expected_response_json;\n    } tests[] = {\n        {R\"({\"app\":\"test_bulk_load\",\"cluster_name\":\"onebox\",\"file_provider_type\":\"local_service\",\"remote_root_path\":\"bulk_load_root\"})\",\n         http_status_code::bad_request,\n         \"invalid request structure\"},\n        {R\"({\"app_name\":\"test_bulk_load\",\"cluster_name\":\"onebox\",\"file_provider_type\":\"\",\"remote_root_path\":\"bulk_load_root\"})\",\n         http_status_code::bad_request,\n         \"file_provider_type should not be empty\"},\n        {R\"({\"app_name\":\"test_bulk_load\",\"cluster_name\":\"onebox\",\"file_provider_type\":\"local_service\",\"remote_root_path\":\"bulk_load_root\"})\",\n         http_status_code::ok,\n         R\"({\"error\":\"ERR_OK\",\"hint_msg\":\"\"})\"},\n    };\n    for (const auto &test : tests) {\n        http_response resp = test_start_bulk_load(test.request_json);\n        ASSERT_EQ(resp.status_code, test.expected_code);\n        std::string expected_json = test.expected_response_json;\n        if (test.expected_code == 
http_status_code::ok) {\n            expected_json += \"\\n\";\n        }\n        ASSERT_EQ(resp.body, expected_json);\n    }\n    fail::teardown();\n}\n\nTEST_F(meta_bulk_load_http_test, query_bulk_load_request)\n{\n    const std::string NOT_BULK_LOAD = \"not_bulk_load_app\";\n    const std::string NOT_FOUND = \"app_not_exist\";\n\n    create_app(NOT_BULK_LOAD);\n    mock_bulk_load_context(bulk_load_status::BLS_DOWNLOADING);\n\n    struct query_bulk_load_test\n    {\n        std::string app_name;\n        std::string expected_json;\n    } tests[] = {\n        {APP_NAME,\n         R\"({\"error\":\"ERR_OK\",\"app_status\":\"replication::bulk_load_status::BLS_DOWNLOADING\"})\"},\n        {NOT_BULK_LOAD,\n         R\"({\"error\":\"ERR_OK\",\"app_status\":\"replication::bulk_load_status::BLS_INVALID\"})\"},\n        {NOT_FOUND,\n         R\"({\"error\":\"ERR_APP_NOT_EXIST\",\"app_status\":\"replication::bulk_load_status::BLS_INVALID\"})\"}};\n    for (const auto &test : tests) {\n        ASSERT_EQ(test_query_bulk_load(test.app_name), test.expected_json + \"\\n\");\n    }\n\n    drop_app(NOT_BULK_LOAD);\n}\n\nTEST_F(meta_bulk_load_http_test, start_compaction_test)\n{\n    struct start_compaction_test\n    {\n        std::string request_json;\n        http_status_code expected_code;\n        std::string expected_response_json;\n    } tests[] = {\n        {R\"({\"app_name\":\"test_bulk_load\",\"type\":\"once\",\"target_level\":-1,\"bottommost_level_compaction\":\"skip\",\"max_concurrent_running_count\":\"0\"})\",\n         http_status_code::bad_request,\n         \"invalid request structure\"},\n        {R\"({\"app_name\":\"test_bulk_load\",\"type\":\"wrong\",\"target_level\":-1,\"bottommost_level_compaction\":\"skip\",\"max_concurrent_running_count\":0,\"trigger_time\":\"\"})\",\n         http_status_code::bad_request,\n         \"type should ony be 'once' or 'periodic'\"},\n        
{R\"({\"app_name\":\"test_bulk_load\",\"type\":\"once\",\"target_level\":-3,\"bottommost_level_compaction\":\"skip\",\"max_concurrent_running_count\":0,\"trigger_time\":\"\"})\",\n         http_status_code::bad_request,\n         \"target_level should be >= -1\"},\n        {R\"({\"app_name\":\"test_bulk_load\",\"type\":\"once\",\"target_level\":-1,\"bottommost_level_compaction\":\"wrong\",\"max_concurrent_running_count\":0,\"trigger_time\":\"\"})\",\n         http_status_code::bad_request,\n         \"bottommost_level_compaction should ony be 'skip' or 'force'\"},\n        {R\"({\"app_name\":\"test_bulk_load\",\"type\":\"once\",\"target_level\":-1,\"bottommost_level_compaction\":\"skip\",\"max_concurrent_running_count\":-2,\"trigger_time\":\"\"})\",\n         http_status_code::bad_request,\n         \"max_running_count should be >= 0\"},\n        {R\"({\"app_name\":\"test_bulk_load\",\"type\":\"once\",\"target_level\":-1,\"bottommost_level_compaction\":\"skip\",\"max_concurrent_running_count\":0,\"trigger_time\":\"\"})\",\n         http_status_code::ok,\n         R\"({\"error\":\"ERR_OK\",\"hint_message\":\"\"})\"}};\n\n    for (const auto &test : tests) {\n        http_response resp = test_start_compaction(test.request_json);\n        ASSERT_EQ(resp.status_code, test.expected_code);\n        std::string expected_json = test.expected_response_json;\n        if (test.expected_code == http_status_code::ok) {\n            expected_json += \"\\n\";\n        }\n        ASSERT_EQ(resp.body, expected_json);\n    }\n}\n\nTEST_F(meta_bulk_load_http_test, update_scenario_test)\n{\n    struct update_scenario_test\n    {\n        std::string request_json;\n        http_status_code expected_code;\n        std::string expected_response_json;\n    } tests[] = {{R\"({\"app\":\"test_bulk_load\",\"scenario\":\"normal\"})\",\n                  http_status_code::bad_request,\n                  \"invalid request structure\"},\n                 
{R\"({\"app_name\":\"test_bulk_load\",\"scenario\":\"wrong\"})\",\n                  http_status_code::bad_request,\n                  \"scenario should ony be 'normal' or 'bulk_load'\"},\n                 {R\"({\"app_name\":\"test_bulk_load\",\"scenario\":\"bulk_load\"})\",\n                  http_status_code::ok,\n                  R\"({\"error\":\"ERR_OK\",\"hint_message\":\"\"})\"}};\n\n    for (const auto &test : tests) {\n        http_response resp = test_update_scenario(test.request_json);\n        ASSERT_EQ(resp.status_code, test.expected_code);\n        std::string expected_json = test.expected_response_json;\n        if (test.expected_code == http_status_code::ok) {\n            expected_json += \"\\n\";\n        }\n        ASSERT_EQ(resp.body, expected_json);\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/meta_mauanl_compaction_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/dist/replication/replica_envs.h>\n\n#include \"meta_service_test_app.h\"\n#include \"meta_test_base.h\"\n\nnamespace dsn {\nnamespace replication {\nclass meta_app_compaction_test : public meta_test_base\n{\npublic:\n    meta_app_compaction_test() {}\n\n    void SetUp() override\n    {\n        meta_test_base::SetUp();\n        prepare();\n    }\n\n    void prepare()\n    {\n        create_app(APP_NAME, PARTITION_COUNT);\n        auto app = find_app(APP_NAME);\n        app->partitions.resize(PARTITION_COUNT);\n        app->helpers->contexts.resize(PARTITION_COUNT);\n        for (auto i = 0; i < PARTITION_COUNT; ++i) {\n            serving_replica rep;\n            rep.compact_status = manual_compaction_status::IDLE;\n            std::vector<serving_replica> reps;\n            reps.emplace_back(rep);\n            reps.emplace_back(rep);\n            reps.emplace_back(rep);\n            app->helpers->contexts[i].serving = reps;\n        }\n    }\n\n    error_code start_manual_compaction(std::string app_name,\n                                       std::string disable_manual,\n                                       bool bottommost 
= false,\n                                       int32_t target_level = -1,\n                                       int32_t running_count = 0)\n    {\n        if (app_name == APP_NAME) {\n            auto app = find_app(app_name);\n            app->envs[replica_envs::MANUAL_COMPACT_DISABLED] = disable_manual;\n        }\n        auto request = dsn::make_unique<start_app_manual_compact_request>();\n        request->app_name = app_name;\n        if (target_level != -1) {\n            request->__set_target_level(target_level);\n        }\n        if (running_count != 0) {\n            request->__set_max_running_count(running_count);\n        }\n        request->__set_bottommost(bottommost);\n\n        start_manual_compact_rpc rpc(std::move(request), RPC_CM_START_MANUAL_COMPACT);\n        _ss->on_start_manual_compact(rpc);\n        _ss->wait_all_task();\n        return rpc.response().err;\n    }\n\n    void check_after_start_compaction(std::string bottommost,\n                                      int32_t target_level = -1,\n                                      int32_t running_count = 0)\n    {\n        auto app = find_app(APP_NAME);\n        if (app->envs.find(replica_envs::MANUAL_COMPACT_ONCE_BOTTOMMOST_LEVEL_COMPACTION) !=\n            app->envs.end()) {\n            ASSERT_EQ(app->envs[replica_envs::MANUAL_COMPACT_ONCE_BOTTOMMOST_LEVEL_COMPACTION],\n                      bottommost);\n        }\n        if (app->envs.find(replica_envs::MANUAL_COMPACT_ONCE_TARGET_LEVEL) != app->envs.end()) {\n            ASSERT_EQ(app->envs[replica_envs::MANUAL_COMPACT_ONCE_TARGET_LEVEL],\n                      std::to_string(target_level));\n        }\n        if (running_count > 0 &&\n            app->envs.find(replica_envs::MANUAL_COMPACT_MAX_CONCURRENT_RUNNING_COUNT) !=\n                app->envs.end()) {\n            ASSERT_EQ(app->envs[replica_envs::MANUAL_COMPACT_MAX_CONCURRENT_RUNNING_COUNT],\n                      std::to_string(running_count));\n        }\n        for 
(auto &cc : app->helpers->contexts) {\n            for (auto &r : cc.serving) {\n                ASSERT_EQ(r.compact_status, manual_compaction_status::IDLE);\n            }\n        }\n    }\n\n    query_app_manual_compact_response query_manual_compaction(int32_t mock_progress)\n    {\n        manual_compaction_status::type status = manual_compaction_status::IDLE;\n        if (mock_progress == 0) {\n            status = manual_compaction_status::QUEUING;\n        } else if (mock_progress == 100) {\n            status = manual_compaction_status::FINISHED;\n        }\n        auto app = find_app(APP_NAME);\n        app->helpers->reset_manual_compact_status();\n        for (auto &cc : app->helpers->contexts) {\n            for (auto &r : cc.serving) {\n                r.compact_status = status;\n            }\n        }\n        if (mock_progress == 50) {\n            for (auto i = 0; i < PARTITION_COUNT / 2; i++) {\n                auto &cc = app->helpers->contexts[i];\n                for (auto &r : cc.serving) {\n                    r.compact_status = manual_compaction_status::FINISHED;\n                }\n            }\n        }\n        auto request = dsn::make_unique<query_app_manual_compact_request>();\n        request->app_name = APP_NAME;\n\n        query_manual_compact_rpc rpc(std::move(request), RPC_CM_QUERY_MANUAL_COMPACT_STATUS);\n        _ss->on_query_manual_compact_status(rpc);\n        wait_all();\n        return rpc.response();\n    }\n\npublic:\n    std::string APP_NAME = \"manual_compaction_test\";\n    int32_t PARTITION_COUNT = 4;\n};\n\nTEST_F(meta_app_compaction_test, test_start_compaction)\n{\n    struct test_case\n    {\n        std::string app_name;\n        std::string disable_compaction;\n        bool bottommost;\n        int32_t target_level;\n        int32_t running_count;\n        error_code expected_err;\n        std::string expected_bottommost;\n    } tests[] = {{\"app_not_exist\", \"false\", false, -1, 0, ERR_APP_NOT_EXIST, 
\"skip\"},\n                 {APP_NAME, \"true\", false, -1, 0, ERR_OPERATION_DISABLED, \"skip\"},\n                 {APP_NAME, \"false\", false, -5, 0, ERR_INVALID_PARAMETERS, \"skip\"},\n                 {APP_NAME, \"false\", false, -1, -1, ERR_INVALID_PARAMETERS, \"skip\"},\n                 {APP_NAME, \"false\", false, -1, 0, ERR_OK, \"skip\"},\n                 {APP_NAME, \"false\", true, -1, 0, ERR_OK, \"force\"},\n                 {APP_NAME, \"false\", false, 1, 0, ERR_OK, \"skip\"},\n                 {APP_NAME, \"false\", true, -1, 1, ERR_OK, \"force\"}};\n\n    for (const auto &test : tests) {\n        auto err = start_manual_compaction(test.app_name,\n                                           test.disable_compaction,\n                                           test.bottommost,\n                                           test.target_level,\n                                           test.running_count);\n        ASSERT_EQ(err, test.expected_err);\n        if (err == ERR_OK) {\n            check_after_start_compaction(\n                test.expected_bottommost, test.target_level, test.running_count);\n        }\n    }\n}\n\nTEST_F(meta_app_compaction_test, test_query_compaction)\n{\n    struct test_case\n    {\n        int32_t mock_progress;\n        error_code expected_err;\n    } tests[] = {{-1, ERR_INVALID_STATE}, {0, ERR_OK}, {50, ERR_OK}, {100, ERR_OK}};\n\n    for (const auto &test : tests) {\n        auto resp = query_manual_compaction(test.mock_progress);\n        ASSERT_EQ(resp.err, test.expected_err);\n        if (resp.err == ERR_OK) {\n            ASSERT_EQ(resp.progress, test.mock_progress);\n        }\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/meta_partition_guardian_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <climits>\n\n#include <gtest/gtest.h>\n#include <dsn/service_api_c.h>\n#include <dsn/service_api_cpp.h>\n#include <dsn/dist/replication.h>\n\n#include \"meta/meta_data.h\"\n#include \"meta/meta_service.h\"\n#include \"meta/server_state.h\"\n#include \"meta/meta_server_failure_detector.h\"\n#include \"meta/greedy_load_balancer.h\"\n#include \"meta/test/misc/misc.h\"\n#include \"meta_test_base.h\"\n#include \"meta_service_test_app.h\"\n#include \"dummy_balancer.h\"\n\nnamespace dsn {\nnamespace replication {\n\ntypedef std::shared_ptr<configuration_update_request> cur_ptr;\n\n// apply request in request.type to request.config\nstatic void apply_update_request(/*in-out*/ configuration_update_request &update_req)\n{\n    
dsn::partition_configuration &pc = update_req.config;\n    pc.ballot++;\n\n    switch (update_req.type) {\n    case config_type::CT_ASSIGN_PRIMARY:\n    case config_type::CT_UPGRADE_TO_PRIMARY:\n        pc.primary = update_req.node;\n        replica_helper::remove_node(update_req.node, pc.secondaries);\n        break;\n\n    case config_type::CT_ADD_SECONDARY:\n    case config_type::CT_ADD_SECONDARY_FOR_LB:\n        pc.secondaries.push_back(update_req.node);\n        update_req.type = config_type::CT_UPGRADE_TO_SECONDARY;\n        break;\n\n    case config_type::CT_REMOVE:\n    case config_type::CT_DOWNGRADE_TO_INACTIVE:\n        if (update_req.node == pc.primary)\n            pc.primary.set_invalid();\n        else\n            replica_helper::remove_node(update_req.node, pc.secondaries);\n        break;\n\n    case config_type::CT_DOWNGRADE_TO_SECONDARY:\n        pc.secondaries.push_back(pc.primary);\n        pc.primary.set_invalid();\n        break;\n    default:\n        break;\n    }\n}\n\nstatic auto default_filter = [](const dsn::rpc_address &target, dsn::message_ex *request) {\n    dsn::message_ex *recv_request = create_corresponding_receive(request);\n    cur_ptr update_req = std::make_shared<configuration_update_request>();\n    ::dsn::unmarshall(recv_request, *update_req);\n    destroy_message(recv_request);\n    apply_update_request(*update_req);\n    return update_req;\n};\n\nclass meta_partition_guardian_test : public meta_test_base\n{\npublic:\n    void cure_test();\n    void cure();\n    void from_proposal_test();\n\n    void call_update_configuration(\n        meta_service *svc, std::shared_ptr<dsn::replication::configuration_update_request> &request)\n    {\n        dsn::message_ex *fake_request =\n            dsn::message_ex::create_request(RPC_CM_UPDATE_PARTITION_CONFIGURATION);\n        ::dsn::marshall(fake_request, *request);\n        fake_request->add_ref();\n\n        dsn::tasking::enqueue(\n            LPC_META_STATE_HIGH,\n            
nullptr,\n            std::bind(\n                &server_state::on_update_configuration, svc->_state.get(), request, fake_request),\n            server_state::sStateHash);\n    }\n};\n\nclass message_filter : public dsn::replication::meta_service\n{\npublic:\n    typedef std::function<cur_ptr(const dsn::rpc_address &target, dsn::message_ex *request)> filter;\n    message_filter(meta_partition_guardian_test *app) : meta_service(), _app(app) {}\n    void set_filter(const filter &f) { _filter = f; }\n    virtual void reply_message(dsn::message_ex *request, dsn::message_ex *response) override\n    {\n        destroy_message(response);\n    }\n\n    virtual void send_message(const dsn::rpc_address &target, dsn::message_ex *request) override\n    {\n        // we expect this is a configuration_update_request proposal\n        cur_ptr update_request = _filter(target, request);\n        destroy_message(request);\n\n        if (update_request != nullptr) {\n            _app->call_update_configuration(this, update_request);\n        }\n    }\n\nprivate:\n    meta_partition_guardian_test *_app;\n    filter _filter;\n};\n\nvoid meta_partition_guardian_test::cure_test()\n{\n    dsn::error_code ec;\n    dsn::task_ptr t;\n    std::shared_ptr<message_filter> svc(new message_filter(this));\n    svc->_failure_detector.reset(new dsn::replication::meta_server_failure_detector(svc.get()));\n    bool proposal_sent;\n    dsn::rpc_address last_addr;\n\n    ec = svc->remote_storage_initialize();\n    ASSERT_EQ(ec, dsn::ERR_OK);\n    svc->_partition_guardian.reset(new partition_guardian(svc.get()));\n    svc->_balancer.reset(new dummy_balancer(svc.get()));\n\n    server_state *state = svc->_state.get();\n    state->initialize(svc.get(), meta_options::concat_path_unix_style(svc->_cluster_root, \"apps\"));\n    dsn::app_info info;\n    info.is_stateful = true;\n    info.status = dsn::app_status::AS_CREATING;\n    info.app_id = 1;\n    info.app_name = \"simple_kv.instance0\";\n    
info.app_type = \"simple_kv\";\n    info.max_replica_count = 3;\n    info.partition_count = 1;\n    std::shared_ptr<app_state> app = app_state::create(info);\n    state->_all_apps.emplace(1, app);\n    state->sync_apps_to_remote_storage();\n    ASSERT_TRUE(state->spin_wait_staging(20));\n    svc->_started = true;\n\n    std::vector<dsn::rpc_address> nodes;\n    generate_node_list(nodes, 4, 4);\n\n    dsn::partition_configuration &pc = app->partitions[0];\n    config_context &cc = *get_config_context(state->_all_apps, dsn::gpid(1, 0));\n\n#define PROPOSAL_FLAG_CHECK                                                                        \\\n    ASSERT_TRUE(proposal_sent);                                                                    \\\n    proposal_sent = false\n\n#define CONDITION_CHECK(cond) ASSERT_TRUE(spin_wait_condition(cond, 20))\n\n    std::cerr << \"Case: upgrade secondary to primary, and message lost\" << std::endl;\n    // initialize\n    state->_nodes.clear();\n    pc.primary.set_invalid();\n    pc.secondaries = {nodes[0], nodes[1]};\n    pc.ballot = 1;\n    state->initialize_node_state();\n    svc->set_node_state(nodes, true);\n    proposal_sent = false;\n\n    // check partitions, then ignore the proposal\n    svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr {\n        dsn::message_ex *recv_request = create_corresponding_receive(req);\n        cur_ptr update_req = std::make_shared<configuration_update_request>();\n        ::dsn::unmarshall(recv_request, *update_req);\n        destroy_message(recv_request);\n\n        EXPECT_EQ(update_req->type, config_type::CT_UPGRADE_TO_PRIMARY);\n        EXPECT_TRUE(is_secondary(pc, update_req->node));\n        EXPECT_EQ(target, update_req->node);\n\n        last_addr = update_req->node;\n        proposal_sent = true;\n        return nullptr;\n    });\n\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              
std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    PROPOSAL_FLAG_CHECK;\n\n    // check partitions again\n    svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr {\n        dsn::message_ex *recv_request = create_corresponding_receive(req);\n        cur_ptr update_req = std::make_shared<configuration_update_request>();\n        ::dsn::unmarshall(recv_request, *update_req);\n        destroy_message(recv_request);\n\n        EXPECT_EQ(config_type::CT_UPGRADE_TO_PRIMARY, update_req->type);\n        EXPECT_EQ(update_req->node, last_addr);\n        EXPECT_EQ(target, update_req->node);\n\n        proposal_sent = true;\n        apply_update_request(*update_req);\n\n        svc->set_filter(default_filter);\n        return update_req;\n    });\n\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    PROPOSAL_FLAG_CHECK;\n    CONDITION_CHECK([&] { return pc.primary == last_addr; });\n    std::this_thread::sleep_for(std::chrono::milliseconds(500));\n\n    std::cerr << \"Case: upgrade secondary to primary, and the candidate died\" << std::endl;\n    // initialize\n    state->_nodes.clear();\n    pc.primary.set_invalid();\n    pc.secondaries = {nodes[0], nodes[1]};\n    pc.ballot = 1;\n    state->initialize_node_state();\n    svc->set_node_state(nodes, true);\n    proposal_sent = false;\n\n    // check partitions, then inject a event that node[0] is dead\n    svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr {\n        dsn::message_ex *recv_request = create_corresponding_receive(req);\n        cur_ptr update_req = std::make_shared<configuration_update_request>();\n        ::dsn::unmarshall(recv_request, *update_req);\n        
destroy_message(recv_request);\n\n        EXPECT_EQ(update_req->type, config_type::CT_UPGRADE_TO_PRIMARY);\n        EXPECT_TRUE(is_secondary(pc, update_req->node));\n        EXPECT_EQ(target, update_req->node);\n\n        proposal_sent = true;\n        last_addr = update_req->node;\n        svc->set_node_state({target}, false);\n        return nullptr;\n    });\n\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    PROPOSAL_FLAG_CHECK;\n\n    // check partitions again\n    svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr {\n        dsn::message_ex *recv_request = create_corresponding_receive(req);\n        cur_ptr update_req = std::make_shared<configuration_update_request>();\n        ::dsn::unmarshall(recv_request, *update_req);\n        destroy_message(recv_request);\n\n        EXPECT_EQ(update_req->type, config_type::CT_UPGRADE_TO_PRIMARY);\n        EXPECT_TRUE(is_secondary(pc, update_req->node));\n        EXPECT_EQ(target, update_req->node);\n        EXPECT_NE(target, last_addr);\n\n        proposal_sent = true;\n        apply_update_request(*update_req);\n        svc->set_filter(default_filter);\n        return update_req;\n    });\n\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    PROPOSAL_FLAG_CHECK;\n    CONDITION_CHECK([&] { return !pc.primary.is_invalid() && pc.primary != last_addr; });\n    std::this_thread::sleep_for(std::chrono::milliseconds(500));\n\n    std::cerr << \"Case: add secondary, and the message lost\" << std::endl;\n    // initialize\n    state->_nodes.clear();\n    pc.primary = nodes[0];\n    
pc.secondaries = {nodes[1]};\n    pc.ballot = 1;\n    state->initialize_node_state();\n    svc->set_node_state(nodes, true);\n    proposal_sent = false;\n\n    // check partitions, then ignore the proposal\n    svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr {\n        dsn::message_ex *recv_request = create_corresponding_receive(req);\n        cur_ptr update_req = std::make_shared<configuration_update_request>();\n        ::dsn::unmarshall(recv_request, *update_req);\n        destroy_message(recv_request);\n\n        EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY);\n        EXPECT_FALSE(is_secondary(pc, update_req->node));\n        EXPECT_EQ(target, nodes[0]);\n\n        last_addr = update_req->node;\n        proposal_sent = true;\n        return nullptr;\n    });\n\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    PROPOSAL_FLAG_CHECK;\n\n    // check partitions again\n    svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr {\n        dsn::message_ex *recv_request = create_corresponding_receive(req);\n        cur_ptr update_req = std::make_shared<configuration_update_request>();\n        ::dsn::unmarshall(recv_request, *update_req);\n        destroy_message(recv_request);\n\n        EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY);\n        EXPECT_EQ(update_req->node, last_addr);\n        EXPECT_EQ(target, nodes[0]);\n\n        proposal_sent = true;\n        apply_update_request(*update_req);\n        svc->set_filter(default_filter);\n        return update_req;\n    });\n\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                      
        server_state::sStateHash);\n    t->wait();\n    PROPOSAL_FLAG_CHECK;\n    CONDITION_CHECK([&] { return pc.secondaries.size() == 2 && is_secondary(pc, last_addr); });\n    std::this_thread::sleep_for(std::chrono::milliseconds(500));\n\n    std::cerr << \"Case: add secondary, but the primary is removing another\" << std::endl;\n    // initialize\n    state->_nodes.clear();\n    pc.primary = nodes[0];\n    pc.secondaries = {nodes[1]};\n    pc.ballot = 1;\n    state->initialize_node_state();\n    svc->set_node_state(nodes, true);\n    proposal_sent = false;\n\n    // check partitions, then inject another update_request\n    svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr {\n        dsn::message_ex *recv_request = create_corresponding_receive(req);\n        cur_ptr update_req = std::make_shared<configuration_update_request>();\n        ::dsn::unmarshall(recv_request, *update_req);\n        destroy_message(recv_request);\n\n        EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY);\n        EXPECT_FALSE(is_secondary(pc, update_req->node));\n        EXPECT_EQ(target, nodes[0]);\n\n        update_req->config.ballot++;\n        update_req->type = config_type::CT_DOWNGRADE_TO_INACTIVE;\n        update_req->node = update_req->config.secondaries[0];\n        update_req->config.secondaries.clear();\n\n        proposal_sent = true;\n\n        svc->set_filter(default_filter);\n        return update_req;\n    });\n\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    PROPOSAL_FLAG_CHECK;\n    CONDITION_CHECK([&] { return pc.secondaries.size() == 2; });\n    std::this_thread::sleep_for(std::chrono::milliseconds(500));\n\n    std::cerr << \"Case: add secondary, and the added secondary is dead\" << std::endl;\n    // 
initialize\n    state->_nodes.clear();\n    pc.primary = nodes[0];\n    pc.secondaries = {nodes[1]};\n    pc.ballot = 1;\n    state->initialize_node_state();\n    svc->set_node_state(nodes, true);\n    proposal_sent = false;\n\n    // check partitions, then inject the nodes[2] dead\n    svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr {\n        dsn::message_ex *recv_request = create_corresponding_receive(req);\n        cur_ptr update_req = std::make_shared<configuration_update_request>();\n        ::dsn::unmarshall(recv_request, *update_req);\n        destroy_message(recv_request);\n\n        EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY);\n        EXPECT_FALSE(is_secondary(pc, update_req->node));\n        EXPECT_EQ(target, nodes[0]);\n\n        last_addr = update_req->node;\n        svc->set_node_state({update_req->node}, false);\n        proposal_sent = true;\n        return nullptr;\n    });\n\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    PROPOSAL_FLAG_CHECK;\n\n    // check partitions again\n    svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr {\n        dsn::message_ex *recv_request = create_corresponding_receive(req);\n        cur_ptr update_req = std::make_shared<configuration_update_request>();\n        ::dsn::unmarshall(recv_request, *update_req);\n        destroy_message(recv_request);\n\n        EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY);\n        EXPECT_NE(update_req->node, last_addr);\n        EXPECT_FALSE(is_secondary(pc, update_req->node));\n        EXPECT_EQ(target, nodes[0]);\n\n        proposal_sent = true;\n        last_addr = update_req->node;\n        apply_update_request(*update_req);\n        svc->set_filter(default_filter);\n        
return update_req;\n    });\n\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    PROPOSAL_FLAG_CHECK;\n    CONDITION_CHECK([&] { return pc.secondaries.size() == 2 && is_secondary(pc, last_addr); });\n    std::this_thread::sleep_for(std::chrono::milliseconds(500));\n\n    std::cerr << \"Case: add secondary, and the primary is dead\" << std::endl;\n    // initialize\n    state->_nodes.clear();\n    pc.primary = nodes[0];\n    pc.secondaries = {nodes[1]};\n    pc.ballot = 1;\n    state->initialize_node_state();\n    svc->set_node_state(nodes, true);\n    proposal_sent = false;\n\n    // check partitions, then ignore the proposal\n    svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr {\n        dsn::message_ex *recv_request = create_corresponding_receive(req);\n        cur_ptr update_req = std::make_shared<configuration_update_request>();\n        ::dsn::unmarshall(recv_request, *update_req);\n        destroy_message(recv_request);\n\n        EXPECT_EQ(update_req->type, config_type::CT_ADD_SECONDARY);\n        EXPECT_FALSE(is_secondary(pc, update_req->node));\n        EXPECT_EQ(target, pc.primary);\n\n        proposal_sent = true;\n        svc->set_node_state({pc.primary}, false);\n        svc->set_filter(default_filter);\n        return nullptr;\n    });\n\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    PROPOSAL_FLAG_CHECK;\n    CONDITION_CHECK([&] { return pc.primary == nodes[1]; });\n    std::this_thread::sleep_for(std::chrono::milliseconds(500));\n\n    state->_nodes.clear();\n    pc.primary.set_invalid();\n    
pc.secondaries.clear();\n    pc.last_drops = {nodes[0], nodes[1], nodes[2]};\n    pc.ballot = 4;\n    state->initialize_node_state();\n    svc->set_node_state(nodes, true);\n\n    svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr {\n        dsn::message_ex *recv_request = create_corresponding_receive(req);\n        cur_ptr update_req = std::make_shared<configuration_update_request>();\n        ::dsn::unmarshall(recv_request, *update_req);\n        destroy_message(recv_request);\n\n        EXPECT_EQ(update_req->type, config_type::CT_ASSIGN_PRIMARY);\n        EXPECT_EQ(update_req->node, nodes[2]);\n        EXPECT_EQ(target, nodes[2]);\n\n        proposal_sent = true;\n        svc->set_filter(default_filter);\n        apply_update_request(*update_req);\n        return update_req;\n    });\n\n    std::cerr << \"Case: recover from DDD state, nodes[1] isn't alive\" << std::endl;\n    svc->set_node_state({nodes[1]}, false);\n    cc.dropped = {\n        dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1},\n        dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1},\n        dropped_replica{nodes[2], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1},\n    };\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    ASSERT_FALSE(proposal_sent);\n    CONDITION_CHECK([&] { return pc.primary.is_invalid(); });\n    std::this_thread::sleep_for(std::chrono::milliseconds(500));\n\n    std::cerr << \"Case: recover from DDD state, nodes[2] is not in dropped\" << std::endl;\n    svc->set_node_state({nodes[1]}, true);\n    cc.dropped = {dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1},\n                  dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1}};\n\n    t = 
dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    ASSERT_FALSE(proposal_sent);\n    CONDITION_CHECK([&] { return pc.primary.is_invalid(); });\n    std::this_thread::sleep_for(std::chrono::milliseconds(200));\n\n    std::cerr << \"Case: recover from DDD state, haven't collect nodes[2]'s info from replica, and \"\n                 \"nodes[2]'s info haven't updated\"\n              << std::endl;\n    cc.dropped = {dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1},\n                  dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1},\n                  dropped_replica{nodes[2], 500, -1, -1, -1}};\n\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    ASSERT_FALSE(proposal_sent);\n    CONDITION_CHECK([&] { return pc.primary.is_invalid(); });\n    std::this_thread::sleep_for(std::chrono::milliseconds(200));\n\n    std::cerr << \"Case: recover from DDD state, haven't collect nodes[2]'s info from replica, and \"\n                 \"nodes[2]'s info have updated\"\n              << std::endl;\n    svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr {\n        dsn::message_ex *recv_request = create_corresponding_receive(req);\n        cur_ptr update_req = std::make_shared<configuration_update_request>();\n        ::dsn::unmarshall(recv_request, *update_req);\n        destroy_message(recv_request);\n\n        EXPECT_EQ(update_req->type, config_type::CT_ASSIGN_PRIMARY);\n        EXPECT_EQ(update_req->node, nodes[1]);\n        EXPECT_EQ(target, nodes[1]);\n\n        proposal_sent = true;\n        
svc->set_filter(default_filter);\n        apply_update_request(*update_req);\n        return update_req;\n    });\n\n    cc.dropped = {dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1},\n                  dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1},\n                  dropped_replica{nodes[2], 500, -1, -1, -1}};\n    pc.last_committed_decree = 0;\n    get_node_state(state->_nodes, nodes[2], false)->set_replicas_collect_flag(true);\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n\n    t->wait();\n    PROPOSAL_FLAG_CHECK;\n    CONDITION_CHECK([&] { return pc.primary == nodes[1]; });\n    std::this_thread::sleep_for(std::chrono::milliseconds(200));\n\n    std::cerr << \"Case: recover from DDD, haven't collect nodes[1/2]'s info from replica, and \"\n                 \"nodes[1/2]'s info both have updated\"\n              << std::endl;\n    cc.dropped = {dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1},\n                  dropped_replica{nodes[1], 500, -1, -1, -1},\n                  dropped_replica{nodes[2], 500, -1, -1, -1}};\n    get_node_state(state->_nodes, nodes[1], false)->set_replicas_collect_flag(true);\n    get_node_state(state->_nodes, nodes[2], false)->set_replicas_collect_flag(true);\n\n    pc.primary.set_invalid();\n    pc.secondaries.clear();\n    pc.last_drops = {nodes[0], nodes[1], nodes[2]};\n\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    ASSERT_FALSE(proposal_sent);\n    CONDITION_CHECK([&] { return pc.primary.is_invalid(); });\n    
std::this_thread::sleep_for(std::chrono::milliseconds(200));\n\n    std::cerr << \"Case: recover from DDD state, larger ballot not match with larger decree\"\n              << std::endl;\n    cc.dropped = {\n        dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1},\n        dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 0, 1},\n        dropped_replica{nodes[2], dropped_replica::INVALID_TIMESTAMP, 0, 1, 1},\n    };\n\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    ASSERT_FALSE(proposal_sent);\n    CONDITION_CHECK([&] { return pc.primary.is_invalid(); });\n    std::this_thread::sleep_for(std::chrono::milliseconds(200));\n\n    std::cerr << \"Case: recover from DDD state, committed decree less than meta's\" << std::endl;\n    cc.dropped = {\n        dropped_replica{nodes[0], dropped_replica::INVALID_TIMESTAMP, 1, 1, 1},\n        dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 1, 10, 15},\n        dropped_replica{nodes[2], dropped_replica::INVALID_TIMESTAMP, 1, 15, 15},\n    };\n    pc.last_committed_decree = 30;\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    ASSERT_FALSE(proposal_sent);\n    CONDITION_CHECK([&] { return pc.primary.is_invalid(); });\n    std::this_thread::sleep_for(std::chrono::milliseconds(200));\n\n    std::cerr << \"Case: recover from DDD state, select primary from config_context::dropped\"\n              << std::endl;\n    cc.dropped = {\n        dropped_replica{nodes[0], 12344, -1, -1, -1},\n        dropped_replica{nodes[2], dropped_replica::INVALID_TIMESTAMP, 4, 2, 4},\n        
dropped_replica{nodes[1], dropped_replica::INVALID_TIMESTAMP, 4, 3, 4},\n    };\n    pc.last_committed_decree = 2;\n    svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr {\n        dsn::message_ex *recv_request = create_corresponding_receive(req);\n        cur_ptr update_req = std::make_shared<configuration_update_request>();\n        ::dsn::unmarshall(recv_request, *update_req);\n        destroy_message(recv_request);\n\n        EXPECT_EQ(update_req->type, config_type::CT_ASSIGN_PRIMARY);\n        EXPECT_EQ(update_req->node, nodes[1]);\n        EXPECT_EQ(target, nodes[1]);\n\n        proposal_sent = true;\n        svc->set_filter(default_filter);\n        apply_update_request(*update_req);\n        return update_req;\n    });\n\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    PROPOSAL_FLAG_CHECK;\n    CONDITION_CHECK([&] { return pc.primary == nodes[1]; });\n    std::this_thread::sleep_for(std::chrono::milliseconds(200));\n\n    std::cerr << \"Case: recover from DDD state, only one primary\" << std::endl;\n    svc->set_filter([&](const dsn::rpc_address &target, dsn::message_ex *req) -> cur_ptr {\n        dsn::message_ex *recv_request = create_corresponding_receive(req);\n        cur_ptr update_req = std::make_shared<configuration_update_request>();\n        ::dsn::unmarshall(recv_request, *update_req);\n        destroy_message(recv_request);\n\n        EXPECT_EQ(update_req->type, config_type::CT_ASSIGN_PRIMARY);\n        EXPECT_EQ(update_req->node, nodes[0]);\n        EXPECT_EQ(target, nodes[0]);\n\n        proposal_sent = true;\n        svc->set_filter(default_filter);\n        apply_update_request(*update_req);\n        return update_req;\n    });\n\n    pc.primary.set_invalid();\n    pc.secondaries.clear();\n    
pc.last_drops = {nodes[0]};\n    state->_nodes.clear();\n    pc.ballot = 1;\n    state->initialize_node_state();\n    svc->set_node_state({nodes[0], nodes[1], nodes[2]}, true);\n\n    t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                              nullptr,\n                              std::bind(&server_state::check_all_partitions, state),\n                              server_state::sStateHash);\n    t->wait();\n    PROPOSAL_FLAG_CHECK;\n    CONDITION_CHECK([&] { return pc.primary == nodes[0]; });\n}\n\nstatic void check_nodes_loads(node_mapper &nodes)\n{\n    unsigned int min_primaries = UINT_MAX, min_partitions = UINT_MAX;\n    unsigned int max_primaries = 0, max_partitions = 0;\n    for (auto &pairs : nodes) {\n        const node_state &ns = pairs.second;\n        min_primaries = std::min(min_primaries, ns.primary_count());\n        min_partitions = std::min(min_partitions, ns.partition_count());\n        max_primaries = std::max(max_primaries, ns.primary_count());\n        max_partitions = std::max(max_partitions, ns.partition_count());\n    }\n\n    ASSERT_TRUE(max_primaries - min_primaries <= 1);\n    ASSERT_TRUE(max_partitions - min_partitions <= 1);\n}\n\nvoid meta_partition_guardian_test::cure()\n{\n    std::vector<dsn::rpc_address> node_list;\n    generate_node_list(node_list, 20, 100);\n\n    app_mapper app;\n    node_mapper nodes;\n    meta_service svc;\n    partition_guardian guardian(&svc);\n\n    dsn::app_info info;\n    info.app_id = 1;\n    info.is_stateful = true;\n    info.status = dsn::app_status::AS_AVAILABLE;\n    info.app_name = \"test\";\n    info.app_type = \"test\";\n    info.max_replica_count = 3;\n    info.partition_count = 1024;\n    std::shared_ptr<app_state> the_app = app_state::create(info);\n\n    app.emplace(the_app->app_id, the_app);\n    for (const auto &address : node_list) {\n        get_node_state(nodes, address, true)->set_alive(true);\n    }\n\n    bool all_partitions_healthy = false;\n    while 
(!all_partitions_healthy) {\n        configuration_proposal_action action;\n        pc_status status;\n        all_partitions_healthy = true;\n\n        for (int i = 0; i != the_app->partition_count; ++i) {\n            dsn::gpid &pid = the_app->partitions[i].pid;\n            status = guardian.cure({&app, &nodes}, pid, action);\n            if (status != pc_status::healthy) {\n                all_partitions_healthy = false;\n                proposal_action_check_and_apply(action, pid, app, nodes, nullptr);\n\n                configuration_update_request fake_request;\n                fake_request.info = *the_app;\n                fake_request.config = the_app->partitions[i];\n                fake_request.type = action.type;\n                fake_request.node = action.node;\n                fake_request.host_node = action.node;\n\n                guardian.reconfig({&app, &nodes}, fake_request);\n                check_nodes_loads(nodes);\n            }\n        }\n    }\n}\n\nvoid meta_partition_guardian_test::from_proposal_test()\n{\n    std::vector<dsn::rpc_address> node_list;\n    generate_node_list(node_list, 3, 3);\n\n    app_mapper app;\n    node_mapper nodes;\n    meta_service svc;\n\n    partition_guardian guardian(&svc);\n\n    dsn::app_info info;\n    info.app_id = 1;\n    info.is_stateful = true;\n    info.status = dsn::app_status::AS_AVAILABLE;\n    info.app_name = \"test\";\n    info.app_type = \"test\";\n    info.max_replica_count = 3;\n    info.partition_count = 1;\n    std::shared_ptr<app_state> the_app = app_state::create(info);\n\n    app.emplace(the_app->app_id, the_app);\n    for (const dsn::rpc_address &addr : node_list) {\n        get_node_state(nodes, addr, true)->set_alive(true);\n    }\n\n    meta_view mv{&app, &nodes};\n    dsn::gpid p(1, 0);\n    configuration_proposal_action cpa;\n    configuration_proposal_action cpa2;\n\n    dsn::partition_configuration &pc = *get_config(app, p);\n    config_context &cc = *get_config_context(app, 
p);\n\n    std::cerr << \"Case 1: test no proposals in config_context\" << std::endl;\n    ASSERT_FALSE(guardian.from_proposals(mv, p, cpa));\n    ASSERT_EQ(config_type::CT_INVALID, cpa.type);\n\n    std::cerr << \"Case 2: test invalid proposal: invalid target\" << std::endl;\n    cpa2 =\n        new_proposal_action(dsn::rpc_address(), node_list[0], config_type::CT_UPGRADE_TO_PRIMARY);\n    cc.lb_actions.assign_balancer_proposals({cpa2});\n    ASSERT_FALSE(guardian.from_proposals(mv, p, cpa));\n    ASSERT_EQ(config_type::CT_INVALID, cpa.type);\n\n    std::cerr << \"Case 3: test invalid proposal: invalid node\" << std::endl;\n    cpa2 =\n        new_proposal_action(node_list[0], dsn::rpc_address(), config_type::CT_UPGRADE_TO_PRIMARY);\n    cc.lb_actions.assign_balancer_proposals({cpa2});\n    ASSERT_FALSE(guardian.from_proposals(mv, p, cpa));\n    ASSERT_EQ(config_type::CT_INVALID, cpa.type);\n\n    std::cerr << \"Case 4: test invalid proposal: dead target\" << std::endl;\n    cpa2 = new_proposal_action(node_list[0], node_list[0], config_type::CT_UPGRADE_TO_PRIMARY);\n    cc.lb_actions.assign_balancer_proposals({cpa2});\n    get_node_state(nodes, node_list[0], false)->set_alive(false);\n    ASSERT_FALSE(guardian.from_proposals(mv, p, cpa));\n    ASSERT_EQ(config_type::CT_INVALID, cpa.type);\n    get_node_state(nodes, node_list[0], false)->set_alive(true);\n\n    std::cerr << \"Case 5: test invalid proposal: dead node\" << std::endl;\n    cpa2 = new_proposal_action(node_list[0], node_list[1], config_type::CT_ADD_SECONDARY);\n    cc.lb_actions.assign_balancer_proposals({cpa2});\n    get_node_state(nodes, node_list[1], false)->set_alive(false);\n    ASSERT_FALSE(guardian.from_proposals(mv, p, cpa));\n    ASSERT_EQ(config_type::CT_INVALID, cpa.type);\n    get_node_state(nodes, node_list[1], false)->set_alive(true);\n\n    std::cerr << \"Case 6: test invalid proposal: already have priamry but assign\" << std::endl;\n    cpa2 = new_proposal_action(node_list[0], 
node_list[0], config_type::CT_ASSIGN_PRIMARY);\n    cc.lb_actions.assign_balancer_proposals({cpa2});\n    pc.primary = node_list[1];\n    ASSERT_FALSE(guardian.from_proposals(mv, p, cpa));\n    ASSERT_EQ(config_type::CT_INVALID, cpa.type);\n\n    std::cerr << \"Case 7: test invalid proposal: upgrade non-secondary\" << std::endl;\n    cpa2 = new_proposal_action(node_list[0], node_list[0], config_type::CT_UPGRADE_TO_PRIMARY);\n    cc.lb_actions.assign_balancer_proposals({cpa2});\n    pc.primary.set_invalid();\n    ASSERT_FALSE(guardian.from_proposals(mv, p, cpa));\n    ASSERT_EQ(config_type::CT_INVALID, cpa.type);\n\n    std::cerr << \"Case 8: test invalid proposal: add exist secondary\" << std::endl;\n    cpa2 = new_proposal_action(node_list[0], node_list[1], config_type::CT_ADD_SECONDARY);\n    cc.lb_actions.assign_balancer_proposals({cpa2});\n    pc.primary = node_list[0];\n    pc.secondaries = {node_list[1]};\n    ASSERT_FALSE(guardian.from_proposals(mv, p, cpa));\n    ASSERT_EQ(config_type::CT_INVALID, cpa.type);\n\n    std::cerr << \"Case 9: test invalid proposal: downgrade non member\" << std::endl;\n    cpa2 = new_proposal_action(node_list[0], node_list[1], config_type::CT_REMOVE);\n    cc.lb_actions.assign_balancer_proposals({cpa2});\n    pc.primary = node_list[0];\n    pc.secondaries.clear();\n    ASSERT_FALSE(guardian.from_proposals(mv, p, cpa));\n    ASSERT_EQ(config_type::CT_INVALID, cpa.type);\n\n    std::cerr << \"Case 10: test abnormal learning detect\" << std::endl;\n    cpa2 = new_proposal_action(node_list[0], node_list[1], config_type::CT_ADD_SECONDARY);\n    pc.primary = node_list[0];\n    pc.secondaries.clear();\n    cc.lb_actions.assign_balancer_proposals({cpa2});\n\n    replica_info i;\n    i.pid = p;\n    i.status = partition_status::PS_POTENTIAL_SECONDARY;\n    i.ballot = 10;\n    i.last_durable_decree = 10;\n    i.last_committed_decree = 10;\n    i.last_prepared_decree = 10;\n\n    collect_replica(mv, node_list[1], i);\n    
ASSERT_TRUE(guardian.from_proposals(mv, p, cpa));\n    ASSERT_EQ(config_type::CT_ADD_SECONDARY, cpa.type);\n\n    i.status = partition_status::PS_ERROR;\n    collect_replica(mv, node_list[1], i);\n    ASSERT_FALSE(guardian.from_proposals(mv, p, cpa));\n    ASSERT_EQ(config_type::CT_INVALID, cpa.type);\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/meta_service_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"meta_test_base.h\"\n#include \"meta/meta_service.h\"\n\n#include <dsn/utility/fail_point.h>\n#include <runtime/rpc/network.sim.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass meta_service_test : public meta_test_base\n{\npublic:\n    void check_status_failure()\n    {\n        fail::setup();\n        fail::cfg(\"meta_server_failure_detector_get_leader\", \"return(false#1.2.3.4:10086)\");\n\n        /** can't forward to others */\n        RPC_MOCKING(app_env_rpc)\n        {\n            rpc_address leader;\n            auto rpc = create_fake_rpc();\n            rpc.dsn_request()->header->context.u.is_forward_supported = false;\n            bool res = _ms->check_status(rpc, &leader);\n            ASSERT_EQ(false, res);\n            ASSERT_EQ(ERR_FORWARD_TO_OTHERS, rpc.response().err);\n            ASSERT_EQ(leader.to_std_string(), \"1.2.3.4:10086\");\n            ASSERT_EQ(app_env_rpc::forward_mail_box().size(), 0);\n        }\n\n        /** forward to others */\n        RPC_MOCKING(app_env_rpc)\n        {\n            auto rpc = create_fake_rpc();\n            bool res = _ms->check_status(rpc);\n            ASSERT_EQ(false, 
res);\n            ASSERT_EQ(app_env_rpc::forward_mail_box().size(), 1);\n            ASSERT_EQ(app_env_rpc::forward_mail_box()[0].remote_address().to_std_string(),\n                      \"1.2.3.4:10086\");\n        }\n\n        fail::teardown();\n    }\n\n    void check_status_success()\n    {\n        fail::setup();\n        fail::cfg(\"meta_server_failure_detector_get_leader\", \"return(true#1.2.3.4:10086)\");\n\n        RPC_MOCKING(app_env_rpc)\n        {\n            rpc_address leader;\n            auto rpc = create_fake_rpc();\n            auto res = _ms->check_status(rpc, &leader);\n            ASSERT_EQ(true, res);\n            ASSERT_EQ(app_env_rpc::forward_mail_box().size(), 0);\n        }\n\n        fail::teardown();\n    }\n\n    void check_op_status_lock()\n    {\n        SetUp();\n\n        meta_op_status st = _ms->get_op_status();\n        ASSERT_EQ(meta_op_status::FREE, st);\n        bool res = _ms->try_lock_meta_op_status(meta_op_status::BULKLOAD);\n        ASSERT_TRUE(res);\n        res = _ms->try_lock_meta_op_status(meta_op_status::BULKLOAD);\n        ASSERT_FALSE(res);\n        st = _ms->get_op_status();\n        ASSERT_EQ(meta_op_status::BULKLOAD, st);\n        res = _ms->try_lock_meta_op_status(meta_op_status::BACKUP);\n        ASSERT_FALSE(res);\n        st = _ms->get_op_status();\n        ASSERT_EQ(meta_op_status::BULKLOAD, st);\n        _ms->unlock_meta_op_status();\n        st = _ms->get_op_status();\n        ASSERT_EQ(meta_op_status::FREE, st);\n\n        TearDown();\n    }\n\nprivate:\n    app_env_rpc create_fake_rpc()\n    {\n        dsn::message_ptr fake_request = dsn::message_ex::create_request(RPC_CM_UPDATE_APP_ENV);\n        configuration_update_app_env_request request;\n        ::dsn::marshall(fake_request, request);\n\n        dsn::message_ex *recvd_request = fake_request->copy(true, true);\n        std::unique_ptr<tools::sim_network_provider> sim_net(\n            new tools::sim_network_provider(nullptr, nullptr));\n        
recvd_request->io_session = sim_net->create_client_session(rpc_address());\n        return app_env_rpc::auto_reply(recvd_request);\n    }\n};\n\nTEST_F(meta_service_test, check_status_failure) { check_status_failure(); }\n\nTEST_F(meta_service_test, check_status_success) { check_status_success(); }\n\nTEST_F(meta_service_test, check_op_status_lock) { check_op_status_lock(); }\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/meta_service_test_app.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/service_api_cpp.h>\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/dist/replication/replication_types.h>\n#include <dsn/dist/replication/meta_service_app.h>\n\n#include \"meta/server_state.h\"\n#include \"meta/meta_service.h\"\n\n#include <gtest/gtest.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass spin_counter\n{\nprivate:\n    std::atomic_int _counter;\n\npublic:\n    spin_counter() { _counter.store(0); }\n    void wait()\n    {\n        while (_counter.load() != 0)\n            std::this_thread::sleep_for(std::chrono::milliseconds(500));\n    }\n    void block() { ++_counter; }\n    void notify() { --_counter; }\n};\n\nstruct reply_context\n{\n    dsn::message_ex *response;\n    
spin_counter e;\n};\n\ninline dsn::message_ex *create_corresponding_receive(dsn::message_ex *request_msg)\n{\n    return request_msg->copy(true, true);\n}\n\n// fake_receiver_meta_service overrides `reply_message` of meta_service\nclass fake_receiver_meta_service : public dsn::replication::meta_service\n{\npublic:\n    fake_receiver_meta_service() : meta_service() {}\n    virtual ~fake_receiver_meta_service() {}\n    virtual void reply_message(dsn::message_ex *request, dsn::message_ex *response) override\n    {\n        uint64_t ptr;\n        dsn::unmarshall(request, ptr);\n        reply_context *ctx = reinterpret_cast<reply_context *>(ptr);\n        ctx->response = create_corresponding_receive(response);\n        ctx->response->add_ref();\n\n        // release the response\n        response->add_ref();\n        response->release_ref();\n\n        ctx->e.notify();\n    }\n};\n\n// release the dsn_message whose reference count is 0\ninline void destroy_message(dsn::message_ex *msg)\n{\n    msg->add_ref();\n    msg->release_ref();\n}\n\nclass meta_service_test_app : public dsn::service_app\n{\npublic:\n    meta_service_test_app(const dsn::service_app_info *info) : service_app(info) {}\n\npublic:\n    virtual dsn::error_code start(const std::vector<std::string> &args) override;\n    virtual dsn::error_code stop(bool /*cleanup*/) { return dsn::ERR_OK; }\n    void state_sync_test();\n    void update_configuration_test();\n    void balancer_validator();\n    void balance_config_file();\n    void apply_balancer_test();\n    void cannot_run_balancer_test();\n    void construct_apps_test();\n\n    void json_compacity();\n\n    // test server_state set_app_envs/del_app_envs/clear_app_envs\n    void app_envs_basic_test();\n\n    // test for bug found\n    void adjust_dropped_size();\n\n    void call_update_configuration(\n        dsn::replication::meta_service *svc,\n        std::shared_ptr<dsn::replication::configuration_update_request> &request);\n    void call_config_sync(\n     
   dsn::replication::meta_service *svc,\n        std::shared_ptr<dsn::replication::configuration_query_by_node_request> &request);\n\nprivate:\n    typedef std::function<bool(const dsn::replication::app_mapper &)> state_validator;\n    bool\n    wait_state(dsn::replication::server_state *ss, const state_validator &validator, int time = -1);\n};\n\ntemplate <typename TRequest, typename RequestHandler>\nstd::shared_ptr<reply_context>\nfake_rpc_call(dsn::task_code rpc_code,\n              dsn::task_code server_state_write_code,\n              RequestHandler *handle_class,\n              void (RequestHandler::*handle)(dsn::message_ex *request),\n              const TRequest &data,\n              int hash = 0,\n              std::chrono::milliseconds delay = std::chrono::milliseconds(0))\n{\n    dsn::message_ex *msg = dsn::message_ex::create_request(rpc_code);\n    dsn::marshall(msg, data);\n\n    std::shared_ptr<reply_context> result = std::make_shared<reply_context>();\n    result->e.block();\n    uint64_t ptr = reinterpret_cast<uint64_t>(result.get());\n    dsn::marshall(msg, ptr);\n\n    dsn::message_ex *received = create_corresponding_receive(msg);\n    received->add_ref();\n    dsn::tasking::enqueue(\n        server_state_write_code, nullptr, std::bind(handle, handle_class, received), hash, delay);\n\n    // release the sending message\n    destroy_message(msg);\n\n    return result;\n}\n\n#define fake_create_app(state, request_data)                                                       \\\n    fake_rpc_call(                                                                                 \\\n        RPC_CM_CREATE_APP, LPC_META_STATE_NORMAL, state, &server_state::create_app, request_data)\n\n#define fake_drop_app(state, request_data)                                                         \\\n    fake_rpc_call(                                                                                 \\\n        RPC_CM_DROP_APP, LPC_META_STATE_NORMAL, state, 
&server_state::drop_app, request_data)\n\n#define fake_recall_app(state, request_data)                                                       \\\n    fake_rpc_call(                                                                                 \\\n        RPC_CM_RECALL_APP, LPC_META_STATE_NORMAL, state, &server_state::recall_app, request_data)\n\n#define fake_create_policy(state, request_data)                                                    \\\n    fake_rpc_call(RPC_CM_ADD_BACKUP_POLICY,                                                        \\\n                  LPC_DEFAULT_CALLBACK,                                                            \\\n                  state,                                                                           \\\n                  &backup_service::add_backup_policy,                                              \\\n                  request_data)\n\n#define fake_wait_rpc(context, response_data)                                                      \\\n    do {                                                                                           \\\n        context->e.wait();                                                                         \\\n        ::dsn::unmarshall(context->response, response_data);                                       \\\n        context->response->release_ref();                                                          \\\n    } while (0)\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/meta_split_service_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <gtest/gtest.h>\n#include <dsn/service_api_c.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication/replica_envs.h>\n\n#include \"meta_service_test_app.h\"\n#include \"meta_test_base.h\"\n#include \"meta/meta_split_service.h\"\n#include \"meta/meta_server_failure_detector.h\"\n\nnamespace dsn {\nnamespace replication {\nclass meta_split_service_test : public meta_test_base\n{\npublic:\n    meta_split_service_test() {}\n\n    void SetUp() override\n    {\n        meta_test_base::SetUp();\n        create_app(NAME, PARTITION_COUNT);\n        app = find_app(NAME);\n    }\n\n    void TearDown()\n    {\n        app.reset();\n        meta_test_base::TearDown();\n    }\n\n    error_code start_partition_split(const 
std::string &app_name, int new_partition_count)\n    {\n        auto request = dsn::make_unique<start_partition_split_request>();\n        request->app_name = app_name;\n        request->new_partition_count = new_partition_count;\n\n        start_split_rpc rpc(std::move(request), RPC_CM_START_PARTITION_SPLIT);\n        split_svc().start_partition_split(rpc);\n        wait_all();\n        return rpc.response().err;\n    }\n\n    query_split_response query_partition_split(const std::string &app_name)\n    {\n        auto request = dsn::make_unique<query_split_request>();\n        request->app_name = app_name;\n\n        query_split_rpc rpc(std::move(request), RPC_CM_QUERY_PARTITION_SPLIT);\n        split_svc().query_partition_split(rpc);\n        wait_all();\n        return rpc.response();\n    }\n\n    error_code control_partition_split(const std::string &app_name,\n                                       split_control_type::type type,\n                                       const int32_t pidx,\n                                       const int32_t old_partition_count = 0)\n    {\n        auto req = make_unique<control_split_request>();\n        req->__set_app_name(app_name);\n        req->__set_control_type(type);\n        req->__set_parent_pidx(pidx);\n        req->__set_old_partition_count(old_partition_count);\n\n        control_split_rpc rpc(std::move(req), RPC_CM_CONTROL_PARTITION_SPLIT);\n        split_svc().control_partition_split(rpc);\n        wait_all();\n\n        return rpc.response().err;\n    }\n\n    error_code register_child(int32_t parent_index, ballot req_parent_ballot, bool wait_zk)\n    {\n        partition_configuration parent_config;\n        parent_config.ballot = req_parent_ballot;\n        parent_config.last_committed_decree = 5;\n        parent_config.max_replica_count = 3;\n        parent_config.pid = gpid(app->app_id, parent_index);\n\n        partition_configuration child_config;\n        child_config.ballot = PARENT_BALLOT + 1;\n        
child_config.last_committed_decree = 5;\n        child_config.pid = gpid(app->app_id, parent_index + PARTITION_COUNT);\n\n        // mock node state\n        node_state node;\n        node.put_partition(gpid(app->app_id, PARENT_INDEX), true);\n        mock_node_state(NODE, node);\n\n        auto request = dsn::make_unique<register_child_request>();\n        request->app.app_name = app->app_name;\n        request->app.app_id = app->app_id;\n        request->parent_config = parent_config;\n        request->child_config = child_config;\n        request->primary_address = NODE;\n\n        register_child_rpc rpc(std::move(request), RPC_CM_REGISTER_CHILD_REPLICA);\n        split_svc().register_child_on_meta(rpc);\n        wait_all();\n        if (wait_zk) {\n            std::this_thread::sleep_for(std::chrono::milliseconds(100));\n        }\n        return rpc.response().err;\n    }\n\n    error_code notify_stop_split(split_status::type req_split_status)\n    {\n        auto req = make_unique<notify_stop_split_request>();\n        req->__set_app_name(NAME);\n        req->__set_parent_gpid(dsn::gpid(app->app_id, PARENT_INDEX));\n        req->__set_meta_split_status(req_split_status);\n        req->__set_partition_count(PARTITION_COUNT);\n\n        notify_stop_split_rpc rpc(std::move(req), RPC_CM_NOTIFY_STOP_SPLIT);\n        split_svc().notify_stop_split(rpc);\n        wait_all();\n\n        return rpc.response().err;\n    }\n\n    query_child_state_response query_child_state()\n    {\n        auto req = make_unique<query_child_state_request>();\n        req->__set_app_name(NAME);\n        req->__set_pid(dsn::gpid(app->app_id, PARENT_INDEX));\n        req->__set_partition_count(PARTITION_COUNT);\n\n        query_child_state_rpc rpc(std::move(req), RPC_CM_QUERY_CHILD_STATE);\n        split_svc().query_child_state(rpc);\n        wait_all();\n\n        return rpc.response();\n    }\n\n    int32_t on_config_sync(configuration_query_by_node_request req)\n    {\n        auto 
request = make_unique<configuration_query_by_node_request>(req);\n        configuration_query_by_node_rpc rpc(std::move(request), RPC_CM_CONFIG_SYNC);\n        _ss->on_config_sync(rpc);\n        wait_all();\n        int32_t splitting_count = 0;\n        for (auto p : rpc.response().partitions) {\n            if (p.__isset.meta_split_status) {\n                ++splitting_count;\n            }\n        }\n        return splitting_count;\n    }\n\n    void mock_app_partition_split_context()\n    {\n        app->partition_count = NEW_PARTITION_COUNT;\n        app->partitions.resize(app->partition_count);\n        app->helpers->contexts.resize(app->partition_count);\n        app->helpers->split_states.splitting_count = app->partition_count / 2;\n        for (int i = 0; i < app->partition_count; ++i) {\n            app->helpers->contexts[i].config_owner = &app->partitions[i];\n            app->partitions[i].pid = gpid(app->app_id, i);\n            if (i >= app->partition_count / 2) {\n                app->partitions[i].ballot = invalid_ballot;\n            } else {\n                app->partitions[i].ballot = PARENT_BALLOT;\n                app->helpers->contexts[i].stage = config_status::not_pending;\n                app->helpers->split_states.status[i] = split_status::SPLITTING;\n            }\n        }\n    }\n\n    void clear_app_partition_split_context()\n    {\n        app->partition_count = PARTITION_COUNT;\n        app->partitions.resize(app->partition_count);\n        app->helpers->contexts.resize(app->partition_count);\n        app->helpers->split_states.splitting_count = 0;\n        app->helpers->split_states.status.clear();\n    }\n\n    void mock_only_one_partition_split(split_status::type split_status)\n    {\n        app->partition_count = NEW_PARTITION_COUNT;\n        app->partitions.resize(app->partition_count);\n        app->helpers->contexts.resize(app->partition_count);\n        for (int i = 0; i < app->partition_count; ++i) {\n            
app->helpers->contexts[i].config_owner = &app->partitions[i];\n            app->partitions[i].pid = dsn::gpid(app->app_id, i);\n            if (i >= app->partition_count / 2) {\n                app->partitions[i].ballot = invalid_ballot;\n            } else {\n                app->partitions[i].ballot = PARENT_BALLOT;\n                app->helpers->contexts[i].stage = config_status::not_pending;\n            }\n        }\n        app->helpers->split_states.splitting_count = 1;\n        app->helpers->split_states.status[PARENT_INDEX] = split_status;\n    }\n\n    void mock_child_registered()\n    {\n        app->partitions[CHILD_INDEX].ballot = PARENT_BALLOT;\n        app->helpers->split_states.splitting_count--;\n        app->helpers->split_states.status.erase(PARENT_INDEX);\n    }\n\n    void mock_split_states(split_status::type status, int32_t parent_index = -1)\n    {\n        if (parent_index != -1) {\n            app->helpers->split_states.status[parent_index] = status;\n        } else {\n            auto partition_count = app->partition_count;\n            for (auto i = 0; i < partition_count / 2; ++i) {\n                app->helpers->split_states.status[i] = status;\n            }\n        }\n    }\n\n    bool check_split_status(split_status::type expected_status, int32_t parent_index = -1)\n    {\n        auto app = find_app(NAME);\n        if (parent_index != -1) {\n            return (app->helpers->split_states.status[parent_index] == expected_status);\n        } else {\n            for (const auto kv : app->helpers->split_states.status) {\n                if (kv.second != expected_status) {\n                    return false;\n                }\n            }\n            return true;\n        }\n    }\n\n    void initialize_meta_server_with_mock_app()\n    {\n        // initialize meta service\n        auto meta_svc = new fake_receiver_meta_service();\n        meta_svc->remote_storage_initialize();\n\n        // initialize server_state\n        auto 
state = meta_svc->_state;\n        state->initialize(meta_svc, meta_svc->_cluster_root + \"/apps\");\n        meta_svc->_started = true;\n        _ms.reset(meta_svc);\n\n        // initialize split service\n        _ms->_split_svc = make_unique<meta_split_service>(_ms.get());\n\n        // mock splitting app\n        create_splitting_app_on_remote_stroage(state->_apps_root);\n        state->initialize_data_structure();\n\n        _ms->_failure_detector.reset(new meta_server_failure_detector(_ms.get()));\n        _ss = _ms->_state;\n    }\n\n    void create_splitting_app_on_remote_stroage(const std::string &app_root)\n    {\n        static const char *lock_state = \"lock\";\n        static const char *unlock_state = \"unlock\";\n        std::string path = app_root;\n\n        _ms->get_meta_storage()->create_node(\n            std::move(path), blob(lock_state, 0, strlen(lock_state)), [&app_root]() {\n                ddebug_f(\"create app root {}\", app_root);\n            });\n        wait_all();\n\n        // create splitting app\n        app_info ainfo;\n        ainfo.app_id = 1;\n        ainfo.app_name = NAME;\n        ainfo.app_type = \"pegasus\";\n        ainfo.is_stateful = true;\n        ainfo.max_replica_count = 3;\n        ainfo.partition_count = NEW_PARTITION_COUNT;\n        ainfo.init_partition_count = PARTITION_COUNT;\n        ainfo.status = app_status::AS_AVAILABLE;\n\n        blob value = json::json_forwarder<app_info>::encode(ainfo);\n        _ms->get_meta_storage()->create_node(\n            app_root + \"/\" + boost::lexical_cast<std::string>(ainfo.app_id),\n            std::move(value),\n            [this, &app_root, &ainfo]() {\n                ddebug_f(\"create app({}) app_id={}, dir succeed\", ainfo.app_name, ainfo.app_id);\n                for (int i = 0; i < ainfo.init_partition_count; ++i) {\n                    create_partition_configuration_on_remote_storage(app_root, ainfo.app_id, i);\n                }\n                
create_partition_configuration_on_remote_storage(\n                    app_root, ainfo.app_id, CHILD_INDEX);\n            });\n        wait_all();\n\n        std::string root = app_root;\n        _ms->get_meta_storage()->set_data(\n            std::move(root), blob(unlock_state, 0, strlen(unlock_state)), []() {});\n        wait_all();\n    }\n\n    void create_partition_configuration_on_remote_storage(const std::string &app_root,\n                                                          const int32_t app_id,\n                                                          const int32_t pidx)\n    {\n        partition_configuration config;\n        config.max_replica_count = 3;\n        config.pid = gpid(app_id, pidx);\n        config.ballot = PARENT_BALLOT;\n        blob value = json::json_forwarder<partition_configuration>::encode(config);\n        _ms->get_meta_storage()->create_node(\n            app_root + \"/\" + boost::lexical_cast<std::string>(app_id) + \"/\" +\n                boost::lexical_cast<std::string>(pidx),\n            std::move(value),\n            [app_id, pidx, this]() {\n                ddebug_f(\"create app({}), partition({}.{}) dir succeed\", NAME, app_id, pidx);\n            });\n    }\n\n    const std::string NAME = \"split_table\";\n    const int32_t PARTITION_COUNT = 4;\n    const int32_t NEW_PARTITION_COUNT = 8;\n    const int32_t PARENT_BALLOT = 3;\n    const int32_t PARENT_INDEX = 0;\n    const int32_t CHILD_INDEX = 4;\n    const rpc_address NODE = rpc_address(\"127.0.0.1\", 10086);\n    std::shared_ptr<app_state> app;\n};\n\n// start split unit tests\nTEST_F(meta_split_service_test, start_split_test)\n{\n    // Test case:\n    // - app not existed\n    // - wrong partition_count\n    // - app already splitting\n    // - start split succeed\n    struct start_test\n    {\n        std::string app_name;\n        int32_t new_partition_count;\n        bool need_mock_splitting;\n        error_code expected_err;\n        int32_t 
expected_partition_count;\n    } tests[] = {{\"table_not_exist\", PARTITION_COUNT, false, ERR_APP_NOT_EXIST, PARTITION_COUNT},\n                 {NAME, PARTITION_COUNT, false, ERR_INVALID_PARAMETERS, PARTITION_COUNT},\n                 {NAME, NEW_PARTITION_COUNT, true, ERR_BUSY, PARTITION_COUNT},\n                 {NAME, NEW_PARTITION_COUNT, false, ERR_OK, NEW_PARTITION_COUNT}};\n\n    for (auto test : tests) {\n        auto app = find_app(NAME);\n        app->helpers->split_states.splitting_count = test.need_mock_splitting ? PARTITION_COUNT : 0;\n        ASSERT_EQ(start_partition_split(test.app_name, test.new_partition_count),\n                  test.expected_err);\n        ASSERT_EQ(app->partition_count, test.expected_partition_count);\n        if (test.expected_err == ERR_OK) {\n            ASSERT_EQ(app->envs[replica_envs::SPLIT_VALIDATE_PARTITION_HASH], \"true\");\n        }\n    }\n}\n\n// query split unit tests\nTEST_F(meta_split_service_test, query_split_test)\n{\n    // Test case:\n    // - app not existed\n    // - app not splitting\n    // - query split succeed\n    struct query_test\n    {\n        std::string app_name;\n        bool mock_splitting;\n        error_code expected_err;\n    } tests[] = {\n        {\"table_not_exist\", false, ERR_APP_NOT_EXIST},\n        {NAME, false, ERR_INVALID_STATE},\n        {NAME, true, ERR_OK},\n    };\n\n    for (auto test : tests) {\n        if (test.mock_splitting) {\n            mock_app_partition_split_context();\n        }\n        auto resp = query_partition_split(test.app_name);\n        ASSERT_EQ(resp.err, test.expected_err);\n        if (resp.err == ERR_OK) {\n            ASSERT_EQ(resp.new_partition_count, NEW_PARTITION_COUNT);\n            ASSERT_EQ(resp.status.size(), PARTITION_COUNT);\n        }\n        if (test.mock_splitting) {\n            clear_app_partition_split_context();\n        }\n    }\n}\n\n// register child unit tests\nTEST_F(meta_split_service_test, register_child_test)\n{\n    // Test 
case:\n    // - request is out-dated\n    // - child has been registered\n    // - parent partition has been paused splitting\n    // - parent partition is syncing config to remote storage\n    // - register child succeed\n    struct register_test\n    {\n        int32_t parent_ballot;\n        bool mock_child_registered;\n        bool mock_parent_paused;\n        bool mock_pending;\n        error_code expected_err;\n        bool wait_zk;\n    } tests[] = {\n        {PARENT_BALLOT - 1, false, false, false, ERR_INVALID_VERSION, false},\n        {PARENT_BALLOT, true, false, false, ERR_CHILD_REGISTERED, false},\n        {PARENT_BALLOT, false, true, false, ERR_INVALID_STATE, false},\n        {PARENT_BALLOT, false, false, true, ERR_IO_PENDING, false},\n        {PARENT_BALLOT, false, false, false, ERR_OK, true},\n    };\n\n    for (auto test : tests) {\n        mock_app_partition_split_context();\n        if (test.mock_child_registered) {\n            mock_child_registered();\n        }\n        if (test.mock_parent_paused) {\n            mock_split_states(split_status::PAUSED, PARENT_INDEX);\n        }\n        if (test.mock_pending) {\n            app->helpers->contexts[PARENT_INDEX].stage = config_status::pending_remote_sync;\n        }\n        ASSERT_EQ(register_child(PARENT_INDEX, test.parent_ballot, test.wait_zk),\n                  test.expected_err);\n    }\n}\n\n// config sync unit tests\nTEST_F(meta_split_service_test, on_config_sync_test)\n{\n    create_app(\"not_splitting_app\");\n    auto not_splitting_app = find_app(\"not_splitting_app\");\n    gpid pid1 = gpid(app->app_id, PARENT_INDEX);\n    gpid pid2 = gpid(not_splitting_app->app_id, 0);\n    // mock meta server node state\n    node_state node;\n    node.put_partition(pid1, true);\n    node.put_partition(pid2, true);\n    mock_node_state(NODE, node);\n    // mock request\n    replica_info info1, info2;\n    info1.pid = pid1;\n    info2.pid = pid2;\n    configuration_query_by_node_request req;\n    req.node 
= NODE;\n    req.__isset.stored_replicas = true;\n    req.stored_replicas.emplace_back(info1);\n    req.stored_replicas.emplace_back(info2);\n\n    // Test case:\n    // - partition is splitting\n    // - partition is not splitting\n    // - partition split is paused\n    struct config_sync_test\n    {\n        bool mock_child_registered;\n        bool mock_parent_paused;\n        int32_t expected_count;\n    } tests[] = {{false, false, 1}, {true, false, 0}, {false, true, 1}};\n\n    for (const auto &test : tests) {\n        mock_app_partition_split_context();\n        if (test.mock_child_registered) {\n            mock_child_registered();\n        }\n        if (test.mock_parent_paused) {\n            mock_split_states(split_status::PAUSED, PARENT_INDEX);\n        }\n        ASSERT_EQ(on_config_sync(req), test.expected_count);\n    }\n\n    drop_app(\"not_splitting_app\");\n}\n\n/// control split unit tests\nTEST_F(meta_split_service_test, pause_or_restart_single_partition_test)\n{\n    // Test case:\n    // - pause with wrong pidx\n    // - pause with partition is not splitting\n    // - pause with partition split_status = pausing\n    // - pause with partition split_status = paused\n    // - pause with partition split_status = canceling\n    // - pause with partition split_status = splitting\n    // - restart with partition is not splitting\n    // - restart with partition split_status = pausing\n    // - restart with partition split_status = paused\n    // - restart with partition split_status = canceling\n    // - restart with partition split_status = splitting\n    struct control_single_partition_test\n    {\n        int32_t pidx;\n        split_status::type cur_status;\n        split_control_type::type control_type;\n        error_code expected_err;\n        split_status::type expected_status;\n    } tests[] = {{NEW_PARTITION_COUNT,\n                  split_status::SPLITTING,\n                  split_control_type::PAUSE,\n                  
ERR_INVALID_PARAMETERS,\n                  split_status::SPLITTING},\n                 {PARENT_INDEX,\n                  split_status::NOT_SPLIT,\n                  split_control_type::PAUSE,\n                  ERR_CHILD_REGISTERED,\n                  split_status::NOT_SPLIT},\n                 {PARENT_INDEX,\n                  split_status::PAUSING,\n                  split_control_type::PAUSE,\n                  ERR_INVALID_STATE,\n                  split_status::PAUSING},\n                 {PARENT_INDEX,\n                  split_status::PAUSED,\n                  split_control_type::PAUSE,\n                  ERR_INVALID_STATE,\n                  split_status::PAUSED},\n                 {PARENT_INDEX,\n                  split_status::CANCELING,\n                  split_control_type::PAUSE,\n                  ERR_INVALID_STATE,\n                  split_status::CANCELING},\n                 {PARENT_INDEX,\n                  split_status::SPLITTING,\n                  split_control_type::PAUSE,\n                  ERR_OK,\n                  split_status::PAUSING},\n                 {PARENT_INDEX,\n                  split_status::NOT_SPLIT,\n                  split_control_type::RESTART,\n                  ERR_INVALID_STATE,\n                  split_status::NOT_SPLIT},\n                 {PARENT_INDEX,\n                  split_status::PAUSING,\n                  split_control_type::RESTART,\n                  ERR_INVALID_STATE,\n                  split_status::PAUSING},\n                 {PARENT_INDEX,\n                  split_status::PAUSED,\n                  split_control_type::RESTART,\n                  ERR_OK,\n                  split_status::SPLITTING},\n                 {PARENT_INDEX,\n                  split_status::CANCELING,\n                  split_control_type::RESTART,\n                  ERR_INVALID_STATE,\n                  split_status::CANCELING},\n                 {PARENT_INDEX,\n                  split_status::SPLITTING,\n                  
split_control_type::RESTART,\n                  ERR_INVALID_STATE,\n                  split_status::SPLITTING}};\n\n    for (auto test : tests) {\n        mock_app_partition_split_context();\n        if (test.cur_status == split_status::NOT_SPLIT) {\n            mock_child_registered();\n        } else {\n            mock_split_states(test.cur_status, PARENT_INDEX);\n        }\n        ASSERT_EQ(control_partition_split(NAME, test.control_type, test.pidx, PARTITION_COUNT),\n                  test.expected_err);\n        if (test.expected_err == ERR_OK) {\n            ASSERT_TRUE(check_split_status(test.expected_status, test.pidx));\n        }\n        clear_app_partition_split_context();\n    }\n}\n\nTEST_F(meta_split_service_test, pause_or_restart_multi_partitions_test)\n{\n    // Test case:\n    // - app not existed\n    // - app is not splitting\n    // - pausing all splitting partitions succeed\n    // - restart all paused partitions succeed\n    struct control_multi_partitions_test\n    {\n        bool mock_split_context;\n        std::string app_name;\n        split_control_type::type control_type;\n        error_code expected_err;\n    } tests[] = {{false, \"table_not_exist\", split_control_type::PAUSE, ERR_APP_NOT_EXIST},\n                 {false, NAME, split_control_type::RESTART, ERR_INVALID_STATE},\n                 {true, NAME, split_control_type::PAUSE, ERR_OK},\n                 {true, NAME, split_control_type::RESTART, ERR_OK}};\n\n    for (auto test : tests) {\n        if (test.mock_split_context) {\n            mock_app_partition_split_context();\n            if (test.control_type == split_control_type::RESTART) {\n                mock_split_states(split_status::PAUSED, -1);\n            }\n        }\n        error_code ec =\n            control_partition_split(test.app_name, test.control_type, -1, PARTITION_COUNT);\n        ASSERT_EQ(ec, test.expected_err);\n        if (test.expected_err == ERR_OK) {\n            split_status::type expected_status 
= test.control_type == split_control_type::PAUSE\n                                                     ? split_status::PAUSING\n                                                     : split_status::SPLITTING;\n            ASSERT_TRUE(check_split_status(expected_status, -1));\n        }\n        if (test.mock_split_context) {\n            clear_app_partition_split_context();\n        }\n    }\n}\n\nTEST_F(meta_split_service_test, cancel_split_test)\n{\n    // Test case:\n    // - wrong partition count\n    // - cancel split with child registered\n    // - cancel succeed\n    struct cancel_test\n    {\n        int32_t old_partition_count;\n        bool mock_child_registered;\n        error_code expected_err;\n        bool check_status;\n    } tests[] = {{NEW_PARTITION_COUNT, false, ERR_INVALID_PARAMETERS, false},\n                 {PARTITION_COUNT, true, ERR_CHILD_REGISTERED, false},\n                 {PARTITION_COUNT, false, ERR_OK, true}};\n\n    for (auto test : tests) {\n        mock_app_partition_split_context();\n        if (test.mock_child_registered) {\n            mock_child_registered();\n        }\n\n        ASSERT_EQ(\n            control_partition_split(NAME, split_control_type::CANCEL, -1, test.old_partition_count),\n            test.expected_err);\n        if (test.check_status) {\n            auto app = find_app(NAME);\n            ASSERT_EQ(app->partition_count, NEW_PARTITION_COUNT);\n            ASSERT_EQ(app->helpers->split_states.splitting_count, PARTITION_COUNT);\n            check_split_status(split_status::CANCELING, -1);\n        }\n        clear_app_partition_split_context();\n    }\n}\n\n// notify stop split unit tests\nTEST_F(meta_split_service_test, notify_stop_split_test)\n{\n    // Test case:\n    // - request split pausing, meta not_split\n    // - request split pausing, meta paused\n    // - request split pausing, meta splitting\n    // - request split pausing, meta pausing\n    // - request split pausing, meta canceling\n    // - 
request split canceling, meta not_split\n    // - request split canceling, meta paused\n    // - request split canceling, meta splitting\n    // - request split canceling, meta pausing\n    // - request split canceling, meta canceling\n    // - request split canceling, meta canceling, last cancel request\n    struct notify_stop_split_test\n    {\n        split_status::type req_split_status;\n        split_status::type meta_split_status;\n        bool last_canceled;\n        error_code expected_err;\n        split_status::type expected_status;\n    } tests[] = {\n        {split_status::PAUSING,\n         split_status::NOT_SPLIT,\n         false,\n         ERR_INVALID_VERSION,\n         split_status::NOT_SPLIT},\n        {split_status::PAUSING,\n         split_status::PAUSED,\n         false,\n         ERR_INVALID_VERSION,\n         split_status::PAUSED},\n        {split_status::PAUSING,\n         split_status::SPLITTING,\n         false,\n         ERR_INVALID_VERSION,\n         split_status::SPLITTING},\n        {split_status::PAUSING, split_status::PAUSING, false, ERR_OK, split_status::PAUSING},\n        {split_status::PAUSING,\n         split_status::CANCELING,\n         false,\n         ERR_INVALID_VERSION,\n         split_status::CANCELING},\n        {split_status::CANCELING,\n         split_status::NOT_SPLIT,\n         false,\n         ERR_INVALID_VERSION,\n         split_status::NOT_SPLIT},\n        {split_status::CANCELING,\n         split_status::PAUSED,\n         false,\n         ERR_INVALID_VERSION,\n         split_status::PAUSED},\n        {split_status::CANCELING,\n         split_status::SPLITTING,\n         false,\n         ERR_INVALID_VERSION,\n         split_status::SPLITTING},\n        {split_status::CANCELING,\n         split_status::PAUSING,\n         false,\n         ERR_INVALID_VERSION,\n         split_status::PAUSING},\n        {split_status::CANCELING, split_status::CANCELING, false, ERR_OK, split_status::NOT_SPLIT},\n        
{split_status::CANCELING, split_status::CANCELING, true, ERR_OK, split_status::NOT_SPLIT}};\n\n    for (auto test : tests) {\n        if (test.last_canceled) {\n            mock_only_one_partition_split(split_status::CANCELING);\n        } else {\n            mock_app_partition_split_context();\n            if (test.meta_split_status == split_status::NOT_SPLIT) {\n                mock_child_registered();\n            } else {\n                mock_split_states(test.meta_split_status, PARENT_INDEX);\n            }\n        }\n\n        ASSERT_EQ(notify_stop_split(test.req_split_status), test.expected_err);\n        if (test.last_canceled) {\n            auto app = find_app(NAME);\n            ASSERT_EQ(app->partition_count, PARTITION_COUNT);\n            ASSERT_EQ(app->helpers->split_states.splitting_count, 0);\n        } else if (test.expected_status != split_status::NOT_SPLIT) {\n            auto app = find_app(NAME);\n            ASSERT_EQ(app->partition_count, NEW_PARTITION_COUNT);\n            check_split_status(test.expected_status, PARENT_INDEX);\n        }\n\n        clear_app_partition_split_context();\n    }\n}\n\nTEST_F(meta_split_service_test, query_child_state_test)\n{\n    // Test case:\n    // - other partition is still canceling\n    // - app split canceled\n    // - child partition registered\n    struct query_child_state_test\n    {\n        bool mock_splitting;\n        bool mock_registered;\n        error_code expected_err;\n    } tests[] = {\n        {true, false, ERR_INVALID_STATE}, {false, false, ERR_INVALID_STATE}, {true, true, ERR_OK}};\n\n    for (const auto &test : tests) {\n        if (test.mock_splitting) {\n            mock_app_partition_split_context();\n        }\n        if (test.mock_registered) {\n            mock_child_registered();\n        }\n        auto resp = query_child_state();\n        ASSERT_EQ(resp.err, test.expected_err);\n        if (resp.err == ERR_OK) {\n            ASSERT_EQ(resp.partition_count, 
NEW_PARTITION_COUNT);\n        }\n        if (test.mock_splitting) {\n            clear_app_partition_split_context();\n        }\n    }\n}\n\nclass meta_split_service_failover_test : public meta_split_service_test\n{\npublic:\n    void SetUp() {}\n    void TearDown() { meta_test_base::TearDown(); }\n};\n\nTEST_F(meta_split_service_failover_test, half_split_test)\n{\n    initialize_meta_server_with_mock_app();\n    auto app = find_app(NAME);\n    auto split_states = app->helpers->split_states;\n    ASSERT_EQ(split_states.splitting_count, PARTITION_COUNT - 1);\n    ASSERT_EQ(split_states.status.find(PARENT_INDEX), split_states.status.end());\n    ASSERT_EQ(app->partition_count, NEW_PARTITION_COUNT);\n    ASSERT_EQ(app->partitions.size(), NEW_PARTITION_COUNT);\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/meta_state/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_meta_state_tests)\n\n# Source files under CURRENT project directory will be automatically included.\n# You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"\")\n\n#  Search mode for source files under CURRENT project directory?\n#  \"GLOB_RECURSE\" for recursive search\n#  \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS \n    dsn_meta_server\n    dsn_replica_server\n    dsn_replication_common\n    dsn_runtime\n    hashtable\n    gtest\n    )\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\n# Extra files that will be installed\nset(MY_BINPLACES\n    \"run.sh\"\n    \"clear.sh\"\n    \"clear.cmd\"\n    \"config-test.ini\"\n    \"gtest.filter\"\n)\n\ndsn_add_test()\n"
  },
  {
    "path": "src/meta/test/meta_state/clear.cmd",
    "content": "rem The MIT License (MIT)\nrem\nrem Copyright (c) 2015 Microsoft Corporation\nrem\nrem -=- Robust Distributed System Nucleus (rDSN) -=-\nrem\nrem Permission is hereby granted, free of charge, to any person obtaining a copy\nrem of this software and associated documentation files (the \"Software\"), to deal\nrem in the Software without restriction, including without limitation the rights\nrem to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\nrem copies of the Software, and to permit persons to whom the Software is\nrem furnished to do so, subject to the following conditions:\nrem\nrem The above copyright notice and this permission notice shall be included in\nrem all copies or substantial portions of the Software.\nrem\nrem THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nrem IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nrem FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nrem AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nrem LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nrem OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\nrem THE SOFTWARE.\n\n@ECHO OFF\n@rmdir /Q /S data\n@rmdir /Q /S nfs_test_dir* \nDEL /Q *.tmp \nDEL /Q command.copy.txt\n"
  },
  {
    "path": "src/meta/test/meta_state/clear.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nrm -rf client core* log.* *.log data\n"
  },
  {
    "path": "src/meta/test/meta_state/config-test.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n[apps.client]\ntype = test\narguments = localhost 20101\nrun = true\nports =\ncount = 1\ndelay_seconds = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_DLOCK, THREAD_POOL_REPLICATION, THREAD_POOL_REPLICATION_LONG, THREAD_POOL_BLOCK_SERVICE\n\n[apps.server]\ntype = test\narguments =\nports = 20101\nrun = false\ncount = 0\npools = THREAD_POOL_DEFAULT, THREAD_POOL_REPLICATION\n\n[core]\n;tool = simulator\ntool = nativerun\n\n;toollets = tracer, profiler\n;fault_injector\npause_on_start = 
false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n\n[tools.simple_logger]\nfast_flush = true\nshort_header = false\nstderr_start_level = LOG_LEVEL_WARNING\n\n[tools.simulator]\nrandom_seed = 0\n\n[tools.screen_logger]\nshort_header = false\n\n[network]\n; how many network threads for network library (used by asio)\nio_service_worker_count = 2\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nis_profile = false\nallow_inline = false\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\nis_profile = false\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 2\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_NORMAL\n\n[threadpool.THREAD_POOL_TEST_SERVER]\npartitioned = false\n\n[threadpool.THREAD_POOL_BLOCK_SERVICE]\nworker_count = 8\n\n[threadpool.THREAD_POOL_DLOCK]\npartitioned = true\n\n[zookeeper]\nhosts_list = localhost:12181\ntimeout_ms = 30000\nlogfile = zoolog.log\n"
  },
  {
    "path": "src/meta/test/meta_state/gtest.filter",
    "content": "config-test.ini *\n"
  },
  {
    "path": "src/meta/test/meta_state/main.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <iostream>\n\n#ifndef _WIN32\n#include <sys/types.h>\n#include <signal.h>\n#endif\n\n#include <gtest/gtest.h>\n#include <dsn/service_api_cpp.h>\n\nint g_test_count = 0;\nint g_test_ret = 0;\n\nclass test_client : public ::dsn::service_app\n{\npublic:\n    test_client(const dsn::service_app_info *info) : ::dsn::service_app(info) {}\n\n    ::dsn::error_code start(const std::vector<std::string> &args)\n    {\n        int argc = args.size();\n        char *argv[20];\n        for (int i = 0; i < argc; ++i) {\n            argv[i] = (char 
*)(args[i].c_str());\n        }\n        testing::InitGoogleTest(&argc, argv);\n        g_test_ret = RUN_ALL_TESTS();\n        g_test_count = 1;\n        /*\n                // exit without any destruction\n        # if defined(_WIN32)\n                ::ExitProcess(0);\n        # else\n                kill(getpid(), SIGKILL);\n        # endif\n        */\n        return ::dsn::ERR_OK;\n    }\n\n    ::dsn::error_code stop(bool cleanup = false) { return ::dsn::ERR_OK; }\n};\n\nGTEST_API_ int main(int argc, char **argv)\n{\n    testing::InitGoogleTest(&argc, argv);\n\n    // register all possible services\n    dsn::service_app::register_factory<test_client>(\"test\");\n\n    // specify what services and tools will run in config file, then run\n    if (argc < 2)\n        dsn_run_config(\"config-test.ini\", false);\n    else\n        dsn_run_config(argv[1], false);\n\n    while (g_test_count == 0) {\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n    }\n\n#ifndef ENABLE_GCOV\n    dsn_exit(g_test_ret);\n#endif\n    return g_test_ret;\n}\n"
  },
  {
    "path": "src/meta/test/meta_state/meta_state_service.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/dist/meta_state_service.h>\n#include <boost/lexical_cast.hpp>\n\n#include <gtest/gtest.h>\n#include <chrono>\n#include <thread>\n\n#include \"meta/meta_state_service_simple.h\"\n#include \"meta/meta_state_service_zookeeper.h\"\n\nusing namespace dsn;\nusing namespace dsn::dist;\n\nDEFINE_TASK_CODE(META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, TASK_PRIORITY_HIGH, THREAD_POOL_DEFAULT);\n\ntypedef std::function<meta_state_service *()> service_creator_func;\ntypedef std::function<void(meta_state_service *)> service_deleter_func;\n\n#define expect_ok [](error_code ec) { EXPECT_TRUE(ec == ERR_OK); }\n#define expect_err [](error_code ec) { EXPECT_FALSE(ec == ERR_OK); }\n\nvoid provider_basic_test(const service_creator_func 
&service_creator,\n                         const service_deleter_func &service_deleter)\n{\n    // environment\n    auto service = service_creator();\n\n    // boundary check\n    service->node_exist(\"/\", META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_ok)->wait();\n    service->node_exist(\"\", META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_err)->wait();\n    // recursive delete test\n    {\n        service->create_node(\"/1\", META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_ok)->wait();\n        service->node_exist(\"/1\", META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_ok)->wait();\n        service->create_node(\"/1/1\", META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_ok)->wait();\n        service->get_children(\"/1\",\n                              META_STATE_SERVICE_SIMPLE_TEST_CALLBACK,\n                              [](error_code ec, const std::vector<std::string> &children) {\n                                  dassert(ec == ERR_OK && children.size() == 1 &&\n                                              *children.begin() == \"1\",\n                                          \"unexpected child\");\n                              });\n        service->node_exist(\"/1/1\", META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_ok)->wait();\n        service->delete_node(\"/1\", false, META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_err)\n            ->wait();\n        service->delete_node(\"/1\", true, META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_ok)\n            ->wait();\n        service->node_exist(\"/1\", META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_err)->wait();\n    }\n    // repeat create test\n    {\n        service->create_node(\"/1\", META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_ok)->wait();\n        service->create_node(\"/1\", META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_err)->wait();\n    }\n    // check replay\n    {\n        service_deleter(service);\n        service = service_creator();\n        service->node_exist(\"/1\", 
META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_ok)->wait();\n        service->node_exist(\"/1/1\", META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_err)->wait();\n        service->delete_node(\"/1\", false, META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_ok)\n            ->wait();\n    }\n    // set & get data\n    {\n        dsn::binary_writer writer;\n        writer.write(0xdeadbeef);\n        service\n            ->create_node(\n                \"/1\", META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_ok, writer.get_buffer())\n            ->wait();\n        service\n            ->get_data(\"/1\",\n                       META_STATE_SERVICE_SIMPLE_TEST_CALLBACK,\n                       [](error_code ec, const dsn::blob &value) {\n                           expect_ok(ec);\n                           dsn::binary_reader reader(value);\n                           int read_value;\n                           reader.read(read_value);\n                           dassert(read_value == 0xdeadbeef, \"get_value != create_value\");\n                       })\n            ->wait();\n        writer = dsn::binary_writer();\n        writer.write(0xbeefdead);\n        service\n            ->set_data(\n                \"/1\", writer.get_buffer(), META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_ok)\n            ->wait();\n        service\n            ->get_data(\"/1\",\n                       META_STATE_SERVICE_SIMPLE_TEST_CALLBACK,\n                       [](error_code ec, const dsn::blob &value) {\n                           expect_ok(ec);\n                           dsn::binary_reader reader(value);\n                           int read_value;\n                           reader.read(read_value);\n                           dassert(read_value == 0xbeefdead, \"get_value != create_value\");\n                       })\n            ->wait();\n    }\n    // clean the node created in previous code-block, to support test in next round\n    {\n        service->delete_node(\"/1\", false, 
META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_ok)\n            ->wait();\n    }\n\n    typedef dsn::dist::meta_state_service::transaction_entries TEntries;\n    // transaction op\n    {\n        // basic\n        dsn::binary_writer writer;\n        writer.write(0xdeadbeef);\n        std::shared_ptr<TEntries> entries = service->new_transaction_entries(5);\n        entries->create_node(\"/2\");\n        entries->create_node(\"/2/2\");\n        entries->create_node(\"/2/3\");\n        entries->set_data(\"/2\", writer.get_buffer());\n        entries->delete_node(\"/2/3\");\n\n        auto tsk = service->submit_transaction(\n            entries, META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_ok);\n        tsk->wait();\n        for (unsigned int i = 0; i < 5; ++i) {\n            EXPECT_TRUE(entries->get_result(i) == ERR_OK);\n        }\n\n        // an invalid operation will stop whole transaction\n        entries = service->new_transaction_entries(5);\n        entries->create_node(\"/3\");\n        entries->create_node(\"/4\");\n        entries->delete_node(\"/2\"); // delete a non empty dir\n        entries->create_node(\"/5\");\n\n        service->submit_transaction(entries, META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_err)\n            ->wait();\n        error_code err[4] = {ERR_OK, ERR_OK, ERR_INVALID_PARAMETERS, ERR_INCONSISTENT_STATE};\n        for (unsigned int i = 0; i < 4; ++i)\n            EXPECT_EQ(err[i], entries->get_result(i));\n\n        // another invalid transaction\n        entries = service->new_transaction_entries(5);\n        entries->create_node(\"/3\");\n        entries->create_node(\"/4\");\n        entries->delete_node(\"/5\"); // delete a non exist dir\n        // although this is also invalid, but ignored due to previous one has stop the transaction\n        entries->set_data(\"/5\", writer.get_buffer());\n\n        err[2] = ERR_OBJECT_NOT_FOUND;\n        service->submit_transaction(entries, META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, 
expect_err)\n            ->wait();\n        for (unsigned int i = 0; i < 4; ++i)\n            EXPECT_EQ(err[i], entries->get_result(i));\n    }\n\n    // check replay with transaction\n    {\n        service_deleter(service);\n        service = service_creator();\n\n        service\n            ->get_children(\"/2\",\n                           META_STATE_SERVICE_SIMPLE_TEST_CALLBACK,\n                           [](error_code ec, const std::vector<std::string> &children) {\n                               ASSERT_TRUE(children.size() == 1 && children[0] == \"2\");\n                           })\n            ->wait();\n\n        service\n            ->get_data(\"/2\",\n                       META_STATE_SERVICE_SIMPLE_TEST_CALLBACK,\n                       [](error_code ec, const blob &value) {\n                           ASSERT_TRUE(ec == ERR_OK);\n                           binary_reader reader(value);\n                           int content_value;\n                           reader.read(content_value);\n                           ASSERT_TRUE(content_value == 0xdeadbeef);\n                       })\n            ->wait();\n    }\n\n    // delete the nodes created just now, using transaction delete\n    {\n        std::shared_ptr<TEntries> entries = service->new_transaction_entries(2);\n        entries->delete_node(\"/2/2\");\n        entries->delete_node(\"/2\");\n\n        service->submit_transaction(entries, META_STATE_SERVICE_SIMPLE_TEST_CALLBACK, expect_ok)\n            ->wait();\n        error_code err[2] = {ERR_OK, ERR_OK};\n\n        for (unsigned int i = 0; i < 2; ++i)\n            EXPECT_EQ(err[i], entries->get_result(i));\n    }\n\n    service_deleter(service);\n}\n\nvoid recursively_create_node_callback(meta_state_service *service,\n                                      dsn::task_tracker *tracker,\n                                      const std::string &root,\n                                      int current_layer,\n                                      
error_code ec)\n{\n    ASSERT_TRUE(ec == ERR_OK);\n    if (current_layer <= 0)\n        return;\n\n    for (int i = 0; i != 10; ++i) {\n        std::string subroot = root + \"/\" + boost::lexical_cast<std::string>(i);\n        service->create_node(subroot,\n                             META_STATE_SERVICE_SIMPLE_TEST_CALLBACK,\n                             std::bind(recursively_create_node_callback,\n                                       service,\n                                       tracker,\n                                       subroot,\n                                       current_layer - 1,\n                                       std::placeholders::_1),\n                             blob(),\n                             tracker);\n    }\n}\n\nvoid provider_recursively_create_delete_test(const service_creator_func &creator,\n                                             const service_deleter_func &deleter)\n{\n    meta_state_service *service = creator();\n    dsn::task_tracker tracker;\n\n    service\n        ->delete_node(\"/r\",\n                      true,\n                      META_STATE_SERVICE_SIMPLE_TEST_CALLBACK,\n                      [](error_code ec) { ddebug(\"result: %s\", ec.to_string()); })\n        ->wait();\n    service->create_node(\n        \"/r\",\n        META_STATE_SERVICE_SIMPLE_TEST_CALLBACK,\n        std::bind(\n            recursively_create_node_callback, service, &tracker, \"/r\", 1, std::placeholders::_1),\n        blob(),\n        &tracker);\n    tracker.wait_outstanding_tasks();\n\n    std::this_thread::sleep_for(std::chrono::seconds(1));\n    deleter(service);\n}\n\n#undef expect_ok\n#undef expect_err\n\nTEST(meta_state_service, simple)\n{\n    auto simple_service_creator = [] {\n        meta_state_service_simple *svc = new meta_state_service_simple();\n        svc->initialize({});\n        return svc;\n    };\n    auto simple_service_deleter = [](meta_state_service *simple_svc) { delete simple_svc; };\n\n    
provider_basic_test(simple_service_creator, simple_service_deleter);\n    provider_recursively_create_delete_test(simple_service_creator, simple_service_deleter);\n}\n\nTEST(meta_state_service, zookeeper)\n{\n    auto zookeeper_service_creator = [] {\n        meta_state_service_zookeeper *svc = new meta_state_service_zookeeper();\n        svc->initialize({});\n        return svc;\n    };\n    auto zookeeper_service_deleter = [](meta_state_service *zookeeper_svc) {\n        ASSERT_EQ(zookeeper_svc->finalize(), ERR_OK);\n    };\n\n    provider_basic_test(zookeeper_service_creator, zookeeper_service_deleter);\n    provider_recursively_create_delete_test(zookeeper_service_creator, zookeeper_service_deleter);\n}\n"
  },
  {
    "path": "src/meta/test/meta_state/run.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nif [ -z \"${REPORT_DIR}\" ]; then\n    REPORT_DIR=\".\"\nfi\n\nwhile read -r -a line; do\n    test_case=${line[0]}\n    gtest_filter=${line[1]}\n    output_xml=\"${REPORT_DIR}/dsn_meta_state_tests_${test_case/.ini/.xml}\"\n    echo \"============ run dsn_meta_state_tests ${test_case} with gtest_filter ${gtest_filter} ============\"\n    ./clear.sh\n    GTEST_OUTPUT=\"xml:${output_xml}\" GTEST_FILTER=${gtest_filter} ./dsn_meta_state_tests ${test_case}\n\n    if [ $? -ne 0 ]; then\n        echo \"run dsn_meta_state_tests $test_case failed\"\n        echo \"---- ls ----\"\n        ls -l\n        if find . -name log.1.txt; then\n            echo \"---- tail -n 100 log.1.txt ----\"\n            tail -n 100 `find . 
-name log.1.txt`\n        fi\n        if [ -f core ]; then\n            echo \"---- gdb ./dsn_meta_state_tests core ----\"\n            gdb ./dsn_meta_state_tests core -ex \"thread apply all bt\" -ex \"set pagination 0\" -batch\n        fi\n        exit 1\n    fi\n    echo \"============ done dsn_meta_state_tests ${test_case} with gtest_filter ${gtest_filter} ============\"\ndone <gtest.filter\n"
  },
  {
    "path": "src/meta/test/meta_state_service_utils_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <gtest/gtest.h>\n#include <dsn/dist/meta_state_service.h>\n#include <fmt/format.h>\n#include <dsn/dist/replication.h>\n\n#include \"meta/meta_state_service_utils.h\"\n\nusing namespace dsn;\nusing namespace dsn::replication;\n\nstruct meta_state_service_utils_test : ::testing::Test\n{\n    void SetUp() override\n    {\n        _svc = utils::factory_store<dist::meta_state_service>::create(\"meta_state_service_simple\",\n                                                                      PROVIDER_TYPE_MAIN);\n\n        error_code err = _svc->initialize({});\n        ASSERT_EQ(err, ERR_OK);\n\n        _storage = new mss::meta_storage(_svc, &_tracker);\n    }\n\n    void TearDown() override\n    {\n        delete _svc;\n       
 delete _storage;\n    }\n\nprotected:\n    dist::meta_state_service *_svc;\n    mss::meta_storage *_storage;\n    task_tracker _tracker;\n};\n\nTEST_F(meta_state_service_utils_test, create_recursively)\n{\n    _storage->create_node_recursively(\n        std::queue<std::string>({\"/1\", \"2\", \"3\", \"4\"}), dsn::blob(\"a\", 0, 1), [&]() {\n            _storage->get_data(\"/1\", [](const blob &val) { ASSERT_EQ(val.data(), nullptr); });\n\n            _storage->get_data(\"/1/2\", [](const blob &val) { ASSERT_EQ(val.data(), nullptr); });\n\n            _storage->get_data(\"/1/2/3\", [](const blob &val) { ASSERT_EQ(val.data(), nullptr); });\n\n            _storage->get_data(\"/1/2/3/4\",\n                               [](const blob &val) { ASSERT_EQ(val.to_string(), \"a\"); });\n        });\n    _tracker.wait_outstanding_tasks();\n\n    _storage->create_node_recursively(std::queue<std::string>({\"/1\"}), dsn::blob(\"a\", 0, 1), [&]() {\n        _storage->get_data(\"/1\", [](const blob &val) { ASSERT_EQ(val.data(), nullptr); });\n    });\n    _tracker.wait_outstanding_tasks();\n\n    _storage->delete_node_recursively(\"/1\", []() {});\n    _tracker.wait_outstanding_tasks();\n}\n\nTEST_F(meta_state_service_utils_test, delete_and_get)\n{\n    // create and delete\n    _storage->create_node(\n        \"/2\", dsn::blob(\"b\", 0, 1), [&]() { _storage->delete_node(\"/2\", []() {}); });\n    _tracker.wait_outstanding_tasks();\n\n    // try get\n    _storage->get_data(\"/2\", [](const blob &val) { ASSERT_EQ(val.data(), nullptr); });\n    _tracker.wait_outstanding_tasks();\n}\n\nTEST_F(meta_state_service_utils_test, delete_recursively)\n{\n    _storage->create_node_recursively(\n        std::queue<std::string>({\"/1\", \"2\", \"3\", \"4\"}), dsn::blob(\"c\", 0, 1), [&]() {\n            _storage->set_data(\"/1\", dsn::blob(\"c\", 0, 1), [&]() {\n                _storage->get_data(\"/1\", [](const blob &val) { ASSERT_EQ(val.to_string(), \"c\"); });\n            });\n        
});\n    _tracker.wait_outstanding_tasks();\n\n    _storage->delete_node_recursively(\"/1\", [&]() {\n        _storage->get_data(\"/1\", [](const blob &val) { ASSERT_EQ(val.data(), nullptr); });\n    });\n    _tracker.wait_outstanding_tasks();\n}\n\nTEST_F(meta_state_service_utils_test, concurrent)\n{\n    for (int i = 1; i <= 100; i++) {\n        binary_writer w;\n        w.write(std::to_string(i));\n\n        _storage->create_node(fmt::format(\"/{}\", i), w.get_buffer(), [&]() {});\n    }\n    _tracker.wait_outstanding_tasks();\n\n    for (int i = 1; i <= 100; i++) {\n        _storage->get_data(fmt::format(\"/{}\", i), [i](const blob &val) {\n            binary_reader rd(val);\n\n            std::string value_str;\n            rd.read(value_str);\n            ASSERT_EQ(value_str, std::to_string(i));\n        });\n    }\n    _tracker.wait_outstanding_tasks();\n\n    // ensure everything is cleared\n    for (int i = 1; i <= 100; i++) {\n        _storage->delete_node(fmt::format(\"/{}\", i), [i, this]() {\n            _storage->get_data(fmt::format(\"/{}\", i),\n                               [](const blob &val) { ASSERT_EQ(val.data(), nullptr); });\n        });\n    }\n    _tracker.wait_outstanding_tasks();\n}\n\nTEST_F(meta_state_service_utils_test, get_children)\n{\n    _storage->create_node(\"/1\", dsn::blob(), [this]() {\n        _storage->create_node(\"/1/99\", dsn::blob(), []() {});\n        _storage->create_node(\"/1/999\", dsn::blob(), []() {});\n        _storage->create_node(\"/1/9999\", dsn::blob(), []() {});\n    });\n    _tracker.wait_outstanding_tasks();\n\n    _storage->get_children(\"/1\", [](bool node_exists, const std::vector<std::string> &children) {\n        ASSERT_TRUE(node_exists);\n\n        auto children_copy = children;\n        std::sort(children_copy.begin(), children_copy.end());\n        ASSERT_EQ(children_copy, std::vector<std::string>({\"99\", \"999\", \"9999\"}));\n    });\n    _tracker.wait_outstanding_tasks();\n\n    
_storage->delete_node(\"/1/99\", []() {});\n    _storage->delete_node(\"/1/999\", []() {});\n    _storage->delete_node(\"/1/9999\", []() {});\n    _tracker.wait_outstanding_tasks();\n\n    _storage->get_children(\"/1\", [](bool node_exists, const std::vector<std::string> &children) {\n        ASSERT_TRUE(node_exists);\n        ASSERT_EQ(children.size(), 0);\n    });\n    _tracker.wait_outstanding_tasks();\n\n    _storage->delete_node_recursively(\"/1\", []() {});\n    _tracker.wait_outstanding_tasks();\n\n    _storage->get_children(\"/1\", [](bool node_exists, const std::vector<std::string> &children) {\n        ASSERT_FALSE(node_exists);\n        ASSERT_EQ(children.size(), 0);\n    });\n    _tracker.wait_outstanding_tasks();\n}\n"
  },
  {
    "path": "src/meta/test/meta_test_base.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"meta_test_base.h\"\n\n#include <dsn/dist/fmt_logging.h>\n\n#include \"meta/server_load_balancer.h\"\n#include \"meta/meta_server_failure_detector.h\"\n#include \"meta/meta_split_service.h\"\n#include \"meta/meta_bulk_load_service.h\"\n#include \"meta/test/misc/misc.h\"\n\n#include \"meta_service_test_app.h\"\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DECLARE_uint64(min_live_node_count_for_unfreeze);\n\nmeta_test_base::~meta_test_base() {}\n\nvoid meta_test_base::SetUp()\n{\n    _ms = make_unique<fake_receiver_meta_service>();\n    _ms->_failure_detector.reset(new meta_server_failure_detector(_ms.get()));\n    _ms->_balancer.reset(utils::factory_store<server_load_balancer>::create(\n        _ms->_meta_opts._lb_opts.server_load_balancer_type.c_str(), PROVIDER_TYPE_MAIN, _ms.get()));\n    _ms->_partition_guardian.reset(utils::factory_store<partition_guardian>::create(\n        _ms->_meta_opts.partition_guardian_type.c_str(), PROVIDER_TYPE_MAIN, _ms.get()));\n    ASSERT_EQ(_ms->remote_storage_initialize(), ERR_OK);\n    _ms->initialize_duplication_service();\n    ASSERT_TRUE(_ms->_dup_svc);\n    _ms->_split_svc = 
make_unique<meta_split_service>(_ms.get());\n    ASSERT_TRUE(_ms->_split_svc);\n    _ms->_bulk_load_svc = make_unique<bulk_load_service>(\n        _ms.get(), meta_options::concat_path_unix_style(_ms->_cluster_root, \"bulk_load\"));\n    ASSERT_TRUE(_ms->_bulk_load_svc);\n    _ms->_bulk_load_svc->initialize_bulk_load_service();\n\n    _ss = _ms->_state;\n    _ss->initialize(_ms.get(), _ms->_cluster_root + \"/apps\");\n    _app_root = _ss->_apps_root;\n\n    _ms->_started = true;\n    _ms->set_function_level(meta_function_level::fl_steady);\n\n    // recover apps from meta storage\n    ASSERT_EQ(_ss->initialize_data_structure(), ERR_OK);\n}\n\nvoid meta_test_base::TearDown()\n{\n    if (_ss && _ms) {\n        delete_all_on_meta_storage();\n    }\n\n    _ss.reset();\n    _ms.reset(nullptr);\n}\n\nvoid meta_test_base::delete_all_on_meta_storage()\n{\n    _ms->get_meta_storage()->get_children(\n        {\"/\"}, [this](bool, const std::vector<std::string> &children) {\n            for (const std::string &child : children) {\n                _ms->get_meta_storage()->delete_node_recursively(\"/\" + child, []() {});\n            }\n        });\n    wait_all();\n}\n\nvoid meta_test_base::initialize_node_state() { _ss->initialize_node_state(); }\n\nvoid meta_test_base::wait_all() { _ms->tracker()->wait_outstanding_tasks(); }\n\nvoid meta_test_base::set_min_live_node_count_for_unfreeze(uint64_t node_count)\n{\n    FLAGS_min_live_node_count_for_unfreeze = node_count;\n}\n\nvoid meta_test_base::set_node_live_percentage_threshold_for_update(uint64_t percentage_threshold)\n{\n    _ms->_node_live_percentage_threshold_for_update = percentage_threshold;\n}\n\nstd::vector<rpc_address> meta_test_base::get_alive_nodes() const\n{\n    std::vector<dsn::rpc_address> nodes;\n\n    zauto_read_lock l(_ss->_lock);\n\n    for (const auto &node : _ss->_nodes) {\n        if (node.second.alive()) {\n            nodes.push_back(node.first);\n        }\n    }\n\n    return 
nodes;\n}\n\nstd::vector<rpc_address> meta_test_base::ensure_enough_alive_nodes(int min_node_count)\n{\n    if (min_node_count < 1) {\n        return std::vector<dsn::rpc_address>();\n    }\n\n    std::vector<dsn::rpc_address> nodes(get_alive_nodes());\n    if (!nodes.empty()) {\n        auto node_count = static_cast<int>(nodes.size());\n        dassert_f(node_count >= min_node_count,\n                  \"there should be at least {} alive nodes, now we just have {} alive nodes\",\n                  min_node_count,\n                  node_count);\n\n        dinfo_f(\"already exists {} alive nodes: \", nodes.size());\n        for (const auto &node : nodes) {\n            dinfo_f(\"    {}\", node.to_string());\n        }\n\n        // ensure that _ms->_alive_set is identical with _ss->_nodes\n        _ms->set_node_state(nodes, true);\n        return nodes;\n    }\n\n    nodes = generate_node_list(min_node_count);\n    _ms->set_node_state(nodes, true);\n\n    while (true) {\n        {\n            std::vector<dsn::rpc_address> alive_nodes(get_alive_nodes());\n            if (static_cast<int>(alive_nodes.size()) >= min_node_count) {\n                break;\n            }\n        }\n\n        std::this_thread::sleep_for(std::chrono::milliseconds(5));\n    }\n\n    dinfo_f(\"created {} alive nodes: \", nodes.size());\n    for (const auto &node : nodes) {\n        dinfo_f(\"    {}\", node.to_string());\n    }\n    return nodes;\n}\n\nvoid meta_test_base::create_app(const std::string &name, uint32_t partition_count)\n{\n    configuration_create_app_request req;\n    configuration_create_app_response resp;\n    req.app_name = name;\n    req.options.app_type = \"simple_kv\";\n    req.options.partition_count = partition_count;\n    req.options.replica_count = 3;\n    req.options.success_if_exist = false;\n    req.options.is_stateful = true;\n    req.options.envs[\"value_version\"] = \"1\";\n\n    set_min_live_node_count_for_unfreeze(2);\n    ensure_enough_alive_nodes(3);\n\n  
  auto result = fake_create_app(_ss.get(), req);\n    fake_wait_rpc(result, resp);\n    ASSERT_EQ(resp.err, ERR_OK) << resp.err.to_string() << \" \" << name;\n\n    // wait for the table to create\n    ASSERT_TRUE(_ss->spin_wait_staging(30));\n}\n\n// drop an app for test.\nvoid meta_test_base::drop_app(const std::string &name)\n{\n    configuration_drop_app_request req;\n    configuration_drop_app_response resp;\n    req.app_name = name;\n    req.options.success_if_not_exist = false;\n\n    auto result = fake_drop_app(_ss.get(), req);\n    fake_wait_rpc(result, resp);\n    ASSERT_EQ(resp.err, ERR_OK) << resp.err.to_string() << \" \" << name;\n\n    ASSERT_TRUE(_ss->spin_wait_staging(30));\n}\n\nstd::shared_ptr<app_state> meta_test_base::find_app(const std::string &name)\n{\n    return _ss->get_app(name);\n}\n\nconfiguration_update_app_env_response\nmeta_test_base::update_app_envs(const std::string &app_name,\n                                const std::vector<std::string> &env_keys,\n                                const std::vector<std::string> &env_vals)\n{\n    auto req = make_unique<configuration_update_app_env_request>();\n    req->__set_app_name(std::move(app_name));\n    req->__set_op(std::move(app_env_operation::type::APP_ENV_OP_SET));\n    req->__set_keys(env_keys);\n    req->__set_values(env_vals);\n\n    app_env_rpc rpc(std::move(req), RPC_CM_UPDATE_APP_ENV); // don't need reply\n    _ss->set_app_envs(rpc);\n    _ss->wait_all_task();\n    return rpc.response();\n}\n\nvoid meta_test_base::mock_node_state(const rpc_address &addr, const node_state &node)\n{\n    _ss->_nodes[addr] = node;\n}\n\nmeta_duplication_service &meta_test_base::dup_svc() { return *(_ms->_dup_svc); }\n\nmeta_split_service &meta_test_base::split_svc() { return *(_ms->_split_svc); }\n\nbulk_load_service &meta_test_base::bulk_svc() { return *(_ms->_bulk_load_svc); }\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/meta_test_base.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n\n#include <dsn/dist/replication/replication_types.h>\n\n#include \"meta/meta_data.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass meta_split_service;\nclass meta_duplication_service;\nclass bulk_load_service;\nclass meta_service;\nclass server_state;\n\nclass meta_test_base : public testing::Test\n{\npublic:\n    ~meta_test_base();\n\n    void SetUp() override;\n\n    void TearDown() override;\n\n    void delete_all_on_meta_storage();\n\n    void initialize_node_state();\n\n    void wait_all();\n\n    void set_min_live_node_count_for_unfreeze(uint64_t node_count);\n\n    void set_node_live_percentage_threshold_for_update(uint64_t percentage_threshold);\n\n    std::vector<rpc_address> ensure_enough_alive_nodes(int min_node_count);\n\n    // create an app for test with specified name and specified partition count\n    void create_app(const std::string &name, uint32_t partition_count);\n\n    void create_app(const std::string &name) { create_app(name, 8); }\n\n    // drop an app for test.\n    void drop_app(const std::string &name);\n\n    configuration_update_app_env_response update_app_envs(const std::string 
&app_name,\n                                                          const std::vector<std::string> &env_keys,\n                                                          const std::vector<std::string> &env_vals);\n\n    void mock_node_state(const rpc_address &addr, const node_state &node);\n\n    std::shared_ptr<app_state> find_app(const std::string &name);\n\n    meta_duplication_service &dup_svc();\n\n    meta_split_service &split_svc();\n\n    bulk_load_service &bulk_svc();\n\n    std::shared_ptr<server_state> _ss;\n    std::unique_ptr<meta_service> _ms;\n    std::string _app_root;\n\nprivate:\n    std::vector<rpc_address> get_alive_nodes() const;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/misc/misc.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <cstdlib>\n#include <iostream>\n#include <boost/lexical_cast.hpp>\n#include <dsn/utility/rand.h>\n\n#include \"common/replication_common.h\"\n#include \"misc.h\"\n\nusing namespace dsn::replication;\n\n#define ASSERT_EQ(left, right) dassert((left) == (right), \"\")\n#define ASSERT_TRUE(exp) dassert((exp), \"\")\n#define ASSERT_FALSE(exp) dassert(!(exp), \"\")\n\nuint32_t random32(uint32_t min, uint32_t max)\n{\n    uint32_t res = (uint32_t)(rand() % (max - min + 1));\n    return res + min;\n}\n\nvoid generate_node_list(std::vector<dsn::rpc_address> &output_list, int min_count, int max_count)\n{\n    int count = random32(min_count, max_count);\n    output_list.resize(count);\n    for (int i = 0; i < count; ++i)\n        
output_list[i].assign_ipv4(\"127.0.0.1\", i + 1);\n}\n\nvoid verbose_apps(const app_mapper &input_apps)\n{\n    std::cout << input_apps.size() << std::endl;\n    for (const auto &apps : input_apps) {\n        const std::shared_ptr<app_state> &app = apps.second;\n        std::cout << apps.first << \" \" << app->partition_count << std::endl;\n        for (int i = 0; i < app->partition_count; ++i) {\n            std::cout << app->partitions[i].secondaries.size() + 1 << \" \"\n                      << app->partitions[i].primary.to_string();\n            for (int j = 0; j < app->partitions[i].secondaries.size(); ++j) {\n                std::cout << \" \" << app->partitions[i].secondaries[j].to_string();\n            }\n            std::cout << std::endl;\n        }\n    }\n}\n\nvoid generate_node_mapper(\n    /*out*/ node_mapper &output_nodes,\n    const app_mapper &input_apps,\n    const std::vector<dsn::rpc_address> &input_node_list)\n{\n    output_nodes.clear();\n    for (auto &addr : input_node_list) {\n        get_node_state(output_nodes, addr, true)->set_alive(true);\n    }\n\n    for (auto &kv : input_apps) {\n        const std::shared_ptr<app_state> &app = kv.second;\n        for (const dsn::partition_configuration &pc : app->partitions) {\n            node_state *ns;\n            if (!pc.primary.is_invalid()) {\n                ns = get_node_state(output_nodes, pc.primary, true);\n                ns->put_partition(pc.pid, true);\n            }\n            for (const dsn::rpc_address &sec : pc.secondaries) {\n                ASSERT_FALSE(sec.is_invalid());\n                ns = get_node_state(output_nodes, sec, true);\n                ns->put_partition(pc.pid, false);\n            }\n        }\n    }\n}\n\nvoid generate_app(/*out*/ std::shared_ptr<app_state> &app,\n                  const std::vector<dsn::rpc_address> &node_list)\n{\n    for (dsn::partition_configuration &pc : app->partitions) {\n        pc.ballot = random32(1, 10000);\n        std::vector<int> 
indices(3, 0);\n        indices[0] = random32(0, node_list.size() - 3);\n        indices[1] = random32(indices[0] + 1, node_list.size() - 2);\n        indices[2] = random32(indices[1] + 1, node_list.size() - 1);\n\n        int p = random32(0, 2);\n        pc.primary = node_list[indices[p]];\n        pc.secondaries.clear();\n        for (unsigned int i = 0; i != indices.size(); ++i)\n            if (i != p)\n                pc.secondaries.push_back(node_list[indices[i]]);\n\n        ASSERT_FALSE(pc.primary.is_invalid());\n        ASSERT_FALSE(is_secondary(pc, pc.primary));\n        ASSERT_EQ(pc.secondaries.size(), 2);\n        ASSERT_TRUE(pc.secondaries[0] != pc.secondaries[1]);\n    }\n}\n\nvoid generate_app_serving_replica_info(/*out*/ std::shared_ptr<dsn::replication::app_state> &app,\n                                       int total_disks)\n{\n    char buffer[256];\n    for (int i = 0; i < app->partition_count; ++i) {\n        config_context &cc = app->helpers->contexts[i];\n        dsn::partition_configuration &pc = app->partitions[i];\n        replica_info ri;\n\n        snprintf(buffer, 256, \"disk%u\", dsn::rand::next_u32(1, total_disks));\n        ri.disk_tag = buffer;\n        cc.collect_serving_replica(pc.primary, ri);\n\n        for (const dsn::rpc_address &addr : pc.secondaries) {\n            snprintf(buffer, 256, \"disk%u\", dsn::rand::next_u32(1, total_disks));\n            ri.disk_tag = buffer;\n            cc.collect_serving_replica(addr, ri);\n        }\n    }\n}\n\nvoid generate_apps(/*out*/ dsn::replication::app_mapper &mapper,\n                   const std::vector<dsn::rpc_address> &node_list,\n                   int apps_count,\n                   int disks_per_node,\n                   std::pair<uint32_t, uint32_t> partitions_range,\n                   bool generate_serving_info)\n{\n    mapper.clear();\n    dsn::app_info info;\n    for (int i = 1; i <= apps_count; ++i) {\n        info.status = dsn::app_status::AS_AVAILABLE;\n        
info.app_id = i;\n        info.is_stateful = true;\n        info.app_name = \"test_app\" + boost::lexical_cast<std::string>(i);\n        info.app_type = \"test\";\n        info.max_replica_count = 3;\n        info.partition_count = random32(partitions_range.first, partitions_range.second);\n        std::shared_ptr<app_state> the_app = app_state::create(info);\n        generate_app(the_app, node_list);\n\n        if (generate_serving_info) {\n            generate_app_serving_replica_info(the_app, disks_per_node);\n        }\n        dinfo(\"generated app, partitions(%d)\", info.partition_count);\n        mapper.emplace(the_app->app_id, the_app);\n    }\n}\n\nvoid generate_node_fs_manager(const app_mapper &apps,\n                              const node_mapper &nodes,\n                              /*out*/ nodes_fs_manager &nfm,\n                              int total_disks)\n{\n    nfm.clear();\n    const char *prefix = \"/home/work/\";\n    char pid_dir[256];\n    std::vector<std::string> data_dirs(total_disks);\n    std::vector<std::string> tags(total_disks);\n    for (int i = 0; i < data_dirs.size(); ++i) {\n        snprintf(pid_dir, 256, \"%sdisk%d\", prefix, i + 1);\n        data_dirs[i] = pid_dir;\n        snprintf(pid_dir, 256, \"disk%d\", i + 1);\n        tags[i] = pid_dir;\n    }\n\n    for (const auto &kv : nodes) {\n        const node_state &ns = kv.second;\n        if (nfm.find(ns.addr()) == nfm.end()) {\n            nfm.emplace(ns.addr(), std::make_shared<fs_manager>(true));\n        }\n        fs_manager &manager = *(nfm.find(ns.addr())->second);\n        manager.initialize(data_dirs, tags, true);\n        ns.for_each_partition([&](const dsn::gpid &pid) {\n            const config_context &cc = *get_config_context(apps, pid);\n            snprintf(pid_dir,\n                     256,\n                     \"%s%s/%d.%d.test\",\n                     prefix,\n                     cc.find_from_serving(ns.addr())->disk_tag.c_str(),\n                     
pid.get_app_id(),\n                     pid.get_partition_index());\n            dinfo(\"concat pid_dir(%s) of node(%s)\", pid_dir, ns.addr().to_string());\n            manager.add_replica(pid, pid_dir);\n            return true;\n        });\n    }\n}\n\nvoid track_disk_info_check_and_apply(const dsn::replication::configuration_proposal_action &act,\n                                     const dsn::gpid &pid,\n                                     /*in-out*/ dsn::replication::app_mapper &apps,\n                                     /*in-out*/ dsn::replication::node_mapper & /*nodes*/,\n                                     /*in-out*/ nodes_fs_manager &manager)\n{\n    config_context *cc = get_config_context(apps, pid);\n    ASSERT_TRUE(cc != nullptr);\n\n    fs_manager *target_manager = get_fs_manager(manager, act.target);\n    ASSERT_TRUE(target_manager != nullptr);\n    fs_manager *node_manager = get_fs_manager(manager, act.node);\n    ASSERT_TRUE(node_manager != nullptr);\n\n    std::string dir;\n    replica_info ri;\n    switch (act.type) {\n    case config_type::CT_ASSIGN_PRIMARY:\n        target_manager->allocate_dir(pid, \"test\", dir);\n        ASSERT_EQ(dsn::ERR_OK, target_manager->get_disk_tag(dir, ri.disk_tag));\n        cc->collect_serving_replica(act.target, ri);\n        break;\n\n    case config_type::CT_ADD_SECONDARY:\n    case config_type::CT_ADD_SECONDARY_FOR_LB:\n        node_manager->allocate_dir(pid, \"test\", dir);\n        ASSERT_EQ(dsn::ERR_OK, node_manager->get_disk_tag(dir, ri.disk_tag));\n        cc->collect_serving_replica(act.node, ri);\n        break;\n\n    case config_type::CT_DOWNGRADE_TO_SECONDARY:\n    case config_type::CT_UPGRADE_TO_PRIMARY:\n        break;\n\n    case config_type::CT_REMOVE:\n    case config_type::CT_DOWNGRADE_TO_INACTIVE:\n        node_manager->remove_replica(pid);\n        cc->remove_from_serving(act.node);\n        break;\n\n    default:\n        ASSERT_TRUE(false);\n        break;\n    }\n}\n\nvoid 
proposal_action_check_and_apply(const configuration_proposal_action &act,\n                                     const dsn::gpid &pid,\n                                     app_mapper &apps,\n                                     node_mapper &nodes,\n                                     nodes_fs_manager *manager)\n{\n    dsn::partition_configuration &pc = *get_config(apps, pid);\n    node_state *ns;\n\n    ++pc.ballot;\n    ASSERT_TRUE(act.type != config_type::CT_INVALID);\n    ASSERT_FALSE(act.target.is_invalid());\n    ASSERT_FALSE(act.node.is_invalid());\n\n    if (manager) {\n        track_disk_info_check_and_apply(act, pid, apps, nodes, *manager);\n    }\n\n    switch (act.type) {\n    case config_type::CT_ASSIGN_PRIMARY:\n        ASSERT_EQ(act.node, act.target);\n        ASSERT_TRUE(pc.primary.is_invalid());\n        ASSERT_TRUE(pc.secondaries.empty());\n\n        pc.primary = act.node;\n        ns = &nodes[act.node];\n        ASSERT_EQ(ns->served_as(pc.pid), partition_status::PS_INACTIVE);\n        ns->put_partition(pc.pid, true);\n        break;\n\n    case config_type::CT_ADD_SECONDARY:\n        ASSERT_EQ(act.target, pc.primary);\n        ASSERT_FALSE(is_member(pc, act.node));\n\n        pc.secondaries.push_back(act.node);\n        ns = &nodes[act.node];\n        ASSERT_EQ(ns->served_as(pc.pid), partition_status::PS_INACTIVE);\n        ns->put_partition(pc.pid, false);\n\n        break;\n\n    case config_type::CT_DOWNGRADE_TO_SECONDARY:\n        ASSERT_EQ(act.node, act.target);\n        ASSERT_EQ(act.node, pc.primary);\n        ASSERT_TRUE(nodes.find(act.node) != nodes.end());\n        ASSERT_FALSE(is_secondary(pc, pc.primary));\n        nodes[act.node].remove_partition(pc.pid, true);\n        pc.secondaries.push_back(pc.primary);\n        pc.primary.set_invalid();\n        break;\n\n    case config_type::CT_UPGRADE_TO_PRIMARY:\n        ASSERT_TRUE(pc.primary.is_invalid());\n        ASSERT_EQ(act.node, act.target);\n        ASSERT_TRUE(is_secondary(pc, 
act.node));\n        ASSERT_TRUE(nodes.find(act.node) != nodes.end());\n\n        ns = &nodes[act.node];\n        pc.primary = act.node;\n        ASSERT_TRUE(replica_helper::remove_node(act.node, pc.secondaries));\n        ns->put_partition(pc.pid, true);\n        break;\n\n    case config_type::CT_ADD_SECONDARY_FOR_LB:\n        ASSERT_EQ(act.target, pc.primary);\n        ASSERT_FALSE(is_member(pc, act.node));\n        ASSERT_FALSE(act.node.is_invalid());\n        pc.secondaries.push_back(act.node);\n\n        ns = &nodes[act.node];\n        ns->put_partition(pc.pid, false);\n        ASSERT_EQ(ns->served_as(pc.pid), partition_status::PS_SECONDARY);\n        break;\n\n    // in balancer, remove primary is not allowed\n    case config_type::CT_REMOVE:\n    case config_type::CT_DOWNGRADE_TO_INACTIVE:\n        ASSERT_FALSE(pc.primary.is_invalid());\n        ASSERT_EQ(pc.primary, act.target);\n        ASSERT_TRUE(is_secondary(pc, act.node));\n        ASSERT_TRUE(nodes.find(act.node) != nodes.end());\n        ASSERT_TRUE(replica_helper::remove_node(act.node, pc.secondaries));\n\n        ns = &nodes[act.node];\n        ASSERT_EQ(ns->served_as(pc.pid), partition_status::PS_SECONDARY);\n        ns->remove_partition(pc.pid, false);\n        break;\n\n    default:\n        ASSERT_TRUE(false);\n        break;\n    }\n}\n\nvoid migration_check_and_apply(app_mapper &apps,\n                               node_mapper &nodes,\n                               migration_list &ml,\n                               nodes_fs_manager *manager)\n{\n    int i = 0;\n    for (auto kv = ml.begin(); kv != ml.end(); ++kv) {\n        std::shared_ptr<configuration_balancer_request> &proposal = kv->second;\n        dinfo(\"the %dth round of proposal, gpid(%d.%d)\",\n              i++,\n              proposal->gpid.get_app_id(),\n              proposal->gpid.get_partition_index());\n        std::shared_ptr<app_state> &the_app = apps.find(proposal->gpid.get_app_id())->second;\n\n        
ASSERT_EQ(proposal->gpid.get_app_id(), the_app->app_id);\n        ASSERT_TRUE(proposal->gpid.get_partition_index() < the_app->partition_count);\n        dsn::partition_configuration &pc =\n            the_app->partitions[proposal->gpid.get_partition_index()];\n\n        ASSERT_FALSE(pc.primary.is_invalid());\n        ASSERT_EQ(pc.secondaries.size(), 2);\n        for (auto &addr : pc.secondaries)\n            ASSERT_FALSE(addr.is_invalid());\n        ASSERT_FALSE(is_secondary(pc, pc.primary));\n\n        for (unsigned int j = 0; j < proposal->action_list.size(); ++j) {\n            configuration_proposal_action &act = proposal->action_list[j];\n            dinfo(\"the %dth round of action, type: %s, node: %s, target: %s\",\n                  j,\n                  dsn::enum_to_string(act.type),\n                  act.node.to_string(),\n                  act.target.to_string());\n            proposal_action_check_and_apply(act, proposal->gpid, apps, nodes, manager);\n        }\n    }\n}\n\nvoid app_mapper_compare(const app_mapper &mapper1, const app_mapper &mapper2)\n{\n    ASSERT_EQ(mapper1.size(), mapper2.size());\n    for (auto &kv : mapper1) {\n        const std::shared_ptr<app_state> &app1 = kv.second;\n        ASSERT_TRUE(mapper2.find(app1->app_id) != mapper2.end());\n        const std::shared_ptr<app_state> app2 = mapper2.find(app1->app_id)->second;\n\n        ASSERT_EQ(app1->app_id, app2->app_id);\n        ASSERT_EQ(app1->app_name, app2->app_name);\n        ASSERT_EQ(app1->app_type, app2->app_type);\n        ASSERT_EQ(app1->status, app2->status);\n        ASSERT_TRUE(app1->status == dsn::app_status::AS_AVAILABLE ||\n                    app1->status == dsn::app_status::AS_DROPPED);\n        if (app1->status == dsn::app_status::AS_AVAILABLE) {\n            ASSERT_EQ(app1->partition_count, app2->partition_count);\n            for (unsigned int i = 0; i < app1->partition_count; ++i) {\n                ASSERT_TRUE(is_partition_config_equal(app1->partitions[i], 
app2->partitions[i]));\n            }\n        }\n    }\n}\n\nbool spin_wait_condition(const std::function<bool()> &pred, int seconds)\n{\n    for (int i = 0; i != seconds; ++i) {\n        std::atomic_thread_fence(std::memory_order_seq_cst);\n        if (pred())\n            return true;\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n    }\n    return pred();\n}\n"
  },
  {
    "path": "src/meta/test/misc/misc.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <vector>\n#include <dsn/service_api_c.h>\n#include <dsn/service_api_cpp.h>\n#include \"meta/meta_data.h\"\n#include \"common/fs_manager.h\"\n\ntypedef std::map<dsn::rpc_address, std::shared_ptr<dsn::replication::fs_manager>> nodes_fs_manager;\n\ninline dsn::replication::fs_manager *get_fs_manager(nodes_fs_manager &nfm,\n                                                    const dsn::rpc_address &node)\n{\n    auto iter = nfm.find(node);\n    if (nfm.end() == iter)\n        return nullptr;\n    return iter->second.get();\n}\n\n// Generates a random number between [min, max]\nuint32_t random32(uint32_t min, uint32_t max);\n\n// Generates a random number [min_count, max_count] of node addresses\n// each node is 
given a random port value in range of [min_count, max_count]\nvoid generate_node_list(/*out*/ std::vector<dsn::rpc_address> &output_list,\n                        int min_count,\n                        int max_count);\n\n// Generates `size` of node addresses, each with port value in range [start_port, start_port + size]\ninline std::vector<dsn::rpc_address> generate_node_list(size_t size, int start_port = 12321)\n{\n    std::vector<dsn::rpc_address> result;\n    result.resize(size);\n    for (int i = 0; i < size; ++i)\n        result[i].assign_ipv4(\"127.0.0.1\", static_cast<uint16_t>(start_port + i + 1));\n    return result;\n}\n\n// This func randomly picks 3 nodes from `node_list` for each of the partition of the app.\n// For each partition, it picks one node as primary, the others as secondaries.\n// REQUIRES: node_list.size() >= 3\nvoid generate_app(\n    /*out*/ std::shared_ptr<dsn::replication::app_state> &app,\n    const std::vector<dsn::rpc_address> &node_list);\n\nvoid generate_node_mapper(\n    /*out*/ dsn::replication::node_mapper &output_nodes,\n    const dsn::replication::app_mapper &input_apps,\n    const std::vector<dsn::rpc_address> &input_node_list);\n\nvoid generate_app_serving_replica_info(/*out*/ std::shared_ptr<dsn::replication::app_state> &app,\n                                       int total_disks);\n\nvoid generate_node_fs_manager(const dsn::replication::app_mapper &apps,\n                              const dsn::replication::node_mapper &nodes,\n                              /*out*/ nodes_fs_manager &nfm,\n                              int total_disks);\n\nvoid generate_apps(/*out*/ dsn::replication::app_mapper &apps,\n                   const std::vector<dsn::rpc_address> &node_list,\n                   int apps_count,\n                   int disks_per_node,\n                   std::pair<uint32_t, uint32_t> partitions_range,\n                   bool generate_serving_info);\n\n// when the test need to track the disk info, please input 
the fs_manager of all disks,\n// the check_apply routine will modify it accordingly.\n// if track disk info is not necessary, please input a nullptr.\nvoid migration_check_and_apply(\n    /*in-out*/ dsn::replication::app_mapper &apps,\n    /*in-out*/ dsn::replication::node_mapper &nodes,\n    /*in-out*/ dsn::replication::migration_list &ml,\n    /*in-out*/ nodes_fs_manager *manager);\n\n// when the test need to track the disk info, please input the fs_manager of all disks,\n// the check_apply routine will modify it accordingly.\n// if track disk info is not necessary, please input a nullptr.\nvoid proposal_action_check_and_apply(const dsn::replication::configuration_proposal_action &act,\n                                     const dsn::gpid &pid,\n                                     dsn::replication::app_mapper &apps,\n                                     dsn::replication::node_mapper &nodes,\n                                     nodes_fs_manager *manager);\n\nvoid track_disk_info_check_and_apply(const dsn::replication::configuration_proposal_action &act,\n                                     const dsn::gpid &pid,\n                                     /*in-out*/ dsn::replication::app_mapper &apps,\n                                     /*in-out*/ dsn::replication::node_mapper &nodes,\n                                     /*in-out*/ nodes_fs_manager &manager);\n\nvoid app_mapper_compare(const dsn::replication::app_mapper &mapper1,\n                        const dsn::replication::app_mapper &mapper2);\n\nvoid verbose_apps(const dsn::replication::app_mapper &input_apps);\n\nbool spin_wait_condition(const std::function<bool()> &pred, int seconds);\n"
  },
  {
    "path": "src/meta/test/run.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nif [ -z \"${REPORT_DIR}\" ]; then\n    REPORT_DIR=\".\"\nfi\n\n./clear.sh\noutput_xml=\"${REPORT_DIR}/dsn.meta.test.1.xml\"\nGTEST_OUTPUT=\"xml:${output_xml}\" ./dsn.meta.test\n"
  },
  {
    "path": "src/meta/test/server_state_restore_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/cpp/json_helper.h>\n#include <dsn/utility/filesystem.h>\n#include <gtest/gtest.h>\n\n#include \"common/backup_common.h\"\n#include \"meta/meta_service.h\"\n#include \"meta/server_state.h\"\n#include \"meta_test_base.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass server_state_restore_test : public meta_test_base\n{\npublic:\n    server_state_restore_test()\n        : _old_app_name(\"test_table\"),\n          _new_app_name(\"new_table\"),\n          _cluster_name(\"onebox\"),\n          _provider(\"local_service\")\n    {\n    }\n\n    void SetUp() override\n    {\n        meta_test_base::SetUp();\n\n        // create a test app with 8 partitions.\n        create_app(_old_app_name);\n    }\n\n    start_backup_app_response start_backup(int64_t app_id,\n                                           const std::string user_specified_path = \"\")\n    {\n        auto request = dsn::make_unique<start_backup_app_request>();\n        request->app_id = app_id;\n        request->backup_provider_type = _provider;\n        if (!user_specified_path.empty()) {\n            request->__set_backup_path(user_specified_path);\n        }\n\n   
     start_backup_app_rpc rpc(std::move(request), RPC_CM_START_BACKUP_APP);\n        _ms->_backup_handler =\n            std::make_shared<backup_service>(_ms.get(), \"mock_policy_root\", _cluster_name, nullptr);\n        _ms->_backup_handler->start_backup_app(rpc);\n        wait_all();\n        return rpc.response();\n    }\n\n    configuration_restore_request create_restore_request(\n        int32_t old_app_id, int64_t backup_id, const std::string user_specified_restore_path = \"\")\n    {\n        configuration_restore_request req;\n        req.app_id = old_app_id;\n        req.app_name = _old_app_name;\n        req.new_app_name = _new_app_name;\n        req.time_stamp = backup_id;\n        req.cluster_name = _cluster_name;\n        req.backup_provider_name = _provider;\n        if (!user_specified_restore_path.empty()) {\n            req.__set_restore_path(user_specified_restore_path);\n        }\n        return req;\n    }\n\n    void test_restore_app(const std::string user_specified_path = \"\")\n    {\n        int32_t old_app_id;\n        {\n            zauto_read_lock l;\n            _ss->lock_read(l);\n            const std::shared_ptr<app_state> &app = _ss->get_app(_old_app_name);\n            old_app_id = app->app_id;\n        }\n\n        // test backup app\n        auto backup_resp = start_backup(old_app_id, user_specified_path);\n        ASSERT_EQ(ERR_OK, backup_resp.err);\n        ASSERT_TRUE(backup_resp.__isset.backup_id);\n        int64_t backup_id = backup_resp.backup_id;\n\n        // test sync_app_from_backup_media()\n        auto req = create_restore_request(old_app_id, backup_id, user_specified_path);\n        dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_START_RESTORE);\n        dsn::marshall(msg, req);\n        error_code ret = ERR_UNKNOWN;\n        dsn::blob app_info;\n        _ss->sync_app_from_backup_media(\n            req, [&ret, &app_info](dsn::error_code err, const dsn::blob &app_info_data) {\n                ret = 
err;\n                app_info = app_info_data;\n            });\n        while (ret == ERR_UNKNOWN) {\n            // sleep 10 ms.\n            usleep(10 * 1000);\n        }\n        ASSERT_EQ(ERR_OK, ret);\n        ASSERT_LT(0, app_info.length());\n\n        // test restore_app_info()\n        int32_t new_app_id = _ss->next_app_id();\n        auto pair = _ss->restore_app_info(msg, req, app_info);\n        ASSERT_EQ(ERR_OK, pair.first);\n        const std::shared_ptr<app_state> &new_app = pair.second;\n        ASSERT_EQ(new_app_id, new_app->app_id);\n        ASSERT_EQ(_new_app_name, new_app->app_name);\n        ASSERT_EQ(app_status::AS_CREATING, new_app->status);\n\n        // check app_envs\n        auto it = new_app->envs.find(backup_restore_constant::BLOCK_SERVICE_PROVIDER);\n        ASSERT_NE(new_app->envs.end(), it);\n        ASSERT_EQ(_provider, it->second);\n        it = new_app->envs.find(backup_restore_constant::CLUSTER_NAME);\n        ASSERT_NE(new_app->envs.end(), it);\n        ASSERT_EQ(_cluster_name, it->second);\n        it = new_app->envs.find(backup_restore_constant::APP_NAME);\n        ASSERT_NE(new_app->envs.end(), it);\n        ASSERT_EQ(_old_app_name, it->second);\n        it = new_app->envs.find(backup_restore_constant::APP_ID);\n        ASSERT_NE(new_app->envs.end(), it);\n        ASSERT_EQ(std::to_string(old_app_id), it->second);\n        it = new_app->envs.find(backup_restore_constant::BACKUP_ID);\n        ASSERT_NE(new_app->envs.end(), it);\n        ASSERT_EQ(std::to_string(backup_id), it->second);\n        if (!user_specified_path.empty()) {\n            it = new_app->envs.find(backup_restore_constant::RESTORE_PATH);\n            ASSERT_NE(new_app->envs.end(), it);\n            ASSERT_EQ(user_specified_path, it->second);\n        }\n    }\n\nprotected:\n    const std::string _old_app_name;\n    const std::string _new_app_name;\n    const std::string _cluster_name;\n    const std::string _provider;\n};\n\nTEST_F(server_state_restore_test, 
test_restore_app) { test_restore_app(); }\n\nTEST_F(server_state_restore_test, test_restore_app_with_specific_path)\n{\n    test_restore_app(\"test_path\");\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/server_state_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <gtest/gtest.h>\n#include <dsn/service_api_c.h>\n\n#include \"meta/meta_service.h\"\n#include \"meta/server_state.h\"\n\n#include \"meta_service_test_app.h\"\n\nnamespace dsn {\nnamespace replication {\n\nstatic const std::vector<std::string> keys = {\"manual_compact.once.trigger_time\",\n                                              \"manual_compact.once.target_level\",\n                                              \"manual_compact.once.bottommost_level_compaction\",\n                                              \"manual_compact.periodic.trigger_time\",\n                                              \"manual_compact.periodic.target_level\",\n                                              
\"manual_compact.periodic.bottommost_level_compaction\",\n                                              \"rocksdb.usage_scenario\",\n                                              \"rocksdb.checkpoint.reserve_min_count\",\n                                              \"rocksdb.checkpoint.reserve_time_seconds\"};\nstatic const std::vector<std::string> values = {\n    \"p1v1\", \"p1v2\", \"p1v3\", \"p2v1\", \"p2v2\", \"p2v3\", \"p3v1\", \"p3v2\", \"p3v3\"};\n\nstatic const std::vector<std::string> del_keys = {\"manual_compact.once.trigger_time\",\n                                                  \"manual_compact.periodic.trigger_time\",\n                                                  \"rocksdb.usage_scenario\"};\nstatic const std::set<std::string> del_keys_set = {\"manual_compact.once.trigger_time\",\n                                                   \"manual_compact.periodic.trigger_time\",\n                                                   \"rocksdb.usage_scenario\"};\n\nstatic const std::string clear_prefix = \"rocksdb\";\n\n// if str = \"prefix.xxx\" then return prefix\n// else return \"\"\nstatic std::string acquire_prefix(const std::string &str)\n{\n    auto index = str.find('.');\n    if (index == std::string::npos) {\n        return \"\";\n    } else {\n        return str.substr(0, index);\n    }\n}\n\nvoid meta_service_test_app::app_envs_basic_test()\n{\n    // create a fake app\n    dsn::app_info info;\n    info.is_stateful = true;\n    info.app_id = 1;\n    info.app_type = \"simple_kv\";\n    info.app_name = \"test_app1\";\n    info.max_replica_count = 3;\n    info.partition_count = 32;\n    info.status = dsn::app_status::AS_CREATING;\n    info.envs.clear();\n    std::shared_ptr<app_state> fake_app = app_state::create(info);\n\n    // create meta_service\n    std::shared_ptr<meta_service> meta_svc = std::make_shared<meta_service>();\n    meta_service *svc = meta_svc.get();\n\n    svc->_meta_opts.cluster_root = \"/meta_test\";\n    
svc->_meta_opts.meta_state_service_type = \"meta_state_service_simple\";\n    svc->remote_storage_initialize();\n\n    std::string apps_root = \"/meta_test/apps\";\n    std::shared_ptr<server_state> ss = svc->_state;\n    ss->initialize(svc, apps_root);\n\n    ss->_all_apps.emplace(std::make_pair(fake_app->app_id, fake_app));\n    dsn::error_code ec = ss->sync_apps_to_remote_storage();\n    ASSERT_EQ(ec, dsn::ERR_OK);\n\n    std::cout << \"test server_state::set_app_envs()...\" << std::endl;\n    {\n        configuration_update_app_env_request request;\n        request.__set_app_name(fake_app->app_name);\n        request.__set_op(app_env_operation::type::APP_ENV_OP_SET);\n        request.__set_keys(keys);\n        request.__set_values(values);\n\n        dsn::message_ptr binary_req = dsn::message_ex::create_request(RPC_CM_UPDATE_APP_ENV);\n        dsn::marshall(binary_req, request);\n        dsn::message_ex *recv_msg = create_corresponding_receive(binary_req);\n        app_env_rpc rpc(recv_msg); // don't need reply\n        ss->set_app_envs(rpc);\n        ss->wait_all_task();\n        std::shared_ptr<app_state> app = ss->get_app(fake_app->app_name);\n        ASSERT_TRUE(app != nullptr);\n        for (int idx = 0; idx < keys.size(); idx++) {\n            const std::string &key = keys[idx];\n            ASSERT_EQ(app->envs.count(key), 1);\n            ASSERT_EQ(app->envs.at(key), values[idx]);\n        }\n    }\n\n    std::cout << \"test server_state::del_app_envs()...\" << std::endl;\n    {\n        configuration_update_app_env_request request;\n        request.__set_app_name(fake_app->app_name);\n        request.__set_op(app_env_operation::type::APP_ENV_OP_DEL);\n        request.__set_keys(del_keys);\n\n        dsn::message_ptr binary_req = dsn::message_ex::create_request(RPC_CM_UPDATE_APP_ENV);\n        dsn::marshall(binary_req, request);\n        dsn::message_ex *recv_msg = create_corresponding_receive(binary_req);\n        app_env_rpc rpc(recv_msg); // don't 
need reply\n        ss->del_app_envs(rpc);\n        ss->wait_all_task();\n\n        std::shared_ptr<app_state> app = ss->get_app(fake_app->app_name);\n        ASSERT_TRUE(app != nullptr);\n        for (int idx = 0; idx < keys.size(); idx++) {\n            const std::string &key = keys[idx];\n            if (del_keys_set.count(key) >= 1) {\n                ASSERT_EQ(app->envs.count(key), 0);\n            } else {\n                ASSERT_EQ(app->envs.count(key), 1);\n                ASSERT_EQ(app->envs.at(key), values[idx]);\n            }\n        }\n    }\n\n    std::cout << \"test server_state::clear_app_envs()...\" << std::endl;\n    {\n        // test specify prefix\n        {\n            configuration_update_app_env_request request;\n            request.__set_app_name(fake_app->app_name);\n            request.__set_op(app_env_operation::type::APP_ENV_OP_CLEAR);\n            request.__set_clear_prefix(clear_prefix);\n\n            dsn::message_ptr binary_req = dsn::message_ex::create_request(RPC_CM_UPDATE_APP_ENV);\n            dsn::marshall(binary_req, request);\n            dsn::message_ex *recv_msg = create_corresponding_receive(binary_req);\n            app_env_rpc rpc(recv_msg); // don't need reply\n            ss->clear_app_envs(rpc);\n            ss->wait_all_task();\n\n            std::shared_ptr<app_state> app = ss->get_app(fake_app->app_name);\n            ASSERT_TRUE(app != nullptr);\n            for (int idx = 0; idx < keys.size(); idx++) {\n                const std::string &key = keys[idx];\n                if (del_keys_set.count(key) <= 0) {\n                    if (acquire_prefix(key) == clear_prefix) {\n                        ASSERT_EQ(app->envs.count(key), 0);\n                    } else {\n                        ASSERT_EQ(app->envs.count(key), 1);\n                        ASSERT_EQ(app->envs.at(key), values[idx]);\n                    }\n                } else {\n                    // key already delete\n                    
ASSERT_EQ(app->envs.count(key), 0);\n                }\n            }\n        }\n\n        // test clear all\n        {\n            configuration_update_app_env_request request;\n            request.__set_app_name(fake_app->app_name);\n            request.__set_op(app_env_operation::type::APP_ENV_OP_CLEAR);\n            request.__set_clear_prefix(\"\");\n\n            dsn::message_ptr binary_req = dsn::message_ex::create_request(RPC_CM_UPDATE_APP_ENV);\n            dsn::marshall(binary_req, request);\n            dsn::message_ex *recv_msg = create_corresponding_receive(binary_req);\n            app_env_rpc rpc(recv_msg); // don't need reply\n            ss->clear_app_envs(rpc);\n            ss->wait_all_task();\n\n            std::shared_ptr<app_state> app = ss->get_app(fake_app->app_name);\n            ASSERT_TRUE(app != nullptr);\n            ASSERT_TRUE(app->envs.empty());\n        }\n    }\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/state_sync_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <cmath>\n#include <fstream>\n#include <vector>\n#include <iostream>\n\n#include <gtest/gtest.h>\n#include <dsn/service_api_cpp.h>\n\n#include \"meta/meta_service.h\"\n#include \"meta/server_state.h\"\n\n#include \"meta/test/misc/misc.h\"\n\n#include \"meta_service_test_app.h\"\n\nnamespace dsn {\nnamespace replication {\n\nstatic void random_assign_partition_config(std::shared_ptr<app_state> &app,\n                                           const std::vector<dsn::rpc_address> &server_list,\n                                           int max_replica_count)\n{\n    auto get_server = [&server_list](int indice) {\n        if (indice % 2 != 0)\n            return dsn::rpc_address();\n        return server_list[indice / 2];\n    
};\n\n    int max_servers = (server_list.size() - 1) * 2 - 1;\n    for (dsn::partition_configuration &pc : app->partitions) {\n        int start = 0;\n        std::vector<int> indices;\n        for (int i = 0; i < max_replica_count && start <= max_servers; ++i) {\n            indices.push_back(random32(start, max_servers));\n            start = indices.back() + 1;\n        }\n        pc.primary = get_server(indices[0]);\n        for (int i = 1; i < indices.size(); ++i) {\n            dsn::rpc_address addr = get_server(indices[i]);\n            if (!addr.is_invalid())\n                pc.secondaries.push_back(addr);\n        }\n        pc.last_drops = {server_list.back()};\n    }\n}\n\nstatic void file_data_compare(const char *fname1, const char *fname2)\n{\n    static const int length = 4096;\n    std::shared_ptr<char> buffer(dsn::utils::make_shared_array<char>(length * 2));\n    char *buf1 = buffer.get(), *buf2 = buffer.get() + length;\n\n    std::ifstream ifile1(fname1, std::ios::in | std::ios::binary);\n    std::ifstream ifile2(fname2, std::ios::in | std::ios::binary);\n\n    auto file_length = [](std::ifstream &is) {\n        is.seekg(0, is.end);\n        int result = is.tellg();\n        is.seekg(0, is.beg);\n        return result;\n    };\n\n    int l = file_length(ifile1);\n    ASSERT_EQ(l, file_length(ifile2));\n\n    for (int i = 0; i < l; i += length) {\n        int up_to_bytes = length < (l - i) ? 
length : (l - i);\n        ifile1.read(buf1, up_to_bytes);\n        ifile2.read(buf2, up_to_bytes);\n        ASSERT_TRUE(memcmp(buf1, buf2, up_to_bytes) == 0);\n    }\n}\n\nvoid meta_service_test_app::state_sync_test()\n{\n    int apps_count = 15;\n    int drop_ratio = 5;\n    std::vector<dsn::rpc_address> server_list;\n    std::vector<int> drop_set;\n    generate_node_list(server_list, 10, 10);\n\n    std::shared_ptr<meta_service> meta_svc = std::make_shared<meta_service>();\n    meta_service *svc = meta_svc.get();\n    meta_options &opt = svc->_meta_opts;\n    opt.cluster_root = \"/meta_test\";\n    opt.meta_state_service_type = \"meta_state_service_simple\";\n    svc->remote_storage_initialize();\n\n    std::string apps_root = \"/meta_test/apps\";\n    std::shared_ptr<server_state> ss1 = svc->_state;\n\n    // create apss randomly, and sync it to meta state service simple\n    std::cerr << \"testing create apps and sync to remote storage\" << std::endl;\n    {\n        server_state *ss = ss1.get();\n        ss->initialize(svc, apps_root);\n\n        drop_set.clear();\n        for (int i = 1; i <= apps_count; ++i) {\n            dsn::app_info info;\n            info.is_stateful = true;\n            info.app_id = i;\n            info.app_type = \"simple_kv\";\n            info.app_name = \"test_app\" + boost::lexical_cast<std::string>(i);\n            info.max_replica_count = 3;\n            info.partition_count = random32(100, 10000);\n            info.status = dsn::app_status::AS_CREATING;\n            std::shared_ptr<app_state> app = app_state::create(info);\n\n            ss->_all_apps.emplace(app->app_id, app);\n            if (i < apps_count && random32(1, apps_count) <= drop_ratio) {\n                app->status = dsn::app_status::AS_DROPPING;\n                drop_set.push_back(i);\n                app->app_name = \"test_app\" + boost::lexical_cast<std::string>(apps_count);\n            }\n        }\n        for (int i = 1; i <= apps_count; ++i) {\n        
    std::shared_ptr<app_state> app = ss->get_app(i);\n            random_assign_partition_config(app, server_list, 3);\n            if (app->status == dsn::app_status::AS_DROPPING) {\n                for (int j = 0; j < app->partition_count; ++j) {\n                    app->partitions[j].partition_flags = pc_flags::dropped;\n                }\n            }\n        }\n\n        dsn::error_code ec = ss->sync_apps_to_remote_storage();\n        ASSERT_EQ(ec, dsn::ERR_OK);\n        ss->spin_wait_staging();\n    }\n\n    // then we sync from meta_state_service_simple, and dump to local file\n    std::cerr << \"testing sync from remote storage and dump to local file\" << std::endl;\n    {\n        std::shared_ptr<server_state> ss2 = std::make_shared<server_state>();\n        ss2->initialize(svc, apps_root);\n        dsn::error_code ec = ss2->sync_apps_from_remote_storage();\n        ASSERT_EQ(ec, dsn::ERR_OK);\n\n        for (int i = 1; i <= apps_count; ++i) {\n            std::shared_ptr<app_state> app = ss2->get_app(i);\n            for (int j = 0; j < app->partition_count; ++j) {\n                config_context &cc = app->helpers->contexts[j];\n                ASSERT_EQ(1, cc.dropped.size());\n                ASSERT_NE(cc.dropped.end(), cc.find_from_dropped(server_list.back()));\n            }\n        }\n        ec = ss2->dump_from_remote_storage(\"meta_state.dump1\", false);\n        ASSERT_EQ(ec, dsn::ERR_OK);\n    }\n\n    // dump another way\n    std::cerr << \"testing directly dump to local file\" << std::endl;\n    {\n        std::shared_ptr<server_state> ss2 = std::make_shared<server_state>();\n        ss2->initialize(svc, apps_root);\n        dsn::error_code ec = ss2->dump_from_remote_storage(\"meta_state.dump2\", true);\n\n        ASSERT_EQ(ec, dsn::ERR_OK);\n        file_data_compare(\"meta_state.dump1\", \"meta_state.dump2\");\n    }\n\n    opt.meta_state_service_type = \"meta_state_service_zookeeper\";\n    svc->remote_storage_initialize();\n    // first 
clean up\n    std::cerr << \"start to clean up zookeeper storage\" << std::endl;\n    {\n        dsn::error_code ec;\n        dsn::dist::meta_state_service *storage = svc->get_remote_storage();\n        storage\n            ->delete_node(apps_root,\n                          true,\n                          LPC_META_CALLBACK,\n                          [&ec](dsn::error_code error) { ec = error; },\n                          nullptr)\n            ->wait();\n        ASSERT_TRUE(dsn::ERR_OK == ec || dsn::ERR_OBJECT_NOT_FOUND == ec);\n    }\n\n    std::cerr << \"test sync to zookeeper's remote storage\" << std::endl;\n    // restore from the local file, and restore to zookeeper\n    {\n        std::shared_ptr<server_state> ss2 = std::make_shared<server_state>();\n\n        ss2->initialize(svc, apps_root);\n        dsn::error_code ec = ss2->restore_from_local_storage(\"meta_state.dump2\");\n        ASSERT_EQ(ec, dsn::ERR_OK);\n    }\n\n    // then sync from zookeeper\n    std::cerr << \"test sync from zookeeper's storage\" << std::endl;\n    {\n        std::shared_ptr<server_state> ss2 = std::make_shared<server_state>();\n        ss2->initialize(svc, apps_root);\n\n        dsn::error_code ec = ss2->initialize_data_structure();\n        ASSERT_EQ(ec, dsn::ERR_OK);\n\n        app_mapper_compare(ss1->_all_apps, ss2->_all_apps);\n        ASSERT_EQ(ss1->_exist_apps.size(), ss2->_exist_apps.size());\n        for (const auto &iter : ss1->_exist_apps) {\n            ASSERT_TRUE(ss2->_exist_apps.find(iter.first) != ss2->_exist_apps.end());\n        }\n\n        // then we dump the content to local file with binary format\n        std::cerr << \"test dump to local file from zookeeper's storage\" << std::endl;\n        ec = ss2->dump_from_remote_storage(\"meta_state.dump3\", false);\n        ASSERT_EQ(ec, dsn::ERR_OK);\n    }\n\n    // then we restore from local storage and restore to remote\n    {\n        std::shared_ptr<server_state> ss2 = std::make_shared<server_state>();\n\n  
      ss2->initialize(svc, apps_root);\n        dsn::error_code ec = ss2->restore_from_local_storage(\"meta_state.dump3\");\n        ASSERT_EQ(ec, dsn::ERR_OK);\n\n        app_mapper_compare(ss1->_all_apps, ss2->_all_apps);\n        ASSERT_TRUE(ss1->_exist_apps.size() == ss2->_exist_apps.size());\n        for (const auto &iter : ss1->_exist_apps) {\n            ASSERT_TRUE(ss2->_exist_apps.find(iter.first) != ss2->_exist_apps.end());\n        }\n        ss2->initialize_node_state();\n\n        // then let's test the query configuration calls\n        // 1.1. normal gpid\n        dsn::gpid gpid = {15, 0};\n        dsn::partition_configuration pc;\n        ASSERT_TRUE(ss2->query_configuration_by_gpid(gpid, pc));\n        ASSERT_EQ(ss1->_all_apps[15]->partitions[0], pc);\n        // 1.2 dropped app\n        if (!drop_set.empty()) {\n            gpid.set_app_id(drop_set[0]);\n            ASSERT_FALSE(ss2->query_configuration_by_gpid(gpid, pc));\n        }\n\n        // 2.1 query configuration by index\n        dsn::configuration_query_by_index_request req;\n        dsn::configuration_query_by_index_response resp;\n        req.app_name = \"test_app15\";\n        req.partition_indices = {-1, 1, 2, 3, 0x7fffffff};\n\n        std::shared_ptr<app_state> app_created = ss1->get_app(15);\n        ss2->query_configuration_by_index(req, resp);\n        ASSERT_EQ(dsn::ERR_OK, resp.err);\n        ASSERT_EQ(15, resp.app_id);\n        ASSERT_EQ(app_created->partition_count, resp.partition_count);\n        ASSERT_EQ(resp.partitions.size(), 3);\n        for (int i = 1; i <= 3; ++i)\n            ASSERT_EQ(resp.partitions[i - 1], app_created->partitions[i]);\n\n        // 2.2 no exist app\n        req.app_name = \"make_no_sense\";\n        ss2->query_configuration_by_index(req, resp);\n        ASSERT_EQ(dsn::ERR_OBJECT_NOT_FOUND, resp.err);\n\n        // 2.3 app is dropping/creating/recalling\n        std::shared_ptr<app_state> app = ss2->get_app(15);\n        req.app_name = 
app->app_name;\n\n        ss2->query_configuration_by_index(req, resp);\n        ASSERT_EQ(dsn::ERR_OK, resp.err);\n\n        app->status = dsn::app_status::AS_DROPPING;\n        ss2->query_configuration_by_index(req, resp);\n        ASSERT_EQ(dsn::ERR_BUSY_DROPPING, resp.err);\n\n        app->status = dsn::app_status::AS_RECALLING;\n        ss2->query_configuration_by_index(req, resp);\n        ASSERT_EQ(dsn::ERR_BUSY_CREATING, resp.err);\n\n        app->status = dsn::app_status::AS_CREATING;\n        ss2->query_configuration_by_index(req, resp);\n        ASSERT_EQ(dsn::ERR_BUSY_CREATING, resp.err);\n\n        // client unknown state\n        app->status = dsn::app_status::AS_DROP_FAILED;\n        ss2->query_configuration_by_index(req, resp);\n        ASSERT_EQ(dsn::ERR_UNKNOWN, resp.err);\n    }\n\n    // simulate the half creating\n    std::cerr << \"test some node for a app is not create on remote storage\" << std::endl;\n    {\n        std::shared_ptr<server_state> ss2 = std::make_shared<server_state>();\n        dsn::error_code ec;\n        ss2->initialize(svc, apps_root);\n\n        dsn::dist::meta_state_service *storage = svc->get_remote_storage();\n        storage\n            ->delete_node(ss2->get_partition_path(dsn::gpid{apps_count, 0}),\n                          false,\n                          LPC_META_CALLBACK,\n                          [&ec](dsn::error_code error) { ec = error; },\n                          nullptr)\n            ->wait();\n        ASSERT_EQ(ec, dsn::ERR_OK);\n\n        ec = ss2->sync_apps_from_remote_storage();\n        ASSERT_EQ(ec, dsn::ERR_OK);\n        ASSERT_TRUE(ss2->spin_wait_staging(30));\n    }\n}\n\nstatic dsn::app_info create_app_info(dsn::app_status::type status,\n                                     std::string app_name,\n                                     int32_t id,\n                                     int32_t partition_count)\n{\n    dsn::app_info info;\n    info.status = status;\n    info.app_type = 
\"pegasus\";\n    info.app_name = app_name;\n    info.app_id = id;\n    info.partition_count = partition_count;\n    info.is_stateful = true;\n    info.max_replica_count = 3;\n    info.expire_second = 0;\n\n    return info;\n}\n\nvoid meta_service_test_app::construct_apps_test()\n{\n    std::vector<dsn::app_info> apps = {\n        create_app_info(dsn::app_status::AS_AVAILABLE, \"test__4\", 2, 10),\n        create_app_info(dsn::app_status::AS_AVAILABLE, \"test\", 4, 20),\n        create_app_info(dsn::app_status::AS_AVAILABLE, \"test\", 6, 30)};\n\n    query_app_info_response resp;\n    resp.apps = apps;\n    resp.err = dsn::ERR_OK;\n\n    std::shared_ptr<meta_service> svc(new meta_service());\n\n    std::vector<dsn::rpc_address> nodes;\n    std::string hint_message;\n    generate_node_list(nodes, 1, 1);\n    svc->_state->construct_apps({resp}, nodes, hint_message);\n\n    meta_view mv = svc->_state->get_meta_view();\n    const app_mapper &mapper = *(mv.apps);\n    ASSERT_EQ(6, mv.apps->size());\n\n    std::vector<dsn::app_info> result_apps = {\n        create_app_info(dsn::app_status::AS_DROPPING, \"__drop_holder__1\", 1, 1),\n        create_app_info(dsn::app_status::AS_CREATING, \"test__4__2\", 2, 10),\n        create_app_info(dsn::app_status::AS_DROPPING, \"__drop_holder__3\", 3, 1),\n        create_app_info(dsn::app_status::AS_CREATING, \"test__4\", 4, 20),\n        create_app_info(dsn::app_status::AS_DROPPING, \"__drop_holder__5\", 5, 1),\n        create_app_info(dsn::app_status::AS_CREATING, \"test\", 6, 30)};\n\n    int i = 0;\n    for (const auto &kv_pair : mapper) {\n        ASSERT_EQ(kv_pair.second->app_id, result_apps[i].app_id);\n        ASSERT_EQ(kv_pair.second->app_name, result_apps[i].app_name);\n        ASSERT_EQ(kv_pair.second->app_type, result_apps[i].app_type);\n        ASSERT_EQ(kv_pair.second->partition_count, result_apps[i].partition_count);\n        ASSERT_EQ(kv_pair.second->max_replica_count, result_apps[i].max_replica_count);\n        
ASSERT_EQ(kv_pair.second->is_stateful, result_apps[i].is_stateful);\n        ASSERT_EQ(kv_pair.second->status, result_apps[i].status);\n        i++;\n    }\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/meta/test/suite1",
    "content": "4\n127.0.0.1:1\n127.0.0.1:2\n127.0.0.1:3\n127.0.0.1:4\n2\n1 6\n3 127.0.0.1:1 127.0.0.1:2 127.0.0.1:3\n3 127.0.0.1:1 127.0.0.1:2 127.0.0.1:3\n3 127.0.0.1:1 127.0.0.1:2 127.0.0.1:3\n3 127.0.0.1:1 127.0.0.1:2 127.0.0.1:3\n3 127.0.0.1:2 127.0.0.1:1 127.0.0.1:3\n3 127.0.0.1:3 127.0.0.1:1 127.0.0.1:2\n2 5\n3 127.0.0.1:1 127.0.0.1:2 127.0.0.1:3\n3 127.0.0.1:1 127.0.0.1:2 127.0.0.1:3\n3 127.0.0.1:2 127.0.0.1:1 127.0.0.1:3\n3 127.0.0.1:3 127.0.0.1:2 127.0.0.1:1\n3 127.0.0.1:4 127.0.0.1:1 127.0.0.1:2\n"
  },
  {
    "path": "src/meta/test/suite2",
    "content": "4\n127.0.0.1:1\n127.0.0.1:2\n127.0.0.1:3\n127.0.0.1:4\n2\n1 7\n3 127.0.0.1:1 127.0.0.1:2 127.0.0.1:3\n3 127.0.0.1:1 127.0.0.1:2 127.0.0.1:3\n3 127.0.0.1:1 127.0.0.1:2 127.0.0.1:3\n3 127.0.0.1:1 127.0.0.1:2 127.0.0.1:3\n3 127.0.0.1:1 127.0.0.1:2 127.0.0.1:3\n3 127.0.0.1:2 127.0.0.1:1 127.0.0.1:3\n3 127.0.0.1:3 127.0.0.1:1 127.0.0.1:2\n2 5\n3 127.0.0.1:1 127.0.0.1:2 127.0.0.1:3\n3 127.0.0.1:1 127.0.0.1:2 127.0.0.1:3\n3 127.0.0.1:2 127.0.0.1:1 127.0.0.1:3\n3 127.0.0.1:3 127.0.0.1:2 127.0.0.1:1\n3 127.0.0.1:4 127.0.0.1:1 127.0.0.1:2\n"
  },
  {
    "path": "src/meta/test/update_configuration_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <gtest/gtest.h>\n\n#include <dsn/service_api_c.h>\n#include <dsn/service_api_cpp.h>\n#include <dsn/tool-api/zlocks.h>\n\n#include \"meta/meta_service.h\"\n#include \"meta/server_state.h\"\n#include \"meta/greedy_load_balancer.h\"\n#include \"meta/meta_server_failure_detector.h\"\n#include \"meta/test/misc/misc.h\"\n\n#include \"meta_service_test_app.h\"\n#include \"dummy_balancer.h\"\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DECLARE_uint64(min_live_node_count_for_unfreeze);\n\nclass fake_sender_meta_service : public dsn::replication::meta_service\n{\nprivate:\n    meta_service_test_app *_app;\n\npublic:\n    fake_sender_meta_service(meta_service_test_app *app) : meta_service(), _app(app) {}\n\n    virtual void 
reply_message(dsn::message_ex *request, dsn::message_ex *response) override\n    {\n        destroy_message(response);\n    }\n    virtual void send_message(const dsn::rpc_address &target, dsn::message_ex *request) override\n    {\n        // we expect this is a configuration_update_request proposal\n        dsn::message_ex *recv_request = create_corresponding_receive(request);\n\n        std::shared_ptr<configuration_update_request> update_req =\n            std::make_shared<configuration_update_request>();\n        ::dsn::unmarshall(recv_request, *update_req);\n\n        destroy_message(request);\n        destroy_message(recv_request);\n\n        dsn::partition_configuration &pc = update_req->config;\n        pc.ballot++;\n\n        switch (update_req->type) {\n        case config_type::CT_ASSIGN_PRIMARY:\n        case config_type::CT_UPGRADE_TO_PRIMARY:\n            pc.primary = update_req->node;\n            replica_helper::remove_node(update_req->node, pc.secondaries);\n            break;\n\n        case config_type::CT_ADD_SECONDARY:\n        case config_type::CT_ADD_SECONDARY_FOR_LB:\n            pc.secondaries.push_back(update_req->node);\n            update_req->type = config_type::CT_UPGRADE_TO_SECONDARY;\n            break;\n\n        case config_type::CT_REMOVE:\n        case config_type::CT_DOWNGRADE_TO_INACTIVE:\n            if (update_req->node == pc.primary)\n                pc.primary.set_invalid();\n            else\n                replica_helper::remove_node(update_req->node, pc.secondaries);\n            break;\n\n        case config_type::CT_DOWNGRADE_TO_SECONDARY:\n            pc.secondaries.push_back(pc.primary);\n            pc.primary.set_invalid();\n            break;\n        default:\n            break;\n        }\n\n        _app->call_update_configuration(this, update_req);\n    }\n};\n\nclass null_meta_service : public dsn::replication::meta_service\n{\npublic:\n    void send_message(const dsn::rpc_address &target, dsn::message_ex 
*request)\n    {\n        ddebug(\"send request to %s\", target.to_string());\n        request->add_ref();\n        request->release_ref();\n    }\n};\n\nclass dummy_partition_guardian : public partition_guardian\n{\npublic:\n    explicit dummy_partition_guardian(meta_service *s) : partition_guardian(s) {}\n\n    pc_status cure(meta_view view, const dsn::gpid &gpid, configuration_proposal_action &action)\n    {\n        action.type = config_type::CT_INVALID;\n        const dsn::partition_configuration &pc = *get_config(*view.apps, gpid);\n        if (!pc.primary.is_invalid() && pc.secondaries.size() == 2)\n            return pc_status::healthy;\n        return pc_status::ill;\n    }\n};\n\nvoid meta_service_test_app::call_update_configuration(\n    meta_service *svc, std::shared_ptr<dsn::replication::configuration_update_request> &request)\n{\n    dsn::message_ex *fake_request =\n        dsn::message_ex::create_request(RPC_CM_UPDATE_PARTITION_CONFIGURATION);\n    ::dsn::marshall(fake_request, *request);\n    fake_request->add_ref();\n\n    dsn::tasking::enqueue(\n        LPC_META_STATE_HIGH,\n        nullptr,\n        std::bind(&server_state::on_update_configuration, svc->_state.get(), request, fake_request),\n        server_state::sStateHash);\n}\n\nvoid meta_service_test_app::call_config_sync(\n    meta_service *svc, std::shared_ptr<configuration_query_by_node_request> &request)\n{\n    dsn::message_ex *fake_request = dsn::message_ex::create_request(RPC_CM_CONFIG_SYNC);\n    ::dsn::marshall(fake_request, *request);\n\n    dsn::message_ex *recvd_request = create_corresponding_receive(fake_request);\n    destroy_message(fake_request);\n\n    auto rpc = rpc_holder<configuration_query_by_node_request,\n                          configuration_query_by_node_response>::auto_reply(recvd_request);\n    dsn::tasking::enqueue(LPC_META_STATE_HIGH,\n                          nullptr,\n                          std::bind(&server_state::on_config_sync, svc->_state.get(), 
rpc),\n                          server_state::sStateHash);\n}\n\nbool meta_service_test_app::wait_state(server_state *ss, const state_validator &validator, int time)\n{\n    for (int i = 0; i != time;) {\n        dsn::task_ptr t = dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                                                nullptr,\n                                                std::bind(&server_state::check_all_partitions, ss),\n                                                server_state::sStateHash,\n                                                std::chrono::seconds(1));\n        t->wait();\n\n        {\n            dsn::zauto_read_lock l(ss->_lock);\n            if (validator(ss->_all_apps))\n                return true;\n        }\n        if (time != -1)\n            ++i;\n    }\n    return false;\n}\n\nvoid meta_service_test_app::update_configuration_test()\n{\n    dsn::error_code ec;\n    std::shared_ptr<fake_sender_meta_service> svc(new fake_sender_meta_service(this));\n    svc->_failure_detector.reset(new dsn::replication::meta_server_failure_detector(svc.get()));\n    ec = svc->remote_storage_initialize();\n    ASSERT_EQ(ec, dsn::ERR_OK);\n    svc->_partition_guardian.reset(new partition_guardian(svc.get()));\n    svc->_balancer.reset(new dummy_balancer(svc.get()));\n\n    server_state *ss = svc->_state.get();\n    ss->initialize(svc.get(), meta_options::concat_path_unix_style(svc->_cluster_root, \"apps\"));\n    dsn::app_info info;\n    info.is_stateful = true;\n    info.status = dsn::app_status::AS_CREATING;\n    info.app_id = 1;\n    info.app_name = \"simple_kv.instance0\";\n    info.app_type = \"simple_kv\";\n    info.max_replica_count = 3;\n    info.partition_count = 2;\n    std::shared_ptr<app_state> app = app_state::create(info);\n\n    ss->_all_apps.emplace(1, app);\n\n    std::vector<dsn::rpc_address> nodes;\n    generate_node_list(nodes, 4, 4);\n\n    dsn::partition_configuration &pc0 = app->partitions[0];\n    pc0.primary = nodes[0];\n    
pc0.secondaries.push_back(nodes[1]);\n    pc0.secondaries.push_back(nodes[2]);\n    pc0.ballot = 3;\n\n    dsn::partition_configuration &pc1 = app->partitions[1];\n    pc1.primary = nodes[1];\n    pc1.secondaries.push_back(nodes[0]);\n    pc1.secondaries.push_back(nodes[2]);\n    pc1.ballot = 3;\n\n    ss->sync_apps_to_remote_storage();\n    ASSERT_TRUE(ss->spin_wait_staging(30));\n    ss->initialize_node_state();\n    svc->set_node_state({nodes[0], nodes[1], nodes[2]}, true);\n    svc->_started = true;\n\n    // test remove primary\n    state_validator validator1 = [pc0](const app_mapper &apps) {\n        const dsn::partition_configuration *pc = get_config(apps, pc0.pid);\n        return pc->ballot == pc0.ballot + 2 && pc->secondaries.size() == 1 &&\n               std::find(pc0.secondaries.begin(), pc0.secondaries.end(), pc->primary) !=\n                   pc0.secondaries.end();\n    };\n\n    // test kickoff secondary\n    dsn::rpc_address addr = nodes[0];\n    state_validator validator2 = [pc1, addr](const app_mapper &apps) {\n        const dsn::partition_configuration *pc = get_config(apps, pc1.pid);\n        return pc->ballot == pc1.ballot + 1 && pc->secondaries.size() == 1 &&\n               pc->secondaries.front() != addr;\n    };\n\n    svc->set_node_state({nodes[0]}, false);\n    ASSERT_TRUE(wait_state(ss, validator1, 30));\n    ASSERT_TRUE(wait_state(ss, validator2, 30));\n\n    // test add secondary\n    svc->set_node_state({nodes[3]}, true);\n    state_validator validator3 = [pc0](const app_mapper &apps) {\n        const dsn::partition_configuration *pc = get_config(apps, pc0.pid);\n        return pc->ballot == pc0.ballot + 1 && pc->secondaries.size() == 2;\n    };\n    // the default delay for add node is 5 miniutes\n    ASSERT_FALSE(wait_state(ss, validator3, 10));\n    svc->_meta_opts._lb_opts.replica_assign_delay_ms_for_dropouts = 0;\n    svc->_partition_guardian.reset(new partition_guardian(svc.get()));\n    svc->_balancer.reset(new 
dummy_balancer(svc.get()));\n    ASSERT_TRUE(wait_state(ss, validator3, 10));\n}\n\nvoid meta_service_test_app::adjust_dropped_size()\n{\n    dsn::error_code ec;\n    std::shared_ptr<null_meta_service> svc(new null_meta_service());\n    svc->_failure_detector.reset(new dsn::replication::meta_server_failure_detector(svc.get()));\n    ec = svc->remote_storage_initialize();\n    ASSERT_EQ(ec, dsn::ERR_OK);\n    svc->_partition_guardian.reset(new partition_guardian(svc.get()));\n    svc->_balancer.reset(new dummy_balancer(svc.get()));\n\n    server_state *ss = svc->_state.get();\n    ss->initialize(svc.get(), meta_options::concat_path_unix_style(svc->_cluster_root, \"apps\"));\n    dsn::app_info info;\n    info.is_stateful = true;\n    info.status = dsn::app_status::AS_CREATING;\n    info.app_id = 1;\n    info.app_name = \"simple_kv.instance0\";\n    info.app_type = \"simple_kv\";\n    info.max_replica_count = 3;\n    info.partition_count = 1;\n    std::shared_ptr<app_state> app = app_state::create(info);\n\n    ss->_all_apps.emplace(1, app);\n\n    std::vector<dsn::rpc_address> nodes;\n    generate_node_list(nodes, 10, 10);\n\n    // first, the replica is healthy, and there are 2 dropped\n    dsn::partition_configuration &pc = app->partitions[0];\n    pc.primary = nodes[0];\n    pc.secondaries = {nodes[1], nodes[2]};\n    pc.ballot = 10;\n\n    config_context &cc = *get_config_context(ss->_all_apps, pc.pid);\n    cc.dropped = {\n        dropped_replica{nodes[3], dropped_replica::INVALID_TIMESTAMP, 7, 11, 14},\n        dropped_replica{nodes[4], 20, invalid_ballot, invalid_decree, invalid_decree},\n    };\n\n    ss->sync_apps_to_remote_storage();\n    generate_node_mapper(ss->_nodes, ss->_all_apps, nodes);\n\n    // then we receive a request for upgrade a node to secondary\n    std::shared_ptr<configuration_update_request> req =\n        std::make_shared<configuration_update_request>();\n    req->config = pc;\n    req->config.ballot++;\n    
req->config.secondaries.push_back(nodes[5]);\n    req->info = info;\n    req->node = nodes[5];\n    req->type = config_type::CT_UPGRADE_TO_SECONDARY;\n    call_update_configuration(svc.get(), req);\n\n    spin_wait_condition([&pc]() { return pc.ballot == 11; }, 10);\n\n    // then receive a config_sync request fro nodes[4], which has less data than node[3]\n    std::shared_ptr<configuration_query_by_node_request> req2 =\n        std::make_shared<configuration_query_by_node_request>();\n    req2->__set_node(nodes[4]);\n\n    replica_info rep_info;\n    rep_info.pid = pc.pid;\n    rep_info.ballot = 6;\n    rep_info.status = partition_status::PS_ERROR;\n    rep_info.last_committed_decree = 9;\n    rep_info.last_prepared_decree = 10;\n    rep_info.last_durable_decree = 5;\n    rep_info.app_type = \"pegasus\";\n\n    req2->__set_stored_replicas({rep_info});\n    call_config_sync(svc.get(), req2);\n\n    auto status_check = [&cc, &nodes, &rep_info] {\n        if (cc.dropped.size() != 1)\n            return false;\n        dropped_replica &d = cc.dropped[0];\n        if (d.time != dropped_replica::INVALID_TIMESTAMP)\n            return false;\n        if (d.node != nodes[4])\n            return false;\n        if (d.last_committed_decree != rep_info.last_committed_decree)\n            return false;\n        return true;\n    };\n\n    spin_wait_condition(status_check, 10);\n}\n\nstatic void clone_app_mapper(app_mapper &output, const app_mapper &input)\n{\n    output.clear();\n    for (auto &iter : input) {\n        const std::shared_ptr<app_state> &old_app = iter.second;\n        dsn::app_info info = *old_app;\n        std::shared_ptr<app_state> new_app = app_state::create(info);\n        for (unsigned int i = 0; i != old_app->partition_count; ++i)\n            new_app->partitions[i] = old_app->partitions[i];\n        output.emplace(new_app->app_id, new_app);\n    }\n}\n\nvoid meta_service_test_app::apply_balancer_test()\n{\n    dsn::error_code ec;\n    
std::shared_ptr<fake_sender_meta_service> meta_svc(new fake_sender_meta_service(this));\n    ec = meta_svc->remote_storage_initialize();\n    ASSERT_EQ(dsn::ERR_OK, ec);\n\n    meta_svc->_failure_detector.reset(\n        new dsn::replication::meta_server_failure_detector(meta_svc.get()));\n    meta_svc->_partition_guardian.reset(new partition_guardian(meta_svc.get()));\n    meta_svc->_balancer.reset(new greedy_load_balancer(meta_svc.get()));\n\n    // initialize data structure\n    std::vector<dsn::rpc_address> node_list;\n    generate_node_list(node_list, 5, 10);\n\n    server_state *ss = meta_svc->_state.get();\n    generate_apps(ss->_all_apps, node_list, 5, 5, std::pair<uint32_t, uint32_t>(2, 5), false);\n\n    app_mapper backed_app;\n    node_mapper backed_nodes;\n\n    clone_app_mapper(backed_app, ss->_all_apps);\n    generate_node_mapper(backed_nodes, backed_app, node_list);\n\n    // before initialize, we need to mark apps to AS_CREATING:\n    for (auto &kv : ss->_all_apps) {\n        kv.second->status = dsn::app_status::AS_CREATING;\n    }\n    ss->initialize(meta_svc.get(), \"/meta_test/apps\");\n    ASSERT_EQ(dsn::ERR_OK, meta_svc->_state->sync_apps_to_remote_storage());\n    ASSERT_TRUE(ss->spin_wait_staging(30));\n    ss->initialize_node_state();\n\n    meta_svc->_started = true;\n    meta_svc->set_node_state(node_list, true);\n\n    app_mapper_compare(backed_app, ss->_all_apps);\n    // run balancer\n    bool result;\n\n    auto migration_actions = [&backed_app, &backed_nodes](const migration_list &list) {\n        migration_list result;\n        for (auto &iter : list) {\n            std::shared_ptr<configuration_balancer_request> req =\n                std::make_shared<configuration_balancer_request>(*(iter.second));\n            result.emplace(iter.first, req);\n        }\n        migration_check_and_apply(backed_app, backed_nodes, result, nullptr);\n    };\n\n    ss->set_replica_migration_subscriber_for_test(migration_actions);\n    while (true) 
{\n        dsn::task_ptr tsk =\n            dsn::tasking::enqueue(LPC_META_STATE_NORMAL,\n                                  nullptr,\n                                  [&result, ss]() { result = ss->check_all_partitions(); },\n                                  server_state::sStateHash);\n        tsk->wait();\n        if (result)\n            break;\n        else\n            std::this_thread::sleep_for(std::chrono::milliseconds(500));\n    }\n\n    app_mapper_compare(backed_app, ss->_all_apps);\n}\n\nvoid meta_service_test_app::cannot_run_balancer_test()\n{\n    std::shared_ptr<null_meta_service> svc(new null_meta_service());\n\n    // save original FLAGS_min_live_node_count_for_unfreeze\n    auto reserved_min_live_node_count_for_unfreeze = FLAGS_min_live_node_count_for_unfreeze;\n\n    // set FLAGS_min_live_node_count_for_unfreeze directly to bypass its flag validator\n    FLAGS_min_live_node_count_for_unfreeze = 0;\n\n    svc->_meta_opts.node_live_percentage_threshold_for_update = 0;\n\n    svc->_state->initialize(svc.get(), \"/\");\n    svc->_failure_detector.reset(new meta_server_failure_detector(svc.get()));\n    svc->_balancer.reset(new dummy_balancer(svc.get()));\n    svc->_partition_guardian.reset(new dummy_partition_guardian(svc.get()));\n\n    std::vector<dsn::rpc_address> nodes;\n    generate_node_list(nodes, 10, 10);\n\n    dsn::app_info info;\n    info.app_id = 1;\n    info.app_name = \"test\";\n    info.app_type = \"pegasus\";\n    info.expire_second = 0;\n    info.is_stateful = true;\n    info.max_replica_count = 3;\n    info.partition_count = 1;\n    info.status = dsn::app_status::AS_AVAILABLE;\n\n    std::shared_ptr<app_state> the_app = app_state::create(info);\n    svc->_state->_all_apps.emplace(info.app_id, the_app);\n    svc->_state->_exist_apps.emplace(info.app_name, the_app);\n\n    dsn::partition_configuration &pc = the_app->partitions[0];\n    pc.primary = nodes[0];\n    pc.secondaries = {nodes[1], nodes[2]};\n\n#define 
REGENERATE_NODE_MAPPER                                                                     \\\n    svc->_state->_nodes.clear();                                                                   \\\n    generate_node_mapper(svc->_state->_nodes, svc->_state->_all_apps, nodes)\n\n    REGENERATE_NODE_MAPPER;\n    // stage are freezed\n    svc->_function_level.store(meta_function_level::fl_freezed);\n    ASSERT_FALSE(svc->_state->check_all_partitions());\n\n    // stage are steady\n    svc->_function_level.store(meta_function_level::fl_steady);\n    ASSERT_FALSE(svc->_state->check_all_partitions());\n\n    // all the partitions are not healthy\n    svc->_function_level.store(meta_function_level::fl_lively);\n    pc.primary.set_invalid();\n    REGENERATE_NODE_MAPPER;\n\n    ASSERT_FALSE(svc->_state->check_all_partitions());\n\n    // some dropped node still exists in nodes\n    pc.primary = nodes[0];\n    REGENERATE_NODE_MAPPER;\n    get_node_state(svc->_state->_nodes, pc.primary, true)->set_alive(false);\n    ASSERT_FALSE(svc->_state->check_all_partitions());\n\n    // some apps are staging\n    REGENERATE_NODE_MAPPER;\n    the_app->status = dsn::app_status::AS_DROPPING;\n    ASSERT_FALSE(svc->_state->check_all_partitions());\n\n    // call function can run balancer\n    the_app->status = dsn::app_status::AS_AVAILABLE;\n    ASSERT_TRUE(svc->_state->can_run_balancer());\n\n    // recover original FLAGS_min_live_node_count_for_unfreeze\n    FLAGS_min_live_node_count_for_unfreeze = reserved_min_live_node_count_for_unfreeze;\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/nfs/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_nfs)\n\nthrift_generate_cpp(\n    NFS_THRIFT_SRCS\n    NFS_THRIFT_HDRS\n    ${CMAKE_CURRENT_SOURCE_DIR}/nfs.thrift\n)\n\nset(MY_PROJ_SRC ${NFS_THRIFT_SRCS})\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS dsn_aio)\n\n# Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_static_library()\n\nadd_subdirectory(test)\n"
  },
  {
    "path": "src/nfs/nfs.thrift",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\ninclude \"../dsn.thrift\"\n\nnamespace cpp dsn.service\n\nstruct copy_request\n{\n    1: dsn.rpc_address source;\n    2: string source_dir;\n    3: string dst_dir;\n    4: string file_name;\n    5: i64 offset;\n    6: i32 size;\n    7: bool is_last;\n    8: bool overwrite;\n    9: optional string source_disk_tag;\n}\n\nstruct copy_response\n{\n    1: dsn.error_code error;\n    2: dsn.blob file_content;\n    3: i64 offset;\n    4: i32 size;\n}\n\nstruct get_file_size_request\n{\n    1: dsn.rpc_address source;\n    2: string dst_dir;\n    3: list<string> file_list;\n    4: string source_dir;\n    5: bool overwrite;\n    6: optional string source_disk_tag;\n    7: optional string dest_disk_tag;\n}\n\nstruct get_file_size_response\n{\n   
 1: i32 error;\n    2: list<string> file_list;\n    3: list<i64> size_list;\n}\n"
  },
  {
    "path": "src/nfs/nfs_client_impl.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"nfs_client_impl.h\"\n\n#include <fcntl.h>\n\n#include <queue>\n\n#include <dsn/utility/filesystem.h>\n#include <dsn/tool-api/command_manager.h>\n\nnamespace dsn {\nnamespace service {\nstatic uint32_t current_max_copy_rate_megabytes = 0;\n\nDSN_DEFINE_uint32(\"nfs\",\n                  nfs_copy_block_bytes,\n                  4 * 1024 * 1024,\n                  \"max block size (bytes) for each network copy\");\nDSN_DEFINE_uint32(\n    \"nfs\",\n    max_copy_rate_megabytes_per_disk,\n    0,\n    \"max rate per disk of copying from remote node(MB/s), zero means disable rate limiter\");\nDSN_TAG_VARIABLE(max_copy_rate_megabytes_per_disk, FT_MUTABLE);\n// max_copy_rate_bytes should be zero or greater than nfs_copy_block_bytes 
which is the max\n// batch copy size once\nDSN_DEFINE_group_validator(max_copy_rate_megabytes_per_disk, [](std::string &message) -> bool {\n    return FLAGS_max_copy_rate_megabytes_per_disk == 0 ||\n           (FLAGS_max_copy_rate_megabytes_per_disk << 20) > FLAGS_nfs_copy_block_bytes;\n});\n\nDSN_DEFINE_int32(\"nfs\",\n                 max_concurrent_remote_copy_requests,\n                 50,\n                 \"max concurrent remote copy to the same server on nfs client\");\nDSN_DEFINE_int32(\"nfs\", max_concurrent_local_writes, 50, \"max local file writes on nfs client\");\nDSN_DEFINE_int32(\"nfs\", max_buffered_local_writes, 500, \"max buffered file writes on nfs client\");\nDSN_DEFINE_int32(\"nfs\",\n                 high_priority_speed_rate,\n                 2,\n                 \"the copy speed rate of high priority comparing with low priority on nfs client\");\nDSN_DEFINE_int32(\"nfs\",\n                 file_close_expire_time_ms,\n                 60 * 1000,\n                 \"max idle time for an opening file on nfs server\");\nDSN_DEFINE_int32(\"nfs\",\n                 file_close_timer_interval_ms_on_server,\n                 30 * 1000,\n                 \"time interval for checking whether cached file handles need to be closed\");\nDSN_DEFINE_int32(\"nfs\",\n                 max_file_copy_request_count_per_file,\n                 2,\n                 \"maximum concurrent remote copy requests for the same file on nfs client\"\n                 \"to limit each file copy speed\");\nDSN_DEFINE_int32(\"nfs\",\n                 max_retry_count_per_copy_request,\n                 2,\n                 \"maximum retry count when copy failed\");\nDSN_DEFINE_int32(\"nfs\",\n                 rpc_timeout_ms,\n                 1e5, // 100s\n                 \"rpc timeout in milliseconds for nfs copy, \"\n                 \"0 means use default timeout of rpc engine\");\n\nnfs_client_impl::nfs_client_impl()\n    : _concurrent_copy_request_count(0),\n      
_concurrent_local_write_count(0),\n      _buffered_local_write_count(0),\n      _copy_requests_low(FLAGS_max_file_copy_request_count_per_file),\n      _high_priority_remaining_time(FLAGS_high_priority_speed_rate)\n{\n    _recent_copy_data_size.init_app_counter(\"eon.nfs_client\",\n                                            \"recent_copy_data_size\",\n                                            COUNTER_TYPE_VOLATILE_NUMBER,\n                                            \"nfs client copy data size in the recent period\");\n    _recent_copy_fail_count.init_app_counter(\n        \"eon.nfs_client\",\n        \"recent_copy_fail_count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"nfs client copy fail count count in the recent period\");\n    _recent_write_data_size.init_app_counter(\"eon.nfs_client\",\n                                             \"recent_write_data_size\",\n                                             COUNTER_TYPE_VOLATILE_NUMBER,\n                                             \"nfs client write data size in the recent period\");\n    _recent_write_fail_count.init_app_counter(\n        \"eon.nfs_client\",\n        \"recent_write_fail_count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"nfs client write fail count count in the recent period\");\n\n    _copy_token_buckets = std::make_unique<utils::token_buckets>();\n\n    register_cli_commands();\n}\n\nnfs_client_impl::~nfs_client_impl()\n{\n    _tracker.cancel_outstanding_tasks();\n    UNREGISTER_VALID_HANDLER(_nfs_max_copy_rate_megabytes_cmd);\n}\n\nvoid nfs_client_impl::begin_remote_copy(std::shared_ptr<remote_copy_request> &rci,\n                                        aio_task *nfs_task)\n{\n    user_request_ptr req(new user_request());\n    req->high_priority = rci->high_priority;\n    req->file_size_req.source = rci->source;\n    req->file_size_req.dst_dir = rci->dest_dir;\n    req->file_size_req.file_list = rci->files;\n    req->file_size_req.source_dir = rci->source_dir;\n    
req->file_size_req.overwrite = rci->overwrite;\n    req->file_size_req.__set_source_disk_tag(rci->source_disk_tag);\n    req->file_size_req.__set_dest_disk_tag(rci->dest_disk_tag);\n    req->nfs_task = nfs_task;\n    req->is_finished = false;\n\n    async_nfs_get_file_size(req->file_size_req,\n                            [=](error_code err, get_file_size_response &&resp) {\n                                end_get_file_size(err, std::move(resp), req);\n                            },\n                            std::chrono::milliseconds(FLAGS_rpc_timeout_ms),\n                            req->file_size_req.source);\n}\n\nvoid nfs_client_impl::end_get_file_size(::dsn::error_code err,\n                                        const ::dsn::service::get_file_size_response &resp,\n                                        const user_request_ptr &ureq)\n{\n    if (err != ::dsn::ERR_OK) {\n        derror(\"{nfs_service} remote get file size failed, source = %s, dir = %s, err = %s\",\n               ureq->file_size_req.source.to_string(),\n               ureq->file_size_req.source_dir.c_str(),\n               err.to_string());\n        ureq->nfs_task->enqueue(err, 0);\n        return;\n    }\n\n    err = dsn::error_code(resp.error);\n    if (err != ::dsn::ERR_OK) {\n        derror(\"{nfs_service} remote get file size failed, source = %s, dir = %s, err = %s\",\n               ureq->file_size_req.source.to_string(),\n               ureq->file_size_req.source_dir.c_str(),\n               err.to_string());\n        ureq->nfs_task->enqueue(err, 0);\n        return;\n    }\n\n    std::deque<copy_request_ex_ptr> copy_requests;\n    ureq->file_contexts.resize(resp.size_list.size());\n    for (size_t i = 0; i < resp.size_list.size(); i++) // file list\n    {\n        file_context_ptr filec(new file_context(ureq, resp.file_list[i], resp.size_list[i]));\n        ureq->file_contexts[i] = filec;\n\n        // init copy requests\n        uint64_t size = resp.size_list[i];\n        uint64_t 
req_offset = 0;\n        uint32_t req_size = size > FLAGS_nfs_copy_block_bytes ? FLAGS_nfs_copy_block_bytes\n                                                              : static_cast<uint32_t>(size);\n\n        filec->copy_requests.reserve(size / FLAGS_nfs_copy_block_bytes + 1);\n        int idx = 0;\n        for (;;) // send one file with multi-round rpc\n        {\n            copy_request_ex_ptr req(\n                new copy_request_ex(filec, idx++, FLAGS_max_retry_count_per_copy_request));\n            req->offset = req_offset;\n            req->size = req_size;\n            req->is_last = (size <= req_size);\n\n            filec->copy_requests.push_back(req);\n            copy_requests.push_back(req);\n\n            req_offset += req_size;\n            size -= req_size;\n            if (size <= 0) {\n                dassert(size == 0, \"last request must read exactly the remaing size of the file\");\n                break;\n            }\n\n            req_size = size > FLAGS_nfs_copy_block_bytes ? 
FLAGS_nfs_copy_block_bytes\n                                                         : static_cast<uint32_t>(size);\n        }\n    }\n\n    if (!copy_requests.empty()) {\n        zauto_lock l(_copy_requests_lock);\n        if (ureq->high_priority)\n            _copy_requests_high.insert(\n                _copy_requests_high.end(), copy_requests.begin(), copy_requests.end());\n        else\n            _copy_requests_low.push(std::move(copy_requests));\n    }\n\n    tasking::enqueue(LPC_NFS_COPY_FILE, nullptr, [this]() { continue_copy(); }, 0);\n}\n\nvoid nfs_client_impl::continue_copy()\n{\n    if (_buffered_local_write_count >= FLAGS_max_buffered_local_writes) {\n        // exceed max_buffered_local_writes limit, pause.\n        // the copy task will be triggered by continue_copy() invoked in local_write_callback().\n        return;\n    }\n\n    if (++_concurrent_copy_request_count > FLAGS_max_concurrent_remote_copy_requests) {\n        // exceed max_concurrent_remote_copy_requests limit, pause.\n        // the copy task will be triggered by continue_copy() invoked in end_copy().\n        --_concurrent_copy_request_count;\n        return;\n    }\n\n    copy_request_ex_ptr req = nullptr;\n    while (true) {\n        {\n            zauto_lock l(_copy_requests_lock);\n\n            if (_high_priority_remaining_time > 0 && !_copy_requests_high.empty()) {\n                // pop from high queue\n                req = _copy_requests_high.front();\n                _copy_requests_high.pop_front();\n                --_high_priority_remaining_time;\n            } else {\n                // try to pop from low queue\n                req = _copy_requests_low.pop();\n                if (req) {\n                    _high_priority_remaining_time = FLAGS_high_priority_speed_rate;\n                }\n            }\n\n            if (!req && !_copy_requests_high.empty()) {\n                // pop from low queue failed, then pop from high priority,\n                // but not 
change the _high_priority_remaining_time\n                req = _copy_requests_high.front();\n                _copy_requests_high.pop_front();\n            }\n\n            if (req) {\n                ++req->file_ctx->user_req->concurrent_copy_count;\n            } else {\n                // no copy request\n                --_concurrent_copy_request_count;\n                break;\n            }\n        }\n\n        {\n            zauto_lock l(req->lock);\n            const user_request_ptr &ureq = req->file_ctx->user_req;\n            if (req->is_valid) {\n                if (FLAGS_max_copy_rate_megabytes_per_disk > 0) {\n                    _copy_token_buckets->get_token_bucket(ureq->file_size_req.dest_disk_tag)\n                        ->consumeWithBorrowAndWait(\n                            req->size,\n                            FLAGS_max_copy_rate_megabytes_per_disk << 20,\n                            1.5 * (FLAGS_max_copy_rate_megabytes_per_disk << 20));\n                }\n\n                copy_request copy_req;\n                copy_req.source = ureq->file_size_req.source;\n                copy_req.file_name = req->file_ctx->file_name;\n                copy_req.offset = req->offset;\n                copy_req.size = req->size;\n                copy_req.dst_dir = ureq->file_size_req.dst_dir;\n                copy_req.source_dir = ureq->file_size_req.source_dir;\n                copy_req.overwrite = ureq->file_size_req.overwrite;\n                copy_req.is_last = req->is_last;\n                copy_req.__set_source_disk_tag(ureq->file_size_req.source_disk_tag);\n                req->remote_copy_task =\n                    async_nfs_copy(copy_req,\n                                   [=](error_code err, copy_response &&resp) {\n                                       end_copy(err, std::move(resp), req);\n                                       // reset task to release memory quickly.\n                                       // should do this after end_copy() 
done.\n                                       if (req->is_ready_for_write) {\n                                           ::dsn::task_ptr tsk;\n                                           zauto_lock l(req->lock);\n                                           tsk = std::move(req->remote_copy_task);\n                                       }\n                                   },\n                                   std::chrono::milliseconds(FLAGS_rpc_timeout_ms),\n                                   req->file_ctx->user_req->file_size_req.source);\n            } else {\n                --ureq->concurrent_copy_count;\n                --_concurrent_copy_request_count;\n            }\n        }\n\n        if (++_concurrent_copy_request_count > FLAGS_max_concurrent_remote_copy_requests) {\n            // exceed max_concurrent_remote_copy_requests limit, pause.\n            // the copy task will be triggered by continue_copy() invoked in end_copy().\n            --_concurrent_copy_request_count;\n            break;\n        }\n    }\n}\n\nvoid nfs_client_impl::end_copy(::dsn::error_code err,\n                               const copy_response &resp,\n                               const copy_request_ex_ptr &reqc)\n{\n    --_concurrent_copy_request_count;\n    --reqc->file_ctx->user_req->concurrent_copy_count;\n\n    const file_context_ptr &fc = reqc->file_ctx;\n\n    if (err == ERR_OK) {\n        err = resp.error;\n    }\n\n    if (err != ::dsn::ERR_OK) {\n        _recent_copy_fail_count->increment();\n\n        if (!fc->user_req->is_finished) {\n            if (reqc->retry_count > 0) {\n                dwarn(\"{nfs_service} remote copy failed, source = %s, dir = %s, file = %s, \"\n                      \"err = %s, retry_count = %d\",\n                      fc->user_req->file_size_req.source.to_string(),\n                      fc->user_req->file_size_req.source_dir.c_str(),\n                      fc->file_name.c_str(),\n                      err.to_string(),\n                    
  reqc->retry_count);\n\n                // retry copy\n                reqc->retry_count--;\n\n                // put back into copy request queue\n                zauto_lock l(_copy_requests_lock);\n                if (fc->user_req->high_priority)\n                    _copy_requests_high.push_front(reqc);\n                else\n                    _copy_requests_low.push_retry(reqc);\n            } else {\n                derror(\"{nfs_service} remote copy failed, source = %s, dir = %s, file = %s, \"\n                       \"err = %s, retry_count = %d\",\n                       fc->user_req->file_size_req.source.to_string(),\n                       fc->user_req->file_size_req.source_dir.c_str(),\n                       fc->file_name.c_str(),\n                       err.to_string(),\n                       reqc->retry_count);\n\n                handle_completion(fc->user_req, err);\n            }\n        }\n    }\n\n    else {\n        _recent_copy_data_size->add(resp.size);\n\n        reqc->response = resp;\n        reqc->is_ready_for_write = true;\n\n        // prepare write requests\n        std::deque<copy_request_ex_ptr> new_writes;\n        {\n            zauto_lock l(fc->user_req->user_req_lock);\n            if (!fc->user_req->is_finished && fc->current_write_index == reqc->index - 1) {\n                for (int i = reqc->index; i < (int)(fc->copy_requests.size()); i++) {\n                    if (fc->copy_requests[i]->is_ready_for_write) {\n                        fc->current_write_index++;\n                        new_writes.push_back(fc->copy_requests[i]);\n                    } else {\n                        break;\n                    }\n                }\n            }\n        }\n\n        // put write requests into queue\n        if (!new_writes.empty()) {\n            zauto_lock l(_local_writes_lock);\n            _local_writes.insert(_local_writes.end(), new_writes.begin(), new_writes.end());\n            _buffered_local_write_count += 
new_writes.size();\n        }\n    }\n\n    continue_copy();\n    continue_write();\n}\n\nvoid nfs_client_impl::continue_write()\n{\n    // check write quota\n    if (++_concurrent_local_write_count > FLAGS_max_concurrent_local_writes) {\n        // exceed max_concurrent_local_writes limit, pause.\n        // the copy task will be triggered by continue_write() invoked in\n        // local_write_callback().\n        --_concurrent_local_write_count;\n        return;\n    }\n\n    // get write data\n    copy_request_ex_ptr reqc;\n    while (true) {\n        {\n            zauto_lock l(_local_writes_lock);\n            if (!_local_writes.empty()) {\n                reqc = _local_writes.front();\n                _local_writes.pop_front();\n                --_buffered_local_write_count;\n            } else {\n                // no write data\n                reqc = nullptr;\n                break;\n            }\n        }\n\n        {\n            // only process valid request, and discard invalid request\n            zauto_lock l(reqc->lock);\n            if (reqc->is_valid) {\n                break;\n            }\n        }\n    }\n\n    if (nullptr == reqc) {\n        --_concurrent_local_write_count;\n        return;\n    }\n\n    // real write\n    const file_context_ptr &fc = reqc->file_ctx;\n    std::string file_path =\n        dsn::utils::filesystem::path_combine(fc->user_req->file_size_req.dst_dir, fc->file_name);\n    std::string path = dsn::utils::filesystem::remove_file_name(file_path.c_str());\n    if (!dsn::utils::filesystem::create_directory(path)) {\n        dassert(false, \"create directory %s failed\", path.c_str());\n    }\n\n    if (!fc->file_holder->file_handle) {\n        // double check\n        zauto_lock l(fc->user_req->user_req_lock);\n        if (!fc->file_holder->file_handle) {\n            fc->file_holder->file_handle =\n                file::open(file_path.c_str(), O_RDWR | O_CREAT | O_BINARY, 0666);\n        }\n    }\n\n    if 
(!fc->file_holder->file_handle) {\n        --_concurrent_local_write_count;\n        derror(\"open file %s failed\", file_path.c_str());\n        handle_completion(fc->user_req, ERR_FILE_OPERATION_FAILED);\n    } else {\n        zauto_lock l(reqc->lock);\n        if (reqc->is_valid) {\n            reqc->local_write_task = file::write(fc->file_holder->file_handle,\n                                                 reqc->response.file_content.data(),\n                                                 reqc->response.size,\n                                                 reqc->response.offset,\n                                                 LPC_NFS_WRITE,\n                                                 &_tracker,\n                                                 [=](error_code err, int sz) {\n                                                     end_write(err, sz, reqc);\n                                                     // reset task to release memory quickly.\n                                                     // should do this after local_write_callback()\n                                                     // done.\n                                                     {\n                                                         ::dsn::task_ptr tsk;\n                                                         zauto_lock l(reqc->lock);\n                                                         tsk = std::move(reqc->local_write_task);\n                                                     }\n                                                 });\n        } else {\n            --_concurrent_local_write_count;\n        }\n    }\n}\n\nvoid nfs_client_impl::end_write(error_code err, size_t sz, const copy_request_ex_ptr &reqc)\n{\n    --_concurrent_local_write_count;\n\n    // clear content to release memory quickly\n    reqc->response.file_content = blob();\n\n    const file_context_ptr &fc = reqc->file_ctx;\n\n    bool completed = false;\n    if (err != ERR_OK) {\n       
 _recent_write_fail_count->increment();\n\n        derror(\"{nfs_service} local write failed, dir = %s, file = %s, err = %s\",\n               fc->user_req->file_size_req.dst_dir.c_str(),\n               fc->file_name.c_str(),\n               err.to_string());\n        completed = true;\n    } else {\n        _recent_write_data_size->add(sz);\n\n        file_wrapper_ptr temp_holder;\n        zauto_lock l(fc->user_req->user_req_lock);\n        if (!fc->user_req->is_finished &&\n            ++fc->finished_segments == (int)fc->copy_requests.size()) {\n            // release file to make it closed immediately after write done.\n            // we use temp_holder to make file closing out of lock.\n            temp_holder = std::move(fc->file_holder);\n\n            if (++fc->user_req->finished_files == (int)fc->user_req->file_contexts.size()) {\n                completed = true;\n            }\n        }\n    }\n\n    if (completed) {\n        handle_completion(fc->user_req, err);\n    }\n\n    continue_write();\n    continue_copy();\n}\n\nvoid nfs_client_impl::handle_completion(const user_request_ptr &req, error_code err)\n{\n    // ATTENTION: only here we may lock for two level (user_req_lock -> copy_request_ex.lock)\n    zauto_lock l(req->user_req_lock);\n\n    // make sure this function can only be executed for once\n    if (req->is_finished)\n        return;\n    req->is_finished = true;\n\n    size_t total_size = 0;\n    for (file_context_ptr &fc : req->file_contexts) {\n        total_size += fc->file_size;\n        if (err != ERR_OK) {\n            // mark all copy_requests to be invalid\n            for (const copy_request_ex_ptr &rc : fc->copy_requests) {\n                zauto_lock l(rc->lock);\n                rc->is_valid = false;\n            }\n        }\n        // clear copy_requests to break circle reference\n        fc->copy_requests.clear();\n    }\n\n    // clear file_contexts to break circle reference\n    req->file_contexts.clear();\n\n    // notify 
aio_task\n    req->nfs_task->enqueue(err, err == ERR_OK ? total_size : 0);\n}\n\n// todo(jiashuo1) just for compatibility with scripts, such as\n// https://github.com/apache/incubator-pegasus/blob/v2.3/scripts/pegasus_offline_node_list.sh\nvoid nfs_client_impl::register_cli_commands()\n{\n    static std::once_flag flag;\n    std::call_once(flag, [&]() {\n        _nfs_max_copy_rate_megabytes_cmd = dsn::command_manager::instance().register_command(\n            {\"nfs.max_copy_rate_megabytes_per_disk\"},\n            \"nfs.max_copy_rate_megabytes_per_disk [num]\",\n            \"control the max rate(MB/s) for one disk to copy file from remote node\",\n            [](const std::vector<std::string> &args) {\n                std::string result(\"OK\");\n\n                if (args.empty()) {\n                    return std::to_string(FLAGS_max_copy_rate_megabytes_per_disk);\n                }\n\n                int32_t max_copy_rate_megabytes = 0;\n                if (!dsn::buf2int32(args[0], max_copy_rate_megabytes) ||\n                    max_copy_rate_megabytes <= 0) {\n                    return std::string(\"ERR: invalid arguments\");\n                }\n\n                uint32_t max_copy_rate_bytes = max_copy_rate_megabytes << 20;\n                if (max_copy_rate_bytes <= FLAGS_nfs_copy_block_bytes) {\n                    result = std::string(\"ERR: max_copy_rate_bytes(max_copy_rate_megabytes << 20) \"\n                                         \"should be greater than nfs_copy_block_bytes:\")\n                                 .append(std::to_string(FLAGS_nfs_copy_block_bytes));\n                    return result;\n                }\n                FLAGS_max_copy_rate_megabytes_per_disk = max_copy_rate_megabytes;\n                return result;\n            });\n    });\n}\n} // namespace service\n} // namespace dsn\n"
  },
  {
    "path": "src/nfs/nfs_client_impl.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <vector>\n#include <deque>\n#include <iostream>\n#include <dsn/tool-api/task_tracker.h>\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n#include <dsn/dist/nfs_node.h>\n#include <dsn/utility/defer.h>\n#include <dsn/utility/TokenBucket.h>\n#include <dsn/utility/flags.h>\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/utils/token_buckets.h>\n\n#include \"nfs_types.h\"\n#include \"nfs_code_definition.h\"\n\nnamespace dsn {\nnamespace service {\n\nusing TokenBucket = folly::BasicTokenBucket<std::chrono::steady_clock>;\n\ntemplate <typename TCallback>\ntask_ptr async_nfs_get_file_size(const get_file_size_request &request,\n                                 TCallback 
&&callback,\n                                 std::chrono::milliseconds timeout,\n                                 rpc_address server_addr)\n{\n    return rpc::call(server_addr,\n                     RPC_NFS_GET_FILE_SIZE,\n                     request,\n                     nullptr,\n                     std::forward<TCallback>(callback),\n                     timeout);\n}\n\ntemplate <typename TCallback>\ntask_ptr async_nfs_copy(const copy_request &request,\n                        TCallback &&callback,\n                        std::chrono::milliseconds timeout,\n                        rpc_address server_addr)\n{\n    return rpc::call(\n        server_addr, RPC_NFS_COPY, request, nullptr, std::forward<TCallback>(callback), timeout);\n}\n\nclass nfs_client_impl\n{\npublic:\n    struct user_request;\n    struct file_context;\n    struct copy_request_ex;\n    struct file_wrapper;\n\n    typedef ::dsn::ref_ptr<user_request> user_request_ptr;\n    typedef ::dsn::ref_ptr<file_context> file_context_ptr;\n    typedef ::dsn::ref_ptr<copy_request_ex> copy_request_ex_ptr;\n    typedef ::dsn::ref_ptr<file_wrapper> file_wrapper_ptr;\n\n    struct file_wrapper : public ::dsn::ref_counter\n    {\n        disk_file *file_handle;\n\n        file_wrapper() { file_handle = nullptr; }\n        ~file_wrapper()\n        {\n            if (file_handle != nullptr) {\n                auto err = file::close(file_handle);\n                dassert(err == ERR_OK, \"file::close failed, err = %s\", err.to_string());\n            }\n        }\n    };\n\n    struct copy_request_ex : public ::dsn::ref_counter\n    {\n        file_context_ptr file_ctx; // reference to the owner\n        int index;\n        uint64_t offset;\n        uint32_t size;\n        bool is_last;\n        copy_response response;\n        ::dsn::task_ptr remote_copy_task;\n        ::dsn::task_ptr local_write_task;\n        bool is_ready_for_write;\n        bool is_valid;\n        int retry_count;\n        zlock lock; // to 
protect is_valid\n\n        copy_request_ex(const file_context_ptr &file, int idx, int try_count)\n        {\n            file_ctx = file;\n            index = idx;\n            offset = 0;\n            size = 0;\n            is_last = false;\n            is_ready_for_write = false;\n            is_valid = true;\n            retry_count = try_count;\n        }\n    };\n\n    struct file_context : public ::dsn::ref_counter\n    {\n        user_request_ptr user_req; // reference to the owner\n\n        std::string file_name;\n        uint64_t file_size;\n\n        file_wrapper_ptr file_holder;\n        int current_write_index;\n        int finished_segments;\n        std::vector<copy_request_ex_ptr> copy_requests;\n\n        file_context(const user_request_ptr &req, const std::string &file_nm, uint64_t sz)\n        {\n            user_req = req;\n            file_name = file_nm;\n            file_size = sz;\n            file_holder = new file_wrapper();\n            current_write_index = -1;\n            finished_segments = 0;\n        }\n    };\n\n    struct user_request : public ::dsn::ref_counter\n    {\n        zlock user_req_lock;\n\n        bool high_priority;\n        int low_queue_index;\n        get_file_size_request file_size_req;\n        ::dsn::ref_ptr<aio_task> nfs_task;\n        std::atomic<int> finished_files;\n        std::atomic<int> concurrent_copy_count;\n        bool is_finished;\n\n        std::vector<file_context_ptr> file_contexts;\n\n        user_request()\n        {\n            high_priority = false;\n            low_queue_index = -1;\n            finished_files = 0;\n            concurrent_copy_count = 0;\n            is_finished = false;\n        }\n    };\n\n    struct random_robin_queue\n    {\n        int max_concurrent_copy_count_per_queue;\n        size_t total_count;\n        // each queue represents all requests for one user_request\n        std::list<std::deque<copy_request_ex_ptr>> queue_list;\n        // the next queue to pop 
request\n        std::list<std::deque<copy_request_ex_ptr>>::iterator pop_it;\n\n        random_robin_queue(int max_concurrent_copy_count_per_queue_)\n        {\n            max_concurrent_copy_count_per_queue = max_concurrent_copy_count_per_queue_;\n            total_count = 0;\n            pop_it = queue_list.end();\n        }\n\n        // push request queue as an unique sub-queue.\n        void push(std::deque<copy_request_ex_ptr> &&q)\n        {\n            total_count += q.size();\n            queue_list.emplace_back(std::move(q));\n        }\n\n        // push retry request to this queue.\n        // if the original sub-queue is exist, push to front of it,\n        // else push to a new sub-queue.\n        void push_retry(const copy_request_ex_ptr &p)\n        {\n            total_count++;\n            for (auto it = queue_list.begin(); it != queue_list.end(); ++it) {\n                if (it->front()->file_ctx->user_req.get() == p->file_ctx->user_req.get()) {\n                    // belong the the same user_request\n                    it->push_front(p);\n                    return;\n                }\n            }\n            queue_list.emplace_back(std::deque<copy_request_ex_ptr>({p}));\n        }\n\n        // pop one request from this queue.\n        // return nullptr if no valid request found.\n        copy_request_ex_ptr pop()\n        {\n            copy_request_ex_ptr p;\n            if (total_count == 0)\n                return p;\n            if (pop_it == queue_list.end())\n                pop_it = queue_list.begin();\n            auto start_it = pop_it;\n            while (true) {\n                if (pop_it->front()->file_ctx->user_req->concurrent_copy_count <\n                    max_concurrent_copy_count_per_queue) {\n                    // ok, find one, pop from queue, and forward pop_it\n                    p = pop_it->front();\n                    pop_it->pop_front();\n                    if (pop_it->empty()) {\n                        
pop_it = queue_list.erase(pop_it);\n                    } else {\n                        pop_it++;\n                    }\n                    total_count--;\n                    break;\n                }\n                // forward pop_it\n                pop_it++;\n                if (pop_it == queue_list.end())\n                    pop_it = queue_list.begin();\n                // iterate for a round\n                if (pop_it == start_it)\n                    break;\n            }\n            return p;\n        }\n\n        bool empty() { return total_count == 0; }\n    };\n\npublic:\n    nfs_client_impl();\n    virtual ~nfs_client_impl();\n\n    // copy file request entry\n    void begin_remote_copy(std::shared_ptr<remote_copy_request> &rci, aio_task *nfs_task);\n\nprivate:\n    void end_get_file_size(::dsn::error_code err,\n                           const ::dsn::service::get_file_size_response &resp,\n                           const user_request_ptr &ureq);\n\n    void continue_copy();\n\n    void\n    end_copy(::dsn::error_code err, const copy_response &resp, const copy_request_ex_ptr &reqc);\n\n    void continue_write();\n\n    void end_write(error_code err, size_t sz, const copy_request_ex_ptr &reqc);\n\n    void handle_completion(const user_request_ptr &req, error_code err);\n\n    void register_cli_commands();\n\nprivate:\n    std::unique_ptr<dsn::utils::token_buckets>\n        _copy_token_buckets; // rate limiter of copy from remote\n\n    std::atomic<int> _concurrent_copy_request_count; // record concurrent request count, limited\n                                                     // by max_concurrent_remote_copy_requests.\n    std::atomic<int> _concurrent_local_write_count;  // record concurrent write count, limited\n                                                     // by max_concurrent_local_writes.\n    std::atomic<int> _buffered_local_write_count;    // record current buffered write count, limited\n                                          
           // by max_buffered_local_writes.\n\n    zlock _copy_requests_lock;\n    std::deque<copy_request_ex_ptr> _copy_requests_high;\n    random_robin_queue _copy_requests_low;\n    int _high_priority_remaining_time;\n\n    zlock _local_writes_lock;\n    std::deque<copy_request_ex_ptr> _local_writes;\n\n    perf_counter_wrapper _recent_copy_data_size;\n    perf_counter_wrapper _recent_copy_fail_count;\n    perf_counter_wrapper _recent_write_data_size;\n    perf_counter_wrapper _recent_write_fail_count;\n\n    dsn_handle_t _nfs_max_copy_rate_megabytes_cmd;\n\n    dsn::task_tracker _tracker;\n};\n} // namespace service\n} // namespace dsn\n"
  },
  {
    "path": "src/nfs/nfs_code_definition.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n#pragma once\n\nnamespace dsn {\nnamespace service {\n// define RPC task code for service 'nfs'\nDEFINE_TASK_CODE_RPC(RPC_NFS_COPY, TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT)\nDEFINE_TASK_CODE_RPC(RPC_NFS_GET_FILE_SIZE, TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT)\n// test timer task code\nDEFINE_TASK_CODE(LPC_NFS_REQUEST_TIMER, TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT)\n\nDEFINE_TASK_CODE_AIO(LPC_NFS_READ, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\nDEFINE_TASK_CODE(LPC_NFS_FILE_CLOSE_TIMER, TASK_PRIORITY_COMMON, 
THREAD_POOL_DEFAULT)\n\nDEFINE_TASK_CODE_AIO(LPC_NFS_WRITE, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\n\nDEFINE_TASK_CODE_AIO(LPC_NFS_COPY_FILE, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\n}\n}\n"
  },
  {
    "path": "src/nfs/nfs_node.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/utility/smart_pointers.h>\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/dist/nfs_node.h>\n\n#include \"nfs_node_simple.h\"\n\nnamespace dsn {\n\nstd::unique_ptr<nfs_node> nfs_node::create()\n{\n    return dsn::make_unique<dsn::service::nfs_node_simple>();\n}\n\naio_task_ptr nfs_node::copy_remote_directory(const rpc_address &remote,\n                                             const std::string &source_disk_tag,\n                                             const std::string &source_dir,\n                                             const std::string &dest_disk_tag,\n                                             const std::string &dest_dir,\n                                             bool overwrite,\n           
                                  bool high_priority,\n                                             task_code callback_code,\n                                             task_tracker *tracker,\n                                             aio_handler &&callback,\n                                             int hash)\n{\n    return copy_remote_files(remote,\n                             source_disk_tag,\n                             source_dir,\n                             {},\n                             dest_disk_tag,\n                             dest_dir,\n                             overwrite,\n                             high_priority,\n                             callback_code,\n                             tracker,\n                             std::move(callback),\n                             hash);\n}\n\naio_task_ptr nfs_node::copy_remote_files(const rpc_address &remote,\n                                         const std::string &source_disk_tag,\n                                         const std::string &source_dir,\n                                         const std::vector<std::string> &files,\n                                         const std::string &dest_disk_tag,\n                                         const std::string &dest_dir,\n                                         bool overwrite,\n                                         bool high_priority,\n                                         task_code callback_code,\n                                         task_tracker *tracker,\n                                         aio_handler &&callback,\n                                         int hash)\n{\n    auto cb = dsn::file::create_aio_task(callback_code, tracker, std::move(callback), hash);\n\n    std::shared_ptr<remote_copy_request> rci = std::make_shared<remote_copy_request>();\n    rci->source = remote;\n    rci->source_disk_tag = source_disk_tag;\n    rci->source_dir = source_dir;\n    rci->files = files;\n    rci->dest_disk_tag = 
dest_disk_tag;\n    rci->dest_dir = dest_dir;\n    rci->overwrite = overwrite;\n    rci->high_priority = high_priority;\n    call(rci, cb);\n\n    return cb;\n}\n\naio_task_ptr nfs_node::copy_remote_files(std::shared_ptr<remote_copy_request> &request,\n                                         task_code callback_code,\n                                         task_tracker *tracker,\n                                         aio_handler &&callback,\n                                         int hash)\n{\n    auto cb = dsn::file::create_aio_task(callback_code, tracker, std::move(callback), hash);\n    call(request, cb);\n    return cb;\n}\n}\n"
  },
  {
    "path": "src/nfs/nfs_node_impl.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"nfs_node_simple.h\"\n#include \"nfs_client_impl.h\"\n#include \"nfs_server_impl.h\"\n\nnamespace dsn {\nnamespace service {\n\nnfs_node_simple::nfs_node_simple() : nfs_node()\n{\n    _server = nullptr;\n    _client = nullptr;\n}\n\nnfs_node_simple::~nfs_node_simple() { stop(); }\n\nvoid nfs_node_simple::call(std::shared_ptr<remote_copy_request> rci, aio_task *callback)\n{\n    _client->begin_remote_copy(rci, callback); // copy file request entry\n}\n\nerror_code nfs_node_simple::start()\n{\n    _server = new nfs_service_impl();\n    
_server->open_service();\n\n    _client = new nfs_client_impl();\n    return ERR_OK;\n}\n\nerror_code nfs_node_simple::stop()\n{\n    delete _server;\n    _server = nullptr;\n\n    delete _client;\n    _client = nullptr;\n\n    return ERR_OK;\n}\n} // namespace service\n} // namespace dsn\n"
  },
  {
    "path": "src/nfs/nfs_node_simple.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n#pragma once\n\n#include <dsn/tool_api.h>\n#include <dsn/dist/nfs_node.h>\n\nnamespace dsn {\nnamespace service {\n\nclass nfs_service_impl;\nclass nfs_client_impl;\n\nclass nfs_node_simple : public nfs_node\n{\npublic:\n    nfs_node_simple();\n\n    virtual ~nfs_node_simple();\n\n    virtual void call(std::shared_ptr<remote_copy_request> rci, aio_task *callback) override;\n\n    virtual ::dsn::error_code start() override;\n\n    virtual error_code stop() override;\n\nprivate:\n    nfs_service_impl *_server;\n    nfs_client_impl *_client;\n};\n} // namespace 
service\n} // namespace dsn\n"
  },
  {
    "path": "src/nfs/nfs_server_impl.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"nfs_server_impl.h\"\n\n#include <sys/stat.h>\n#include <fcntl.h>\n\n#include <cstdlib>\n\n#include <dsn/utility/filesystem.h>\n#include <dsn/tool-api/async_calls.h>\n\nnamespace dsn {\nnamespace service {\n\nDSN_DEFINE_uint32(\n    \"nfs\",\n    max_send_rate_megabytes_per_disk,\n    0,\n    \"max rate per disk of send to remote node(MB/s)，zero means disable rate limiter\");\nDSN_TAG_VARIABLE(max_send_rate_megabytes_per_disk, FT_MUTABLE);\n\nDSN_DECLARE_int32(file_close_timer_interval_ms_on_server);\nDSN_DECLARE_int32(file_close_expire_time_ms);\n\nnfs_service_impl::nfs_service_impl() : ::dsn::serverlet<nfs_service_impl>(\"nfs\")\n{\n    _file_close_timer = ::dsn::tasking::enqueue_timer(\n        
LPC_NFS_FILE_CLOSE_TIMER,\n        &_tracker,\n        [this] { close_file(); },\n        std::chrono::milliseconds(FLAGS_file_close_timer_interval_ms_on_server));\n\n    _recent_copy_data_size.init_app_counter(\"eon.nfs_server\",\n                                            \"recent_copy_data_size\",\n                                            COUNTER_TYPE_VOLATILE_NUMBER,\n                                            \"nfs server copy data size in the recent period\");\n    _recent_copy_fail_count.init_app_counter(\n        \"eon.nfs_server\",\n        \"recent_copy_fail_count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"nfs server copy fail count count in the recent period\");\n\n    _send_token_buckets = std::make_unique<dsn::utils::token_buckets>();\n    register_cli_commands();\n}\n\nvoid nfs_service_impl::on_copy(const ::dsn::service::copy_request &request,\n                               ::dsn::rpc_replier<::dsn::service::copy_response> &reply)\n{\n    // dinfo(\">>> on call RPC_COPY end, exec RPC_NFS_COPY\");\n\n    std::string file_path =\n        dsn::utils::filesystem::path_combine(request.source_dir, request.file_name);\n    disk_file *hfile;\n\n    {\n        zauto_lock l(_handles_map_lock);\n        auto it = _handles_map.find(file_path); // find file handle cache first\n\n        if (it == _handles_map.end()) // not found\n        {\n            hfile = file::open(file_path.c_str(), O_RDONLY | O_BINARY, 0);\n            if (hfile) {\n\n                auto fh = std::make_shared<file_handle_info_on_server>();\n                fh->file_handle = hfile;\n                fh->file_access_count = 1;\n                fh->last_access_time = dsn_now_ms();\n                _handles_map.insert(std::make_pair(file_path, std::move(fh)));\n            }\n        } else // found\n        {\n            hfile = it->second->file_handle;\n            it->second->file_access_count++;\n            it->second->last_access_time = dsn_now_ms();\n        }\n    
}\n\n    dinfo(\"nfs: copy file %s [%\" PRId64 \", %\" PRId64 \")\",\n          file_path.c_str(),\n          request.offset,\n          request.offset + request.size);\n\n    if (hfile == 0) {\n        derror(\"{nfs_service} open file %s failed\", file_path.c_str());\n        ::dsn::service::copy_response resp;\n        resp.error = ERR_OBJECT_NOT_FOUND;\n        reply(resp);\n        return;\n    }\n\n    std::shared_ptr<callback_para> cp = std::make_shared<callback_para>(std::move(reply));\n    cp->bb = blob(dsn::utils::make_shared_array<char>(request.size), request.size);\n    cp->dst_dir = request.dst_dir;\n    cp->source_disk_tag = request.source_disk_tag;\n    cp->file_path = std::move(file_path);\n    cp->hfile = hfile;\n    cp->offset = request.offset;\n    cp->size = request.size;\n\n    auto buffer_save = cp->bb.buffer().get();\n\n    file::read(\n        hfile,\n        buffer_save,\n        request.size,\n        request.offset,\n        LPC_NFS_READ,\n        &_tracker,\n        [this, cp](error_code err, size_t sz) mutable { internal_read_callback(err, sz, *cp); });\n}\n\nvoid nfs_service_impl::internal_read_callback(error_code err, size_t sz, callback_para &cp)\n{\n    if (FLAGS_max_send_rate_megabytes_per_disk > 0) {\n        _send_token_buckets->get_token_bucket(cp.source_disk_tag)\n            ->consumeWithBorrowAndWait(sz,\n                                       FLAGS_max_send_rate_megabytes_per_disk << 20,\n                                       1.5 * (FLAGS_max_send_rate_megabytes_per_disk << 20));\n    }\n\n    {\n        zauto_lock l(_handles_map_lock);\n        auto it = _handles_map.find(cp.file_path);\n\n        if (it != _handles_map.end()) {\n            it->second->file_access_count--;\n        }\n    }\n\n    if (err != ERR_OK) {\n        derror(\n            \"{nfs_service} read file %s failed, err = %s\", cp.file_path.c_str(), err.to_string());\n        _recent_copy_fail_count->increment();\n    } else {\n        
_recent_copy_data_size->add(sz);\n    }\n\n    ::dsn::service::copy_response resp;\n    resp.error = err;\n    resp.file_content = std::move(cp.bb);\n    resp.offset = cp.offset;\n    resp.size = cp.size;\n\n    cp.replier(resp);\n}\n\n// RPC_NFS_NEW_NFS_GET_FILE_SIZE\nvoid nfs_service_impl::on_get_file_size(\n    const ::dsn::service::get_file_size_request &request,\n    ::dsn::rpc_replier<::dsn::service::get_file_size_response> &reply)\n{\n    // dinfo(\">>> on call RPC_NFS_GET_FILE_SIZE end, exec RPC_NFS_GET_FILE_SIZE\");\n\n    get_file_size_response resp;\n    error_code err = ERR_OK;\n    std::vector<std::string> file_list;\n    std::string folder = request.source_dir;\n    if (request.file_list.size() == 0) // return all file size in the destination file folder\n    {\n        if (!dsn::utils::filesystem::directory_exists(folder)) {\n            derror(\"{nfs_service} directory %s not exist\", folder.c_str());\n            err = ERR_OBJECT_NOT_FOUND;\n        } else {\n            if (!dsn::utils::filesystem::get_subfiles(folder, file_list, true)) {\n                derror(\"{nfs_service} get subfiles of directory %s failed\", folder.c_str());\n                err = ERR_FILE_OPERATION_FAILED;\n            } else {\n                for (auto &fpath : file_list) {\n                    // TODO: using uint64 instead as file ma\n                    // Done\n                    int64_t sz;\n                    if (!dsn::utils::filesystem::file_size(fpath, sz)) {\n                        derror(\"{nfs_service} get size of file %s failed\", fpath.c_str());\n                        err = ERR_FILE_OPERATION_FAILED;\n                        break;\n                    }\n\n                    resp.size_list.push_back((uint64_t)sz);\n                    resp.file_list.push_back(\n                        fpath.substr(request.source_dir.length(), fpath.length() - 1));\n                }\n                file_list.clear();\n            }\n        }\n    } else // return 
file size in the request file folder\n    {\n        for (size_t i = 0; i < request.file_list.size(); i++) {\n            std::string file_path =\n                dsn::utils::filesystem::path_combine(folder, request.file_list[i]);\n\n            struct stat st;\n            if (0 != ::stat(file_path.c_str(), &st)) {\n                derror(\"{nfs_service} get stat of file %s failed, err = %s\",\n                       file_path.c_str(),\n                       strerror(errno));\n                err = ERR_OBJECT_NOT_FOUND;\n                break;\n            }\n\n            // TODO: using int64 instead as file may exceed the size of 32bit\n            // Done\n            uint64_t size = st.st_size;\n\n            resp.size_list.push_back(size);\n            resp.file_list.push_back((folder + request.file_list[i])\n                                         .substr(request.source_dir.length(),\n                                                 (folder + request.file_list[i]).length() - 1));\n        }\n    }\n\n    resp.error = err;\n    reply(resp);\n}\n\nvoid nfs_service_impl::close_file() // release out-of-date file handle\n{\n    zauto_lock l(_handles_map_lock);\n\n    for (auto it = _handles_map.begin(); it != _handles_map.end();) {\n        auto fptr = it->second;\n\n        // not used and expired\n        if (fptr->file_access_count == 0 &&\n            dsn_now_ms() - fptr->last_access_time > (uint64_t)FLAGS_file_close_expire_time_ms) {\n            dinfo(\"nfs: close file handle %s\", it->first.c_str());\n            it = _handles_map.erase(it);\n        } else\n            it++;\n    }\n}\n\n// TODO(jiashuo1):  just for compatibility with scripts, such as\n// https://github.com/apache/incubator-pegasus/blob/v2.3/scripts/pegasus_offline_node_list.sh\nvoid nfs_service_impl::register_cli_commands()\n{\n    static std::once_flag flag;\n    std::call_once(flag, [&]() {\n        _nfs_max_send_rate_megabytes_cmd = 
dsn::command_manager::instance().register_command(\n            {\"nfs.max_send_rate_megabytes_per_disk\"},\n            \"nfs.max_send_rate_megabytes_per_disk [num]\",\n            \"control the max rate(MB/s) for one disk to send file to remote node\",\n            [](const std::vector<std::string> &args) {\n                std::string result(\"OK\");\n\n                if (args.empty()) {\n                    return std::to_string(FLAGS_max_send_rate_megabytes_per_disk);\n                }\n\n                int32_t max_send_rate_megabytes = 0;\n                if (!dsn::buf2int32(args[0], max_send_rate_megabytes) ||\n                    max_send_rate_megabytes <= 0) {\n                    return std::string(\"ERR: invalid arguments\");\n                }\n\n                FLAGS_max_send_rate_megabytes_per_disk = max_send_rate_megabytes;\n                return result;\n            });\n    });\n}\n\n} // namespace service\n} // namespace dsn\n"
  },
  {
    "path": "src/nfs/nfs_server_impl.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n#include <dsn/tool-api/task_tracker.h>\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n#include <iostream>\n#include <dsn/cpp/serverlet.h>\n#include <dsn/utility/flags.h>\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/utils/token_buckets.h>\n\n#include \"nfs_code_definition.h\"\n#include \"nfs_types.h\"\n#include \"nfs_client_impl.h\"\n\nnamespace dsn {\nnamespace service {\nclass nfs_service_impl : public ::dsn::serverlet<nfs_service_impl>\n{\npublic:\n    nfs_service_impl();\n    virtual ~nfs_service_impl() { _tracker.cancel_outstanding_tasks(); }\n\n    void open_service()\n    {\n        register_async_rpc_handler(RPC_NFS_COPY, \"copy\", &nfs_service_impl::on_copy);\n        
register_async_rpc_handler(\n            RPC_NFS_GET_FILE_SIZE, \"get_file_size\", &nfs_service_impl::on_get_file_size);\n    }\n\n    void register_cli_commands();\n\n    void close_service()\n    {\n        unregister_rpc_handler(RPC_NFS_COPY);\n        unregister_rpc_handler(RPC_NFS_GET_FILE_SIZE);\n        UNREGISTER_VALID_HANDLER(_nfs_max_send_rate_megabytes_cmd);\n    }\n\nprotected:\n    // RPC_NFS_V2_NFS_COPY\n    virtual void on_copy(const copy_request &request, ::dsn::rpc_replier<copy_response> &reply);\n    // RPC_NFS_V2_NFS_GET_FILE_SIZE\n    virtual void on_get_file_size(const get_file_size_request &request,\n                                  ::dsn::rpc_replier<get_file_size_response> &reply);\n\nprivate:\n    struct callback_para\n    {\n        dsn_handle_t hfile;\n        std::string source_disk_tag;\n        std::string file_path;\n        std::string dst_dir;\n        blob bb;\n        uint64_t offset;\n        uint32_t size;\n        rpc_replier<copy_response> replier;\n\n        callback_para(rpc_replier<copy_response> &&r)\n            : hfile(nullptr), offset(0), size(0), replier(std::move(r))\n        {\n        }\n        callback_para(callback_para &&r)\n            : hfile(r.hfile),\n              file_path(std::move(r.file_path)),\n              dst_dir(std::move(r.dst_dir)),\n              bb(std::move(r.bb)),\n              offset(r.offset),\n              size(r.size),\n              replier(std::move(r.replier))\n        {\n            r.hfile = nullptr;\n            r.offset = 0;\n            r.size = 0;\n        }\n    };\n\n    struct file_handle_info_on_server\n    {\n        disk_file *file_handle;\n        int32_t file_access_count; // concurrent r/w count\n        uint64_t last_access_time; // last touch time\n\n        file_handle_info_on_server()\n            : file_handle(nullptr), file_access_count(0), last_access_time(0)\n        {\n        }\n\n        ~file_handle_info_on_server()\n        {\n            error_code err = 
file::close(file_handle);\n            dassert(err == ERR_OK, \"file::close failed, err = %s\", err.to_string());\n        }\n    };\n\n    void internal_read_callback(error_code err, size_t sz, callback_para &cp);\n\n    void close_file();\n\nprivate:\n    zlock _handles_map_lock;\n    std::unordered_map<std::string, std::shared_ptr<file_handle_info_on_server>>\n        _handles_map; // cache file handles\n\n    ::dsn::task_ptr _file_close_timer;\n\n    std::unique_ptr<dsn::utils::token_buckets>\n        _send_token_buckets; // rate limiter of send to remote\n\n    perf_counter_wrapper _recent_copy_data_size;\n    perf_counter_wrapper _recent_copy_fail_count;\n\n    dsn_handle_t _nfs_max_send_rate_megabytes_cmd;\n\n    dsn::task_tracker _tracker;\n};\n} // namespace service\n} // namespace dsn\n"
  },
  {
    "path": "src/nfs/test/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_nfs_test)\n\n# Source files under CURRENT project directory will be automatically included.\n# You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"\")\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS dsn_nfs dsn_runtime gtest dsn_aio)\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\n# Extra files that will be installed\nset(MY_BINPLACES\n    \"${CMAKE_CURRENT_SOURCE_DIR}/nfs_test_file1\"\n    \"${CMAKE_CURRENT_SOURCE_DIR}/nfs_test_file2\"\n    \"${CMAKE_CURRENT_SOURCE_DIR}/config.ini\"\n    \"${CMAKE_CURRENT_SOURCE_DIR}/clear.sh\"\n    \"${CMAKE_CURRENT_SOURCE_DIR}/run.sh\"\n)\n\ndsn_add_test()\n"
  },
  {
    "path": "src/nfs/test/clear.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nrm -rf data nfs_test_dir nfs_test_dir_copy dsn_nfs_test.xml\n"
  },
  {
    "path": "src/nfs/test/config.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n\n[apps.mimic]\ntype = dsn.app.mimic\narguments =\nports = 20101\npools = THREAD_POOL_DEFAULT\nrun = true\ncount = 1\n\n[core]\nenable_default_app_mimic = true\ntool = nativerun\npause_on_start = false\nlogging_start_level = LOG_LEVEL_DEBUG\nlogging_factory_name = dsn::tools::simple_logger\n"
  },
  {
    "path": "src/nfs/test/main.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <gtest/gtest.h>\n\n#include <dsn/service_api_c.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/tool-api/task.h>\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/dist/nfs_node.h>\n\nusing namespace dsn;\n\nDEFINE_TASK_CODE_AIO(LPC_AIO_TEST_NFS, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\nstruct aio_result\n{\n    dsn::error_code err;\n    size_t sz;\n};\n\nTEST(nfs, basic)\n{\n    std::unique_ptr<dsn::nfs_node> nfs(dsn::nfs_node::create());\n    nfs->start();\n\n    utils::filesystem::remove_path(\"nfs_test_dir\");\n    utils::filesystem::remove_path(\"nfs_test_dir_copy\");\n\n    ASSERT_FALSE(utils::filesystem::directory_exists(\"nfs_test_dir\"));\n    
ASSERT_FALSE(utils::filesystem::directory_exists(\"nfs_test_dir_copy\"));\n\n    ASSERT_TRUE(utils::filesystem::create_directory(\"nfs_test_dir\"));\n    ASSERT_TRUE(utils::filesystem::directory_exists(\"nfs_test_dir\"));\n\n    {\n        // copy nfs_test_file1 nfs_test_file2 nfs_test_dir\n        ASSERT_FALSE(utils::filesystem::file_exists(\"nfs_test_dir/nfs_test_file1\"));\n        ASSERT_FALSE(utils::filesystem::file_exists(\"nfs_test_dir/nfs_test_file2\"));\n\n        std::vector<std::string> files{\"nfs_test_file1\", \"nfs_test_file2\"};\n\n        aio_result r;\n        dsn::aio_task_ptr t = nfs->copy_remote_files(dsn::rpc_address(\"localhost\", 20101),\n                                                     \"default\",\n                                                     \".\",\n                                                     files,\n                                                     \"default\",\n                                                     \"nfs_test_dir\",\n                                                     false,\n                                                     false,\n                                                     LPC_AIO_TEST_NFS,\n                                                     nullptr,\n                                                     [&r](dsn::error_code err, size_t sz) {\n                                                         r.err = err;\n                                                         r.sz = sz;\n                                                     },\n                                                     0);\n        ASSERT_NE(nullptr, t);\n        ASSERT_TRUE(t->wait(20000));\n        ASSERT_EQ(r.err, t->error());\n        ASSERT_EQ(ERR_OK, r.err);\n        ASSERT_EQ(r.sz, t->get_transferred_size());\n\n        ASSERT_TRUE(utils::filesystem::file_exists(\"nfs_test_dir/nfs_test_file1\"));\n        ASSERT_TRUE(utils::filesystem::file_exists(\"nfs_test_dir/nfs_test_file2\"));\n\n        int64_t sz1, 
sz2;\n        ASSERT_TRUE(utils::filesystem::file_size(\"nfs_test_file1\", sz1));\n        ASSERT_TRUE(utils::filesystem::file_size(\"nfs_test_dir/nfs_test_file1\", sz2));\n        ASSERT_EQ(sz1, sz2);\n        ASSERT_TRUE(utils::filesystem::file_size(\"nfs_test_file2\", sz1));\n        ASSERT_TRUE(utils::filesystem::file_size(\"nfs_test_dir/nfs_test_file2\", sz2));\n        ASSERT_EQ(sz1, sz2);\n    }\n\n    {\n        // copy files again, overwrite\n        ASSERT_TRUE(utils::filesystem::file_exists(\"nfs_test_dir/nfs_test_file1\"));\n        ASSERT_TRUE(utils::filesystem::file_exists(\"nfs_test_dir/nfs_test_file2\"));\n\n        std::vector<std::string> files{\"nfs_test_file1\", \"nfs_test_file2\"};\n\n        aio_result r;\n        dsn::aio_task_ptr t = nfs->copy_remote_files(dsn::rpc_address(\"localhost\", 20101),\n                                                     \"default\",\n                                                     \".\",\n                                                     files,\n                                                     \"default\",\n                                                     \"nfs_test_dir\",\n                                                     true,\n                                                     false,\n                                                     LPC_AIO_TEST_NFS,\n                                                     nullptr,\n                                                     [&r](dsn::error_code err, size_t sz) {\n                                                         r.err = err;\n                                                         r.sz = sz;\n                                                     },\n                                                     0);\n        ASSERT_NE(nullptr, t);\n        ASSERT_TRUE(t->wait(20000));\n        ASSERT_EQ(r.err, t->error());\n        ASSERT_EQ(ERR_OK, r.err);\n        ASSERT_EQ(r.sz, t->get_transferred_size());\n        // this is only true for 
simulator\n        if (dsn::tools::get_current_tool()->name() == \"simulator\") {\n            ASSERT_EQ(1, t->get_count());\n        }\n    }\n\n    {\n        // copy nfs_test_dir nfs_test_dir_copy\n        ASSERT_FALSE(utils::filesystem::directory_exists(\"nfs_test_dir_copy\"));\n\n        aio_result r;\n        dsn::aio_task_ptr t = nfs->copy_remote_directory(dsn::rpc_address(\"localhost\", 20101),\n                                                         \"default\",\n                                                         \"nfs_test_dir\",\n                                                         \"default\",\n                                                         \"nfs_test_dir_copy\",\n                                                         false,\n                                                         false,\n                                                         LPC_AIO_TEST_NFS,\n                                                         nullptr,\n                                                         [&r](dsn::error_code err, size_t sz) {\n                                                             r.err = err;\n                                                             r.sz = sz;\n                                                         },\n                                                         0);\n        ASSERT_NE(nullptr, t);\n        ASSERT_TRUE(t->wait(20000));\n        ASSERT_EQ(r.err, t->error());\n        ASSERT_EQ(ERR_OK, r.err);\n        ASSERT_EQ(r.sz, t->get_transferred_size());\n\n        ASSERT_TRUE(utils::filesystem::directory_exists(\"nfs_test_dir_copy\"));\n        ASSERT_TRUE(utils::filesystem::file_exists(\"nfs_test_dir_copy/nfs_test_file1\"));\n        ASSERT_TRUE(utils::filesystem::file_exists(\"nfs_test_dir_copy/nfs_test_file2\"));\n\n        std::vector<std::string> sub1, sub2;\n        ASSERT_TRUE(utils::filesystem::get_subfiles(\"nfs_test_dir\", sub1, true));\n        
ASSERT_TRUE(utils::filesystem::get_subfiles(\"nfs_test_dir_copy\", sub2, true));\n        ASSERT_EQ(sub1.size(), sub2.size());\n\n        int64_t sz1, sz2;\n        ASSERT_TRUE(utils::filesystem::file_size(\"nfs_test_dir/nfs_test_file1\", sz1));\n        ASSERT_TRUE(utils::filesystem::file_size(\"nfs_test_dir_copy/nfs_test_file1\", sz2));\n        ASSERT_EQ(sz1, sz2);\n        ASSERT_TRUE(utils::filesystem::file_size(\"nfs_test_dir/nfs_test_file2\", sz1));\n        ASSERT_TRUE(utils::filesystem::file_size(\"nfs_test_dir_copy/nfs_test_file2\", sz2));\n        ASSERT_EQ(sz1, sz2);\n    }\n\n    nfs->stop();\n}\n\nint g_test_ret = 0;\nGTEST_API_ int main(int argc, char **argv)\n{\n    testing::InitGoogleTest(&argc, argv);\n    dsn_run_config(\"config.ini\", false);\n    g_test_ret = RUN_ALL_TESTS();\n    dsn_exit(g_test_ret);\n}\n"
  },
  {
    "path": "src/nfs/test/nfs_test_file1",
    "content": "[apps..default]\nrun = true\ncount = 1\nnetwork.client.RPC_CHANNEL_TCP = dsn::tools::asio_network_provider, 65536\nnetwork.client.RPC_CHANNEL_UDP = dsn::tools::asio_network_provider, 65536\nnetwork.server.0.RPC_CHANNEL_TCP = NET_HDR_DSN, dsn::tools::asio_network_provider, 65536\n\n[apps.client]\nname = client\ntype = test\narguments = localhost 20101\nrun = true\nports = \ncount = 1\ndelay_seconds = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER, THREAD_POOL_FOR_TEST_1, THREAD_POOL_FOR_TEST_2\n\n[apps.server]\nname = server\ntype = test\narguments =\nports = 20101,20102\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER\nnetwork.client.RPC_CHANNEL_TCP = dsn::tools::asio_network_provider,65536\nnetwork.server.20101.RPC_CHANNEL_TCP = NET_HDR_DSN,dsn::tools::asio_network_provider,65536\nnetwork.server.20102.RPC_CHANNEL_TCP = NET_HDR_DSN,dsn::tools::asio_network_provider,65536\nnetwork.server.20103.RPC_CHANNEL_TCP = NET_HDR_DSN,dsn::tools::asio_network_provider,65536\n\n[apps.server_group]\nname = server_group\ntype = test\narguments =\nports = 20201\nrun = true\ncount = 3\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER\n\n[apps.server_not_run]\nname = server_not_run\ntype = test\narguments =\nports = 20301\nrun = false\ncount = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER\n\n[core]\n;tool = simulator\ntool = nativerun\n\ntoollets = tracer, profiler, fault_injector\npause_on_start = false\ncli_local = true\ncli_remote = true\n\nlogging_start_level = LOG_LEVEL_DEBUG\nlogging_factory_name = dsn::tools::simple_logger\n\n\n[tools.simulator]\nrandom_seed = 0\n\n[network]\n; how many network threads for network library (used by asio)\nio_service_worker_count = 2\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nfast_execution_in_network_thread = false\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 
5000\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nis_profile = false\nallow_inline = false\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\nis_profile = false\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 2\n\n[threadpool.THREAD_POOL_DEFAULT]\nname = default\npartitioned = false\n; max_input_queue_length = 1024\nworker_priority = THREAD_xPRIORITY_NORMAL\n\n[threadpool.THREAD_POOL_TEST_SERVER]\npartitioned = false\n\n[threadpool.THREAD_POOL_FOR_TEST_1]\nname = THREAD_POOL_FOR_TEST_1\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_HIGHEST\nworker_share_core = false\nworker_affinity_mask = 1\nmax_input_queue_length = 1024\npartitioned = false\n\n[threadpool.THREAD_POOL_FOR_TEST_2]\nname = THREAD_POOL_FOR_TEST_2\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_NORMAL\nworker_share_core = true\nworker_affinity_mask = 1\nmax_input_queue_length = 1024\npartitioned = true\n\n[core.test]\ncount = 1\nrun = true\n\n"
  },
  {
    "path": "src/nfs/test/nfs_test_file2",
    "content": "[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = NET_HDR_DSN, dsn::tools::sim_network_provider, 65536\n\n[apps.client]\nname = client\ntype = test\narguments = localhost 20101\nrun = true\nports = \ncount = 1\ndelay_seconds = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER, THREAD_POOL_FOR_TEST_1, THREAD_POOL_FOR_TEST_2\n\n[apps.server]\nname = server\ntype = test\narguments =\nports = 20101,20102\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER\n\n[apps.server_group]\nname = server_group\ntype = test\narguments =\nports = 20201\nrun = true\ncount = 3\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER\n\n[core]\n;tool = simulator\ntool = nativerun\n\ntoollets = tracer, profiler, fault_injector\npause_on_start = true\ncli_local = true\ncli_remote = true\n\nlogging_start_level = LOG_LEVEL_DEBUG\nlogging_factory_name = dsn::tools::simple_logger\n\n\n\n[tools.simulator]\nrandom_seed = 0\n\n[network]\n; how many network threads for network library (used by asio)\nio_service_worker_count = 2\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nfast_execution_in_network_thread = false\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nis_profile = false\nallow_inline = false\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\nis_profile = false\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 2\n\n[threadpool.THREAD_POOL_DEFAULT]\nname = default\npartitioned = false\n; max_input_queue_length = 1024\nworker_priority = THREAD_xPRIORITY_NORMAL\n\n[threadpool.THREAD_POOL_TEST_SERVER]\npartitioned = false\n\n[threadpool.THREAD_POOL_FOR_TEST_1]\nname = THREAD_POOL_FOR_TEST_1\nworker_count = 
2\npartitioned = false\n\n[threadpool.THREAD_POOL_FOR_TEST_2]\nname = THREAD_POOL_FOR_TEST_2\nworker_count = 2\npartitioned = true\n\n[core.test]\ncount = 1\nrun = true\n\n"
  },
  {
    "path": "src/nfs/test/run.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nif [ -z \"${REPORT_DIR}\" ]; then\n    REPORT_DIR=\".\"\nfi\n\n./clear.sh\noutput_xml=\"${REPORT_DIR}/dsn_nfs_test.xml\"\nGTEST_OUTPUT=\"xml:${output_xml}\" ./dsn_nfs_test\n"
  },
  {
    "path": "src/perf_counter/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn.perf_counter)\n\nset(MY_PROJ_SRC \"\")\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS \"\")\n\n# Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_object()\n\nadd_subdirectory(test)\n"
  },
  {
    "path": "src/perf_counter/builtin_counters.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/utils.h>\n#include <dsn/c/api_utilities.h>\n#include \"builtin_counters.h\"\n\nnamespace dsn {\n\nbuiltin_counters::builtin_counters()\n{\n    _memused_virt.init_global_counter(\"replica\",\n                                      \"server\",\n                                      \"memused.virt(MB)\",\n                                      COUNTER_TYPE_NUMBER,\n                                      \"virtual memory usages in MB\");\n    _memused_res.init_global_counter(\"replica\",\n                                     \"server\",\n                                     \"memused.res(MB)\",\n                                     COUNTER_TYPE_NUMBER,\n                                     \"physically memory usages in MB\");\n}\n\nbuiltin_counters::~builtin_counters() {}\n\nvoid builtin_counters::update_counters()\n{\n    double vm_usage;\n    double resident_set;\n    utils::process_mem_usage(vm_usage, resident_set);\n    uint64_t memused_virt = (uint64_t)vm_usage / 1024;\n    uint64_t memused_res = (uint64_t)resident_set / 1024;\n    _memused_virt->set(memused_virt);\n    _memused_res->set(memused_res);\n    
ddebug(\"memused_virt = %\" PRIu64 \" MB, memused_res = %\" PRIu64 \"MB\", memused_virt, memused_res);\n}\n}\n"
  },
  {
    "path": "src/perf_counter/builtin_counters.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n\nnamespace dsn {\nclass builtin_counters : public dsn::utils::singleton<builtin_counters>\n{\npublic:\n    builtin_counters();\n    ~builtin_counters();\n    void update_counters();\n\nprivate:\n    dsn::perf_counter_wrapper _memused_virt;\n    dsn::perf_counter_wrapper _memused_res;\n};\n}\n"
  },
  {
    "path": "src/perf_counter/perf_counter.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <cstring>\n#include <dsn/perf_counter/perf_counter.h>\n\nstatic const char *ctypes[] = {\n    \"NUMBER\", \"VOLATILE_NUMBER\", \"RATE\", \"PERCENTILE\", \"INVALID_COUNTER\"};\nconst char *dsn_counter_type_to_string(dsn_perf_counter_type_t t)\n{\n    if (t >= COUNTER_TYPE_COUNT)\n        return ctypes[COUNTER_TYPE_COUNT];\n    return ctypes[t];\n}\n\ndsn_perf_counter_type_t dsn_counter_type_from_string(const char *str)\n{\n    for (int i = 0; i < COUNTER_TYPE_COUNT; ++i) {\n        if (strcmp(str, ctypes[i]) == 0)\n            return (dsn_perf_counter_type_t)i;\n    }\n    return COUNTER_TYPE_INVALID;\n}\n\nstatic const char *ptypes[] = {\"P50\", \"P90\", \"P95\", \"P99\", \"P999\", \"INVALID_PERCENTILE\"};\nconst char 
*dsn_percentile_type_to_string(dsn_perf_counter_percentile_type_t t)\n{\n    if (t >= COUNTER_PERCENTILE_COUNT)\n        return ptypes[COUNTER_PERCENTILE_COUNT];\n    return ptypes[t];\n}\n\ndsn_perf_counter_percentile_type_t dsn_percentile_type_from_string(const char *str)\n{\n    for (int i = 0; i < COUNTER_PERCENTILE_COUNT; ++i) {\n        if (strcmp(str, ptypes[i]) == 0)\n            return (dsn_perf_counter_percentile_type_t)i;\n    }\n    return COUNTER_PERCENTILE_INVALID;\n}\n"
  },
  {
    "path": "src/perf_counter/perf_counter_atomic.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <atomic>\n#include <boost/make_shared.hpp>\n#include <dsn/utility/utils.h>\n#include <dsn/utility/config_api.h>\n#include <dsn/c/api_utilities.h>\n#include <dsn/perf_counter/perf_counter.h>\n#include <dsn/utils/time_utils.h>\n#include \"utils/shared_io_service.h\"\n\nnamespace dsn {\n\n#pragma pack(push)\n#pragma pack(8)\n\n// -----------   NUMBER perf counter ---------------------------------\n\n#define DIVIDE_CONTAINER 107\nclass perf_counter_number_atomic : public perf_counter\n{\npublic:\n    perf_counter_number_atomic(const char *app,\n                               const char *section,\n                               const char *name,\n                               dsn_perf_counter_type_t type,\n                               const char *dsptr)\n        : perf_counter(app, section, name, type, dsptr)\n    {\n        for (int i = 0; i < DIVIDE_CONTAINER; i++) {\n            _val[i].store(0);\n        }\n    }\n    ~perf_counter_number_atomic(void) {}\n\n    virtual void increment()\n    {\n        uint64_t task_id = static_cast<int>(utils::get_current_tid());\n        _val[task_id % DIVIDE_CONTAINER].fetch_add(1, 
std::memory_order_relaxed);\n    }\n    virtual void decrement()\n    {\n        uint64_t task_id = static_cast<int>(utils::get_current_tid());\n        _val[task_id % DIVIDE_CONTAINER].fetch_sub(1, std::memory_order_relaxed);\n    }\n    virtual void add(int64_t val)\n    {\n        uint64_t task_id = static_cast<int>(utils::get_current_tid());\n        _val[task_id % DIVIDE_CONTAINER].fetch_add(val, std::memory_order_relaxed);\n    }\n    virtual void set(int64_t val)\n    {\n        // the set-op of number is reset the number to zero.\n        // for simplicity, only set other zero, not add the lock to protect, if needed, should add\n        // lock.\n        for (int i = 0; i < DIVIDE_CONTAINER; i++)\n            _val[i].store(0, std::memory_order_relaxed);\n        _val[0].store(val, std::memory_order_relaxed);\n    }\n    virtual double get_value()\n    {\n        double val = 0;\n        for (int i = 0; i < DIVIDE_CONTAINER; i++) {\n            val += static_cast<double>(_val[i].load(std::memory_order_relaxed));\n        }\n        return val;\n    }\n    virtual int64_t get_integer_value()\n    {\n        int64_t val = 0;\n        for (int i = 0; i < DIVIDE_CONTAINER; i++) {\n            val += _val[i].load(std::memory_order_relaxed);\n        }\n        return val;\n    }\n    virtual double get_percentile(dsn_perf_counter_percentile_type_t type)\n    {\n        dassert(false, \"invalid execution flow\");\n        return 0.0;\n    }\n\nprotected:\n    std::atomic<int64_t> _val[DIVIDE_CONTAINER];\n};\n\n// -----------   VOLATILE_NUMBER perf counter ---------------------------------\n\nclass perf_counter_volatile_number_atomic : public perf_counter_number_atomic\n{\npublic:\n    perf_counter_volatile_number_atomic(const char *app,\n                                        const char *section,\n                                        const char *name,\n                                        dsn_perf_counter_type_t type,\n                                       
 const char *dsptr)\n        : perf_counter_number_atomic(app, section, name, type, dsptr)\n    {\n    }\n    ~perf_counter_volatile_number_atomic(void) {}\n\n    virtual double get_value()\n    {\n        double val = 0;\n        for (int i = 0; i < DIVIDE_CONTAINER; i++) {\n            val += static_cast<double>(_val[i].exchange(0, std::memory_order_relaxed));\n        }\n        return val;\n    }\n    virtual int64_t get_integer_value()\n    {\n        int64_t val = 0;\n        for (int i = 0; i < DIVIDE_CONTAINER; i++) {\n            val += _val[i].exchange(0, std::memory_order_relaxed);\n        }\n        return val;\n    }\n};\n\n// -----------   RATE perf counter ---------------------------------\n\nclass perf_counter_rate_atomic : public perf_counter\n{\npublic:\n    perf_counter_rate_atomic(const char *app,\n                             const char *section,\n                             const char *name,\n                             dsn_perf_counter_type_t type,\n                             const char *dsptr)\n        : perf_counter(app, section, name, type, dsptr), _rate(0)\n    {\n        _last_time = utils::get_current_physical_time_ns();\n        for (int i = 0; i < DIVIDE_CONTAINER; i++) {\n            _val[i].store(0, std::memory_order_relaxed);\n        }\n    }\n    ~perf_counter_rate_atomic(void) {}\n\n    virtual void increment()\n    {\n        uint64_t task_id = static_cast<int>(utils::get_current_tid());\n        _val[task_id % DIVIDE_CONTAINER].fetch_add(1, std::memory_order_relaxed);\n    }\n    virtual void decrement()\n    {\n        uint64_t task_id = static_cast<int>(utils::get_current_tid());\n        _val[task_id % DIVIDE_CONTAINER].fetch_sub(1, std::memory_order_relaxed);\n    }\n    virtual void add(int64_t val)\n    {\n        uint64_t task_id = static_cast<int>(utils::get_current_tid());\n        _val[task_id % DIVIDE_CONTAINER].fetch_add(val, std::memory_order_relaxed);\n    }\n    virtual void set(int64_t val) { 
dassert(false, \"invalid execution flow\"); }\n    virtual double get_value()\n    {\n        uint64_t now = utils::get_current_physical_time_ns();\n        double interval = (now - _last_time) / 1e9;\n        if (interval <= 0.1)\n            return _rate;\n\n        double val = 0;\n        for (int i = 0; i < DIVIDE_CONTAINER; i++) {\n            val += _val[i].fetch_and(0, std::memory_order_relaxed);\n        }\n\n        _rate = val / interval;\n        _last_time = now;\n        return _rate;\n    }\n    virtual int64_t get_integer_value() { return (int64_t)get_value(); }\n    virtual double get_percentile(dsn_perf_counter_percentile_type_t type)\n    {\n        dassert(false, \"invalid execution flow\");\n        return 0.0;\n    }\n\nprivate:\n    std::atomic<double> _rate;\n    std::atomic<uint64_t> _last_time;\n    std::atomic<int64_t> _val[DIVIDE_CONTAINER];\n};\n\n// -----------   NUMBER_PERCENTILE perf counter ---------------------------------\n\n#define MAX_QUEUE_LENGTH 5000\n#define _LEFT 0\n#define _RIGHT 1\n#define _QLEFT 2\n#define _QRIGHT 3\n\nclass perf_counter_number_percentile_atomic : public perf_counter\n{\npublic:\n    perf_counter_number_percentile_atomic(const char *app,\n                                          const char *section,\n                                          const char *name,\n                                          dsn_perf_counter_type_t type,\n                                          const char *dsptr,\n                                          bool use_timer = true)\n        : perf_counter(app, section, name, type, dsptr), _tail(0)\n    {\n        _results[COUNTER_PERCENTILE_50] = 0;\n        _results[COUNTER_PERCENTILE_90] = 0;\n        _results[COUNTER_PERCENTILE_95] = 0;\n        _results[COUNTER_PERCENTILE_99] = 0;\n        _results[COUNTER_PERCENTILE_999] = 0;\n\n        if (!use_timer) {\n            return;\n        }\n\n        _counter_computation_interval_seconds = (int)dsn_config_get_value_uint64(\n     
       \"components.pegasus_perf_counter_number_percentile_atomic\",\n            \"counter_computation_interval_seconds\",\n            10,\n            \"period (seconds) the system computes the percentiles of the \"\n            \"pegasus_perf_counter_number_percentile_atomic counters\");\n        _timer.reset(new boost::asio::deadline_timer(tools::shared_io_service::instance().ios));\n        _timer->expires_from_now(\n            boost::posix_time::seconds(::rand() % _counter_computation_interval_seconds + 1));\n        _timer->async_wait(std::bind(\n            &perf_counter_number_percentile_atomic::on_timer, this, _timer, std::placeholders::_1));\n    }\n\n    ~perf_counter_number_percentile_atomic(void)\n    {\n        if (_timer) {\n            _timer->cancel();\n        }\n    }\n\n    virtual void increment() { dassert(false, \"invalid execution flow\"); }\n    virtual void decrement() { dassert(false, \"invalid execution flow\"); }\n    virtual void add(int64_t val) { dassert(false, \"invalid execution flow\"); }\n    virtual void set(int64_t val)\n    {\n        uint64_t idx = _tail.fetch_add(1, std::memory_order_relaxed);\n        _samples[idx % MAX_QUEUE_LENGTH] = val;\n    }\n\n    virtual double get_value()\n    {\n        dassert(false, \"invalid execution flow\");\n        return 0.0;\n    }\n    virtual int64_t get_integer_value() { return (int64_t)get_value(); }\n\n    virtual double get_percentile(dsn_perf_counter_percentile_type_t type)\n    {\n        if ((type < 0) || (type >= COUNTER_PERCENTILE_COUNT)) {\n            dassert(false, \"send a wrong counter percentile type\");\n            return 0.0;\n        }\n        return (double)_results[type];\n    }\n\n    virtual int get_latest_samples(int required_sample_count,\n                                   /*out*/ samples_t &samples) const override\n    {\n        dassert(required_sample_count <= MAX_QUEUE_LENGTH, \"\");\n\n        uint64_t count = _tail.load();\n        int return_count = 
count >= (uint64_t)required_sample_count ? required_sample_count : count;\n\n        samples.clear();\n        int end_index = (count + MAX_QUEUE_LENGTH - 1) % MAX_QUEUE_LENGTH;\n        int start_index = (end_index + MAX_QUEUE_LENGTH - return_count) % MAX_QUEUE_LENGTH;\n\n        if (end_index >= start_index) {\n            samples.push_back(std::make_pair((int64_t *)_samples + start_index, return_count));\n        } else {\n            samples.push_back(\n                std::make_pair((int64_t *)_samples + start_index, MAX_QUEUE_LENGTH - start_index));\n            samples.push_back(std::make_pair((int64_t *)_samples,\n                                             return_count - (MAX_QUEUE_LENGTH - start_index)));\n        }\n\n        return return_count;\n    }\n\n    virtual int64_t get_latest_sample() const override\n    {\n        int idx = (_tail.load() + MAX_QUEUE_LENGTH - 1) % MAX_QUEUE_LENGTH;\n        return _samples[idx];\n    }\n\nprivate:\n    friend class perf_counter_nth_element_finder;\n\n    struct compute_context\n    {\n        int64_t ask[COUNTER_PERCENTILE_COUNT];\n        int64_t tmp[MAX_QUEUE_LENGTH];\n        int64_t mid_tmp[MAX_QUEUE_LENGTH];\n        int calc_queue[MAX_QUEUE_LENGTH][4];\n    };\n\n    void insert_calc_queue(const boost::shared_ptr<compute_context> &ctx,\n                           int left,\n                           int right,\n                           int qleft,\n                           int qright,\n                           int &calc_tail)\n    {\n        calc_tail++;\n        ctx->calc_queue[calc_tail][_LEFT] = left;\n        ctx->calc_queue[calc_tail][_RIGHT] = right;\n        ctx->calc_queue[calc_tail][_QLEFT] = qleft;\n        ctx->calc_queue[calc_tail][_QRIGHT] = qright;\n        return;\n    }\n\n    int64_t find_mid(const boost::shared_ptr<compute_context> &ctx, int left, int right)\n    {\n        if (left == right)\n            return ctx->mid_tmp[left];\n\n        for (int index = left; index < right; 
index += 5) {\n            int remain_num = index + 5 >= right ? right - index + 1 : 5;\n            for (int i = index; i < index + remain_num; i++) {\n                int j;\n                int64_t k = ctx->mid_tmp[i];\n                for (j = i - 1; (j >= index) && (ctx->mid_tmp[j] > k); j--)\n                    ctx->mid_tmp[j + 1] = ctx->mid_tmp[j];\n                ctx->mid_tmp[j + 1] = k;\n            }\n            ctx->mid_tmp[(index - left) / 5] = ctx->mid_tmp[index + remain_num / 2];\n        }\n\n        return find_mid(ctx, 0, (right - left - 1) / 5);\n    }\n\n    void select(const boost::shared_ptr<compute_context> &ctx,\n                int left,\n                int right,\n                int qleft,\n                int qright,\n                int &calc_tail)\n    {\n        int i, j, index, now;\n        int64_t mid;\n\n        if (qleft > qright)\n            return;\n\n        if (left == right) {\n            for (i = qleft; i <= qright; i++)\n                if (ctx->ask[i] == 1) {\n                    _results[i] = ctx->tmp[left];\n                } else\n                    dassert(false, \"select percentail wrong!!!\");\n            return;\n        }\n\n        for (i = left; i <= right; i++)\n            ctx->mid_tmp[i] = ctx->tmp[i];\n        mid = find_mid(ctx, left, right);\n\n        for (index = left; index <= right; index++)\n            if (ctx->tmp[index] == mid)\n                break;\n\n        ctx->tmp[index] = ctx->tmp[left];\n        index = left;\n        for (i = left, j = right; i <= j;) {\n            while ((i <= j) && (ctx->tmp[j] > mid))\n                j--;\n            if (i <= j)\n                ctx->tmp[index] = ctx->tmp[j], index = j--;\n            while ((i <= j) && (ctx->tmp[i] < mid))\n                i++;\n            if (i <= j)\n                ctx->tmp[index] = ctx->tmp[i], index = i++;\n        }\n        ctx->tmp[index] = mid;\n\n        now = index - left + 1;\n        for (i = qleft; (i <= 
qright) && (ctx->ask[i] < now); i++)\n            ;\n        for (j = i; j <= qright; j++)\n            ctx->ask[j] -= now;\n        for (j = i; (j <= qright) && (ctx->ask[j] == 0); j++)\n            ctx->ask[j]++;\n        insert_calc_queue(ctx, left, index - 1, qleft, i - 1, calc_tail);\n        insert_calc_queue(ctx, index, index, i, j - 1, calc_tail);\n        insert_calc_queue(ctx, index + 1, right, j, qright, calc_tail);\n        return;\n    }\n\n    void calc(const boost::shared_ptr<compute_context> &ctx)\n    {\n        uint64_t _num = _tail.load();\n        if (_num > MAX_QUEUE_LENGTH)\n            _num = MAX_QUEUE_LENGTH;\n\n        if (_num == 0)\n            return;\n        for (int i = 0; i < _num; i++)\n            ctx->tmp[i] = _samples[i];\n\n        ctx->ask[COUNTER_PERCENTILE_50] = (int)(_num * 0.5) + 1;\n        ctx->ask[COUNTER_PERCENTILE_90] = (int)(_num * 0.90) + 1;\n        ctx->ask[COUNTER_PERCENTILE_95] = (int)(_num * 0.95) + 1;\n        ctx->ask[COUNTER_PERCENTILE_99] = (int)(_num * 0.99) + 1;\n        ctx->ask[COUNTER_PERCENTILE_999] = (int)(_num * 0.999) + 1;\n        // must be sorted\n        // std::sort(ctx->ask, ctx->ask + MAX_TYPE_NUMBER);\n\n        int l, r = 0;\n\n        insert_calc_queue(ctx, 0, _num - 1, 0, COUNTER_PERCENTILE_COUNT - 1, r);\n        for (l = 1; l <= r; l++)\n            select(ctx,\n                   ctx->calc_queue[l][_LEFT],\n                   ctx->calc_queue[l][_RIGHT],\n                   ctx->calc_queue[l][_QLEFT],\n                   ctx->calc_queue[l][_QRIGHT],\n                   r);\n\n        return;\n    }\n\n    void on_timer(std::shared_ptr<boost::asio::deadline_timer> timer,\n                  const boost::system::error_code &ec)\n    {\n        // as the callback is not in tls context, so the log system calls like ddebug, dassert will\n        // cause a lock\n        if (!ec) {\n            calc(boost::make_shared<compute_context>());\n\n            timer->expires_from_now(\n               
 boost::posix_time::seconds(_counter_computation_interval_seconds));\n            timer->async_wait(std::bind(&perf_counter_number_percentile_atomic::on_timer,\n                                        this,\n                                        timer,\n                                        std::placeholders::_1));\n        } else if (boost::system::errc::operation_canceled != ec) {\n            dassert(false, \"on_timer error!!!\");\n        }\n    }\n\n    std::shared_ptr<boost::asio::deadline_timer> _timer;\n    std::atomic<uint64_t> _tail; // should use unsigned int to avoid out of bound\n    int64_t _samples[MAX_QUEUE_LENGTH];\n    int64_t _results[COUNTER_PERCENTILE_COUNT];\n    int _counter_computation_interval_seconds;\n};\n\n#pragma pack(pop)\n} // namespace\n"
  },
  {
    "path": "src/perf_counter/perf_counters.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <regex>\n\n#include <dsn/perf_counter/perf_counter.h>\n#include <dsn/perf_counter/perf_counters.h>\n#include <dsn/perf_counter/perf_counter_utils.h>\n\n#include <dsn/cpp/service_app.h>\n#include <dsn/cpp/json_helper.h>\n\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/tool-api/task.h>\n#include <dsn/utility/string_view.h>\n#include <dsn/utils/time_utils.h>\n\n#include \"perf_counter_atomic.h\"\n#include \"builtin_counters.h\"\n#include \"runtime/service_engine.h\"\n\nnamespace dsn {\n\nperf_counters::perf_counters()\n{\n    // make shared_io_service destructed after perf_counters,\n    // because shared_io_service will destruct the timer created by perf_counters\n    // It will produce heap-use-after-free error if 
shared_io_service destructed in front of\n    // perf_counters\n    tools::shared_io_service::instance();\n\n    _perf_counters_cmd = command_manager::instance().register_command(\n        {\"perf-counters\"},\n        \"perf-counters - query perf counters, filtered by OR of POSIX basic regular expressions\",\n        \"perf-counters [regexp]...\",\n        [](const std::vector<std::string> &args) {\n            return perf_counters::instance().list_snapshot_by_regexp(args);\n        });\n    _perf_counters_by_substr_cmd = command_manager::instance().register_command(\n        {\"perf-counters-by-substr\"},\n        \"perf-counters-by-substr - query perf counters, filtered by OR of substrs\",\n        \"perf-counters-by-substr [substr]...\",\n        [](const std::vector<std::string> &args) {\n            return perf_counters::instance().list_snapshot_by_literal(\n                args, [](const std::string &arg, const counter_snapshot &cs) {\n                    return cs.name.find(arg) != std::string::npos;\n                });\n        });\n    _perf_counters_by_prefix_cmd = command_manager::instance().register_command(\n        {\"perf-counters-by-prefix\"},\n        \"perf-counters-by-prefix - query perf counters, filtered by OR of prefix strings\",\n        \"perf-counters-by-prefix [prefix]...\",\n        [](const std::vector<std::string> &args) {\n            return perf_counters::instance().list_snapshot_by_literal(\n                args, [](const std::string &arg, const counter_snapshot &cs) {\n                    return cs.name.size() >= arg.size() &&\n                           ::memcmp(cs.name.c_str(), arg.c_str(), arg.size()) == 0;\n                });\n        });\n    _perf_counters_by_postfix_cmd = command_manager::instance().register_command(\n        {\"perf-counters-by-postfix\"},\n        \"perf-counters-by-postfix - query perf counters, filtered by OR of postfix strings\",\n        \"perf-counters-by-postfix [postfix]...\",\n        [](const 
std::vector<std::string> &args) {\n            return perf_counters::instance().list_snapshot_by_literal(\n                args, [](const std::string &arg, const counter_snapshot &cs) {\n                    return cs.name.size() >= arg.size() &&\n                           ::memcmp(cs.name.c_str() + cs.name.size() - arg.size(),\n                                    arg.c_str(),\n                                    arg.size()) == 0;\n                });\n        });\n}\n\nperf_counters::~perf_counters()\n{\n    _counters.clear();\n    UNREGISTER_VALID_HANDLER(_perf_counters_cmd);\n    UNREGISTER_VALID_HANDLER(_perf_counters_by_substr_cmd);\n    UNREGISTER_VALID_HANDLER(_perf_counters_by_prefix_cmd);\n    UNREGISTER_VALID_HANDLER(_perf_counters_by_postfix_cmd);\n}\n\nperf_counter_ptr perf_counters::get_app_counter(const char *section,\n                                                const char *name,\n                                                dsn_perf_counter_type_t flags,\n                                                const char *dsptr,\n                                                bool create_if_not_exist)\n{\n    auto cnode = task::get_current_node2();\n    dassert(cnode != nullptr, \"cannot get current service node!\");\n    return get_global_counter(cnode->full_name(), section, name, flags, dsptr, create_if_not_exist);\n}\n\nperf_counter_ptr perf_counters::get_global_counter(const char *app,\n                                                   const char *section,\n                                                   const char *name,\n                                                   dsn_perf_counter_type_t flags,\n                                                   const char *dsptr,\n                                                   bool create_if_not_exist)\n{\n    std::string full_name;\n    perf_counter::build_full_name(app, section, name, full_name);\n\n    utils::auto_write_lock l(_lock);\n    if (create_if_not_exist) {\n        auto it = 
_counters.find(full_name);\n        if (it == _counters.end()) {\n            perf_counter_ptr counter = new_counter(app, section, name, flags, dsptr);\n            _counters.emplace(full_name, counter_object{counter, 1});\n            return counter;\n        } else {\n            dassert(it->second.counter->type() == flags,\n                    \"counters with the same name %s with differnt types, (%d) vs (%d)\",\n                    full_name.c_str(),\n                    it->second.counter->type(),\n                    flags);\n            ++it->second.user_reference;\n            return it->second.counter;\n        }\n    } else {\n        auto it = _counters.find(full_name);\n        if (it == _counters.end())\n            return nullptr;\n        else {\n            ++it->second.user_reference;\n            return it->second.counter;\n        }\n    }\n}\n\nbool perf_counters::remove_counter(const char *full_name)\n{\n    int remain_ref;\n    {\n        utils::auto_write_lock l(_lock);\n        auto it = _counters.find(full_name);\n        if (it == _counters.end())\n            return false;\n        else {\n            counter_object &c = it->second;\n            remain_ref = (--c.user_reference);\n            if (remain_ref == 0) {\n                _counters.erase(it);\n            }\n        }\n    }\n\n    dinfo(\"performance counter %s is removed, remaining reference (%d)\", full_name, remain_ref);\n    return true;\n}\n\nperf_counter_ptr perf_counters::get_counter(const std::string &full_name)\n{\n    utils::auto_read_lock l(_lock);\n    auto it = _counters.find(full_name);\n    if (it != _counters.end())\n        return it->second.counter;\n\n    return nullptr;\n}\n\nperf_counter *perf_counters::new_counter(const char *app,\n                                         const char *section,\n                                         const char *name,\n                                         dsn_perf_counter_type_t type,\n                                  
       const char *dsptr)\n{\n    if (type == dsn_perf_counter_type_t::COUNTER_TYPE_NUMBER)\n        return new perf_counter_number_atomic(app, section, name, type, dsptr);\n    else if (type == dsn_perf_counter_type_t::COUNTER_TYPE_VOLATILE_NUMBER)\n        return new perf_counter_volatile_number_atomic(app, section, name, type, dsptr);\n    else if (type == dsn_perf_counter_type_t::COUNTER_TYPE_RATE)\n        return new perf_counter_rate_atomic(app, section, name, type, dsptr);\n    else if (type == dsn_perf_counter_type_t::COUNTER_TYPE_NUMBER_PERCENTILES)\n        return new perf_counter_number_percentile_atomic(app, section, name, type, dsptr);\n    else {\n        dassert(false, \"invalid type(%d)\", type);\n        return nullptr;\n    }\n}\n\nvoid perf_counters::get_all_counters(std::vector<perf_counter_ptr> *all) const\n{\n    all->clear();\n    utils::auto_read_lock l(_lock);\n    all->reserve(_counters.size());\n    for (auto &p : _counters) {\n        all->push_back(p.second.counter);\n    }\n}\n\nstd::string perf_counters::list_snapshot_by_regexp(const std::vector<std::string> &args) const\n{\n    perf_counter_info info;\n\n    std::vector<std::regex> regs;\n    regs.reserve(args.size());\n    for (auto &arg : args) {\n        try {\n            regs.emplace_back(arg, std::regex_constants::basic);\n        } catch (...) 
{\n            info.result = \"ERROR: invalid filter: \" + arg;\n            break;\n        }\n    }\n\n    if (info.result.empty()) {\n        snapshot_iterator visitor = [&regs, &info](const counter_snapshot &cs) {\n            bool matched = false;\n            if (regs.empty()) {\n                matched = true;\n            } else {\n                for (auto &reg : regs) {\n                    if (std::regex_match(cs.name, reg)) {\n                        matched = true;\n                        break;\n                    }\n                }\n            }\n\n            if (matched) {\n                info.counters.emplace_back(cs.name.c_str(), cs.type, cs.value);\n            }\n        };\n        iterate_snapshot(visitor);\n        info.result = \"OK\";\n    }\n\n    std::stringstream ss;\n    info.timestamp = _timestamp;\n    char buf[20];\n    utils::time_ms_to_date_time(info.timestamp * 1000, buf, sizeof(buf));\n    info.timestamp_str = buf;\n    info.encode_json_state(ss);\n    return ss.str();\n}\n\n// the filter should return true if the counter satisfies condition.\nstd::string perf_counters::list_snapshot_by_literal(\n    const std::vector<std::string> &args,\n    std::function<bool(const std::string &arg, const counter_snapshot &cs)> filter) const\n{\n    perf_counter_info info;\n\n    snapshot_iterator visitor = [&args, &info, &filter](const counter_snapshot &cs) {\n        bool matched = false;\n        if (args.empty()) {\n            matched = true;\n        } else {\n            for (auto &arg : args) {\n                if (filter(arg, cs)) {\n                    matched = true;\n                    break;\n                }\n            }\n        }\n\n        if (matched) {\n            info.counters.emplace_back(cs.name.c_str(), cs.type, cs.value);\n        }\n    };\n    iterate_snapshot(visitor);\n    info.result = \"OK\";\n\n    std::stringstream ss;\n    info.timestamp = _timestamp;\n    char buf[20];\n    
utils::time_ms_to_date_time(info.timestamp * 1000, buf, sizeof(buf));\n    info.timestamp_str = buf;\n    info.encode_json_state(ss);\n    return ss.str();\n}\n\nvoid perf_counters::take_snapshot()\n{\n    builtin_counters::instance().update_counters();\n\n    std::vector<perf_counter_ptr> all_counters;\n    get_all_counters(&all_counters);\n\n    utils::auto_write_lock l(_snapshot_lock);\n    for (auto &p : _snapshots) {\n        p.second.updated_recently = false;\n    }\n\n    // updated counters from current value\n    for (const perf_counter_ptr &c : all_counters) {\n        counter_snapshot &cs = _snapshots[c->full_name()];\n        if (cs.name.empty()) {\n            // recently created counter, which wasn't in snapshot before\n            cs.name = c->full_name();\n            cs.type = c->type();\n        }\n        cs.updated_recently = true;\n        if (c->type() != COUNTER_TYPE_NUMBER_PERCENTILES) {\n            cs.value = c->get_value();\n        } else {\n            cs.value = c->get_percentile(COUNTER_PERCENTILE_99);\n\n            // take P999 metrics into account as well.\n            std::string name_p999 = std::string(c->full_name()) + \".p999\";\n            counter_snapshot &cs999 = _snapshots[name_p999];\n            cs999.name = std::move(name_p999);\n            cs999.type = c->type();\n            cs999.updated_recently = true;\n            cs999.value = c->get_percentile(COUNTER_PERCENTILE_999);\n        }\n    }\n\n    _timestamp = dsn_now_ms() / 1000;\n\n    // delete old counters\n    std::vector<std::string> old_counters;\n    for (auto &p : _snapshots)\n        if (!p.second.updated_recently)\n            old_counters.push_back(p.first);\n    for (const std::string &n : old_counters)\n        _snapshots.erase(n);\n}\n\nvoid perf_counters::iterate_snapshot(const snapshot_iterator &v) const\n{\n    utils::auto_read_lock l(_snapshot_lock);\n    for (auto &kv : _snapshots) {\n        v(kv.second);\n    }\n}\n\nvoid 
perf_counters::query_snapshot(const std::vector<std::string> &counters,\n                                   const snapshot_iterator &v,\n                                   std::vector<bool> *found) const\n{\n    std::vector<bool> result;\n    if (found == nullptr)\n        found = &result;\n\n    found->reserve(counters.size());\n    utils::auto_read_lock l(_snapshot_lock);\n    for (const std::string &name : counters) {\n        auto iter = _snapshots.find(name);\n        if (iter == _snapshots.end()) {\n            found->push_back(false);\n        } else {\n            found->push_back(true);\n            v(iter->second);\n        }\n    }\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/perf_counter/test/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_perf_counter_test)\n\n# Source files under CURRENT project directory will be automatically included.\n# You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"\")\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS gtest dsn_runtime)\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\n# Extra files that will be installed\nset(MY_BINPLACES\n    \"${CMAKE_CURRENT_SOURCE_DIR}/clear.sh\"\n    \"${CMAKE_CURRENT_SOURCE_DIR}/run.sh\"\n)\n\ndsn_add_test()\n"
  },
  {
    "path": "src/perf_counter/test/clear.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nrm -rf data dsn_perf_counter_test.xml\n"
  },
  {
    "path": "src/perf_counter/test/main.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include <dsn/service_api_cpp.h>\n\nGTEST_API_ int main(int argc, char **argv)\n{\n    testing::InitGoogleTest(&argc, argv);\n    return RUN_ALL_TESTS();\n}\n"
  },
  {
    "path": "src/perf_counter/test/perf_counter_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Unit-test for perf counter.\n *\n * Revision history:\n *     Nov., 2015, @shengofsun (Weijie Sun), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/tool_api.h>\n#include <gtest/gtest.h>\n#include <thread>\n#include <cmath>\n#include <vector>\n\n#include \"perf_counter/perf_counter_atomic.h\"\n\nusing namespace dsn;\nusing namespace dsn::tools;\n\nconst int count_times = 10000;\n\nstatic void adder_function(perf_counter_ptr pc, int id, const std::vector<int> &vec)\n{\n    for (int i = id; i < 10000; i += 10)\n        pc->add(vec[i]);\n}\n\nstatic void perf_counter_inc_dec(perf_counter_ptr pc)\n{\n    std::thread inc_thread(\n        [](perf_counter_ptr counter) {\n          
  for (int i = 0; i < count_times; ++i)\n                counter->increment();\n        },\n        pc);\n    std::thread dec_thread(\n        [](perf_counter_ptr counter) {\n            for (int i = 0; i < count_times; ++i)\n                counter->decrement();\n        },\n        pc);\n\n    inc_thread.join();\n    dec_thread.join();\n}\n\ntypedef std::shared_ptr<std::thread> thread_ptr;\nstatic void perf_counter_add(perf_counter_ptr pc, const std::vector<int> &vec)\n{\n    std::vector<thread_ptr> add_threads;\n    for (int i = 0; i < 10; ++i) {\n        thread_ptr t(new std::thread(adder_function, pc, i, std::ref(vec)));\n        add_threads.push_back(t);\n    }\n    for (unsigned int i = 0; i != add_threads.size(); ++i)\n        add_threads[i]->join();\n}\n\nTEST(perf_counter, perf_counter_atomic)\n{\n    int ans = 0;\n    std::vector<int> vec(10000, 0);\n    for (int i = 0; i < vec.size(); ++i) {\n        vec[i] = rand() % 100;\n        ans += vec[i];\n    }\n    std::vector<int> gen_numbers{1, 5, 1043};\n    int sleep_interval = (int)dsn_config_get_value_uint64(\n        \"components.simple_perf_counter\", \"counter_computation_interval_seconds\", 3, \"period\");\n\n    perf_counter_ptr counter = new perf_counter_number_atomic(\n        \"\", \"\", \"\", dsn_perf_counter_type_t::COUNTER_TYPE_NUMBER, \"\");\n    perf_counter_inc_dec(counter);\n    perf_counter_add(counter, vec);\n    ddebug(\"%lf\", counter->get_value());\n\n    counter = new perf_counter_volatile_number_atomic(\n        \"\", \"\", \"\", dsn_perf_counter_type_t::COUNTER_TYPE_VOLATILE_NUMBER, \"\");\n    perf_counter_inc_dec(counter);\n    perf_counter_add(counter, vec);\n    ddebug(\"%lf\", counter->get_value());\n\n    counter =\n        new perf_counter_rate_atomic(\"\", \"\", \"\", dsn_perf_counter_type_t::COUNTER_TYPE_RATE, \"\");\n    perf_counter_inc_dec(counter);\n    perf_counter_add(counter, vec);\n    ddebug(\"%lf\", counter->get_value());\n\n    counter = new 
perf_counter_number_percentile_atomic(\n        \"\", \"\", \"\", dsn_perf_counter_type_t::COUNTER_TYPE_NUMBER_PERCENTILES, \"\");\n    std::this_thread::sleep_for(std::chrono::seconds(sleep_interval));\n    for (auto &count : gen_numbers) {\n        for (unsigned int i = 0; i != count; ++i)\n            counter->set(rand() % 10000);\n        // std::this_thread::sleep_for(std::chrono::seconds(sleep_interval));\n        for (int i = 0; i != COUNTER_PERCENTILE_COUNT; ++i)\n            ddebug(\"%lf\", counter->get_percentile((dsn_perf_counter_percentile_type_t)i));\n    }\n}\n\nTEST(perf_counter, print_type)\n{\n    ASSERT_STREQ(\"NUMBER\", dsn_counter_type_to_string(COUNTER_TYPE_NUMBER));\n    ASSERT_STREQ(\"VOLATILE_NUMBER\", dsn_counter_type_to_string(COUNTER_TYPE_VOLATILE_NUMBER));\n    ASSERT_STREQ(\"RATE\", dsn_counter_type_to_string(COUNTER_TYPE_RATE));\n    ASSERT_STREQ(\"PERCENTILE\", dsn_counter_type_to_string(COUNTER_TYPE_NUMBER_PERCENTILES));\n    ASSERT_STREQ(\"INVALID_COUNTER\", dsn_counter_type_to_string(COUNTER_TYPE_INVALID));\n\n    ASSERT_EQ(COUNTER_TYPE_NUMBER,\n              dsn_counter_type_from_string(dsn_counter_type_to_string(COUNTER_TYPE_NUMBER)));\n    ASSERT_EQ(\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        dsn_counter_type_from_string(dsn_counter_type_to_string(COUNTER_TYPE_VOLATILE_NUMBER)));\n    ASSERT_EQ(COUNTER_TYPE_RATE,\n              dsn_counter_type_from_string(dsn_counter_type_to_string(COUNTER_TYPE_RATE)));\n    ASSERT_EQ(\n        COUNTER_TYPE_NUMBER_PERCENTILES,\n        dsn_counter_type_from_string(dsn_counter_type_to_string(COUNTER_TYPE_NUMBER_PERCENTILES)));\n    ASSERT_EQ(COUNTER_TYPE_INVALID, dsn_counter_type_from_string(\"xxxx\"));\n\n    ASSERT_STREQ(\"P50\", dsn_percentile_type_to_string(COUNTER_PERCENTILE_50));\n    ASSERT_STREQ(\"P90\", dsn_percentile_type_to_string(COUNTER_PERCENTILE_90));\n    ASSERT_STREQ(\"P95\", dsn_percentile_type_to_string(COUNTER_PERCENTILE_95));\n    ASSERT_STREQ(\"P99\", 
dsn_percentile_type_to_string(COUNTER_PERCENTILE_99));\n    ASSERT_STREQ(\"P999\", dsn_percentile_type_to_string(COUNTER_PERCENTILE_999));\n    ASSERT_STREQ(\"INVALID_PERCENTILE\", dsn_percentile_type_to_string(COUNTER_PERCENTILE_INVALID));\n\n    ASSERT_EQ(\n        COUNTER_PERCENTILE_50,\n        dsn_percentile_type_from_string(dsn_percentile_type_to_string(COUNTER_PERCENTILE_50)));\n    ASSERT_EQ(\n        COUNTER_PERCENTILE_90,\n        dsn_percentile_type_from_string(dsn_percentile_type_to_string(COUNTER_PERCENTILE_90)));\n    ASSERT_EQ(\n        COUNTER_PERCENTILE_95,\n        dsn_percentile_type_from_string(dsn_percentile_type_to_string(COUNTER_PERCENTILE_95)));\n    ASSERT_EQ(\n        COUNTER_PERCENTILE_99,\n        dsn_percentile_type_from_string(dsn_percentile_type_to_string(COUNTER_PERCENTILE_99)));\n    ASSERT_EQ(\n        COUNTER_PERCENTILE_999,\n        dsn_percentile_type_from_string(dsn_percentile_type_to_string(COUNTER_PERCENTILE_999)));\n    ASSERT_EQ(COUNTER_PERCENTILE_INVALID, dsn_percentile_type_from_string(\"afafda\"));\n}\n"
  },
  {
    "path": "src/perf_counter/test/perf_counters_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Unit-test for perf_counters.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/perf_counter/perf_counters.h>\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n#include <dsn/perf_counter/perf_counter_utils.h>\n#include <gtest/gtest.h>\n\nusing namespace ::dsn;\n\nTEST(perf_counters_test, counter_create_remove)\n{\n    perf_counter_ptr p;\n\n    p = perf_counters::instance().get_global_counter(\n        \"app\", \"test\", \"number_counter\", COUNTER_TYPE_NUMBER, \"\", false);\n    ASSERT_EQ(nullptr, p);\n    p = perf_counters::instance().get_global_counter(\n        \"app\", \"test\", \"number_counter\", 
COUNTER_TYPE_NUMBER, \"\", true);\n    ASSERT_NE(nullptr, p);\n    p = perf_counters::instance().get_global_counter(\n        \"app\", \"test\", \"number_counter\", COUNTER_TYPE_NUMBER, \"\", false);\n    ASSERT_NE(nullptr, p);\n\n    p = perf_counters::instance().get_global_counter(\n        \"app\", \"test\", \"volatile_number_counter\", COUNTER_TYPE_VOLATILE_NUMBER, \"\", false);\n    ASSERT_EQ(nullptr, p);\n    p = perf_counters::instance().get_global_counter(\n        \"app\", \"test\", \"volatile_number_counter\", COUNTER_TYPE_VOLATILE_NUMBER, \"\", true);\n    ASSERT_NE(nullptr, p);\n    p = perf_counters::instance().get_global_counter(\n        \"app\", \"test\", \"volatile_number_counter\", COUNTER_TYPE_VOLATILE_NUMBER, \"\", false);\n    ASSERT_NE(nullptr, p);\n\n    p = perf_counters::instance().get_global_counter(\n        \"app\", \"test\", \"rate_counter\", COUNTER_TYPE_RATE, \"\", false);\n    ASSERT_EQ(nullptr, p);\n    p = perf_counters::instance().get_global_counter(\n        \"app\", \"test\", \"rate_counter\", COUNTER_TYPE_RATE, \"\", true);\n    ASSERT_NE(nullptr, p);\n    p = perf_counters::instance().get_global_counter(\n        \"app\", \"test\", \"rate_counter\", COUNTER_TYPE_RATE, \"\", false);\n    ASSERT_NE(nullptr, p);\n\n    ASSERT_FALSE(perf_counters::instance().remove_counter(\"number_counter\"));\n    ASSERT_FALSE(perf_counters::instance().remove_counter(\"unexist_counter\"));\n\n    ASSERT_TRUE(perf_counters::instance().remove_counter(\"app*test*number_counter\"));\n    ASSERT_TRUE(perf_counters::instance().remove_counter(\"app*test*number_counter\"));\n    ASSERT_FALSE(perf_counters::instance().remove_counter(\"app*test*number_counter\"));\n    p = perf_counters::instance().get_global_counter(\n        \"app\", \"test\", \"number_counter\", COUNTER_TYPE_NUMBER, \"\", false);\n    ASSERT_EQ(nullptr, p);\n\n    ASSERT_TRUE(perf_counters::instance().remove_counter(\"app*test*volatile_number_counter\"));\n    
ASSERT_TRUE(perf_counters::instance().remove_counter(\"app*test*volatile_number_counter\"));\n    ASSERT_FALSE(perf_counters::instance().remove_counter(\"app*test*volatile_number_counter\"));\n    p = perf_counters::instance().get_global_counter(\n        \"app\", \"test\", \"volatile_number_counter\", COUNTER_TYPE_VOLATILE_NUMBER, \"\", false);\n    ASSERT_EQ(nullptr, p);\n\n    ASSERT_TRUE(perf_counters::instance().remove_counter(\"app*test*rate_counter\"));\n    ASSERT_TRUE(perf_counters::instance().remove_counter(\"app*test*rate_counter\"));\n    ASSERT_FALSE(perf_counters::instance().remove_counter(\"app*test*rate_counter\"));\n    p = perf_counters::instance().get_global_counter(\n        \"app\", \"test\", \"rate_counter\", COUNTER_TYPE_RATE, \"\", false);\n    ASSERT_EQ(nullptr, p);\n\n    p = perf_counters::instance().get_global_counter(\n        \"app\", \"test\", \"unexist_counter\", COUNTER_TYPE_NUMBER, \"\", false);\n    ASSERT_EQ(nullptr, p);\n    ASSERT_FALSE(perf_counters::instance().remove_counter(\"app*test*unexist_counter\"));\n}\n\ntemplate <typename K, typename V>\nbool check_map_contains(const std::map<K, V> &super, const std::map<K, V> &sub)\n{\n    for (const auto &kv : sub) {\n        auto it = super.find(kv.first);\n        if (it == super.end()) {\n            return false;\n        }\n        if (it->second != kv.second) {\n            return false;\n        }\n    }\n    return true;\n}\n\nTEST(perf_counters_test, snapshot)\n{\n    std::map<std::string, dsn_perf_counter_type_t> expected;\n\n    std::map<std::string, dsn_perf_counter_type_t> counter_keys;\n    perf_counters::instance().take_snapshot();\n    perf_counters::snapshot_iterator iter =\n        [&counter_keys](const perf_counters::counter_snapshot &cs) mutable {\n            counter_keys.emplace(cs.name, cs.type);\n        };\n\n    counter_keys.clear();\n    expected = {\n        {\"replica*server*memused.virt(MB)\", COUNTER_TYPE_NUMBER},\n        
{\"replica*server*memused.res(MB)\", COUNTER_TYPE_NUMBER},\n    };\n    perf_counters::instance().iterate_snapshot(iter);\n    // in the beginning, builtin counters are in counter_list\n    ASSERT_TRUE(check_map_contains(counter_keys, expected));\n\n    dsn::perf_counter_wrapper c1;\n    c1.init_global_counter(\"a\", \"s\", \"test_counter\", COUNTER_TYPE_NUMBER, \"\");\n    dsn::perf_counter_wrapper c2;\n    c2.init_global_counter(\"a\", \"s\", \"test_counter\", COUNTER_TYPE_NUMBER, \"\");\n\n    dsn::perf_counter_wrapper c3;\n    c3.init_global_counter(\"b\", \"s\", \"test_counter\", COUNTER_TYPE_VOLATILE_NUMBER, \"\");\n    dsn::perf_counter_wrapper c4;\n    c4.init_global_counter(\"b\", \"s\", \"test_counter\", COUNTER_TYPE_VOLATILE_NUMBER, \"\");\n\n    // snapshot will contain new counters\n    perf_counters::instance().take_snapshot();\n    counter_keys.clear();\n    expected = {\n        {\"replica*server*memused.virt(MB)\", COUNTER_TYPE_NUMBER},\n        {\"replica*server*memused.res(MB)\", COUNTER_TYPE_NUMBER},\n        {\"a*s*test_counter\", COUNTER_TYPE_NUMBER},\n        {\"b*s*test_counter\", COUNTER_TYPE_VOLATILE_NUMBER},\n    };\n    perf_counters::instance().iterate_snapshot(iter);\n    ASSERT_TRUE(check_map_contains(counter_keys, expected));\n\n    dsn::perf_counter_wrapper c5;\n    c5.init_global_counter(\"c\", \"s\", \"test_counter\", COUNTER_TYPE_RATE, \"\");\n    dsn::perf_counter_wrapper c6;\n    c6.init_global_counter(\"c\", \"s\", \"test_counter\", COUNTER_TYPE_RATE, \"\");\n\n    dsn::perf_counter_wrapper c7;\n    c7.init_global_counter(\"d\", \"s\", \"test_counter\", COUNTER_TYPE_NUMBER_PERCENTILES, \"\");\n    dsn::perf_counter_wrapper c8;\n    c8.init_global_counter(\"d\", \"s\", \"test_counter\", COUNTER_TYPE_NUMBER_PERCENTILES, \"\");\n\n    // new counters won't be contained in snapshot if you don't call \"take snapshot\"\n    counter_keys.clear();\n    expected = {\n        {\"replica*server*memused.virt(MB)\", COUNTER_TYPE_NUMBER},\n 
       {\"replica*server*memused.res(MB)\", COUNTER_TYPE_NUMBER},\n        {\"a*s*test_counter\", COUNTER_TYPE_NUMBER},\n        {\"b*s*test_counter\", COUNTER_TYPE_VOLATILE_NUMBER},\n    };\n    perf_counters::instance().iterate_snapshot(iter);\n    ASSERT_TRUE(check_map_contains(counter_keys, expected));\n    ASSERT_TRUE(counter_keys.find(\"c*s*test_counter\") == counter_keys.end());\n    ASSERT_TRUE(counter_keys.find(\"d*s*test_counter\") == counter_keys.end());\n\n    // after taking snapshot, new counters will be contained\n    counter_keys.clear();\n    expected = {\n        {\"replica*server*memused.virt(MB)\", COUNTER_TYPE_NUMBER},\n        {\"replica*server*memused.res(MB)\", COUNTER_TYPE_NUMBER},\n        {\"a*s*test_counter\", COUNTER_TYPE_NUMBER},\n        {\"b*s*test_counter\", COUNTER_TYPE_VOLATILE_NUMBER},\n        {\"c*s*test_counter\", COUNTER_TYPE_RATE},\n        {\"d*s*test_counter\", COUNTER_TYPE_NUMBER_PERCENTILES},\n    };\n    perf_counters::instance().take_snapshot();\n    perf_counters::instance().iterate_snapshot(iter);\n    ASSERT_TRUE(check_map_contains(counter_keys, expected));\n\n    c1.clear();\n    c2.clear();\n    c3.clear();\n    c4.clear();\n\n    // although remove counters, but snapshot won't been affected if you don't call take snapshot\n    counter_keys.clear();\n    perf_counters::instance().iterate_snapshot(iter);\n    expected = {\n        {\"replica*server*memused.virt(MB)\", COUNTER_TYPE_NUMBER},\n        {\"replica*server*memused.res(MB)\", COUNTER_TYPE_NUMBER},\n        {\"a*s*test_counter\", COUNTER_TYPE_NUMBER},\n        {\"b*s*test_counter\", COUNTER_TYPE_VOLATILE_NUMBER},\n        {\"c*s*test_counter\", COUNTER_TYPE_RATE},\n        {\"d*s*test_counter\", COUNTER_TYPE_NUMBER_PERCENTILES},\n    };\n    ASSERT_TRUE(check_map_contains(counter_keys, expected));\n\n    // after take snapshot, removed counters will be removed in snapshot\n    perf_counters::instance().take_snapshot();\n    counter_keys.clear();\n    
perf_counters::instance().iterate_snapshot(iter);\n    expected = {\n        {\"replica*server*memused.virt(MB)\", COUNTER_TYPE_NUMBER},\n        {\"replica*server*memused.res(MB)\", COUNTER_TYPE_NUMBER},\n        {\"c*s*test_counter\", COUNTER_TYPE_RATE},\n        {\"d*s*test_counter\", COUNTER_TYPE_NUMBER_PERCENTILES},\n    };\n    ASSERT_TRUE(check_map_contains(counter_keys, expected));\n    ASSERT_TRUE(counter_keys.find(\"a*s*test_counter\") == counter_keys.end());\n    ASSERT_TRUE(counter_keys.find(\"b*s*test_counter\") == counter_keys.end());\n\n    // query snapshot\n    std::vector<std::string> target_keys = {\n        \"a*s*test_counter\", \"c*s*test_counter\", \"b*s*test_counter\", \"d*s*test_counter\"};\n    expected = {\n        {\"c*s*test_counter\", COUNTER_TYPE_RATE},\n        {\"d*s*test_counter\", COUNTER_TYPE_NUMBER_PERCENTILES},\n    };\n\n    counter_keys.clear();\n    perf_counters::instance().query_snapshot(target_keys, iter, nullptr);\n    ASSERT_EQ(2, counter_keys.size());\n    ASSERT_EQ(expected, counter_keys);\n\n    counter_keys.clear();\n    std::vector<bool> found;\n    perf_counters::instance().query_snapshot(target_keys, iter, &found);\n    ASSERT_EQ(4, found.size());\n    std::vector<bool> expected_found = {false, true, false, true};\n    ASSERT_EQ(expected_found, found);\n    ASSERT_EQ(expected, counter_keys);\n}\n\nTEST(perf_counters_test, query_snapshot_by_regexp)\n{\n    dsn::perf_counter_wrapper c1;\n    c1.init_global_counter(\"a\", \"s\", \"test_counter\", COUNTER_TYPE_NUMBER, \"\");\n    dsn::perf_counter_wrapper c2;\n    c2.init_global_counter(\"a\", \"s\", \"test_counter\", COUNTER_TYPE_NUMBER, \"\");\n\n    dsn::perf_counter_wrapper c3;\n    c3.init_global_counter(\"b\", \"s\", \"test_counter\", COUNTER_TYPE_VOLATILE_NUMBER, \"\");\n    dsn::perf_counter_wrapper c4;\n    c4.init_global_counter(\"b\", \"s\", \"test_counter\", COUNTER_TYPE_VOLATILE_NUMBER, \"\");\n\n    dsn::perf_counter_wrapper c5;\n    
c5.init_global_counter(\"c\", \"s\", \"test_counter\", COUNTER_TYPE_RATE, \"\");\n    dsn::perf_counter_wrapper c6;\n    c6.init_global_counter(\"c\", \"s\", \"test_counter\", COUNTER_TYPE_RATE, \"\");\n\n    dsn::perf_counter_wrapper c7;\n    c7.init_global_counter(\"d\", \"s\", \"test_counter\", COUNTER_TYPE_NUMBER_PERCENTILES, \"\");\n    dsn::perf_counter_wrapper c8;\n    c8.init_global_counter(\"d\", \"s\", \"test_counter\", COUNTER_TYPE_NUMBER_PERCENTILES, \"\");\n\n    perf_counters::instance().take_snapshot();\n    std::string result = perf_counters::instance().list_snapshot_by_regexp({\".*\\\\*s\\\\*.*\"});\n\n    dsn::perf_counter_info info;\n    dsn::json::json_forwarder<dsn::perf_counter_info>::decode(\n        dsn::blob(result.c_str(), 0, result.size()), info);\n    ASSERT_STREQ(\"OK\", info.result.c_str());\n    ASSERT_GT(info.timestamp, 0);\n    ASSERT_TRUE(!info.timestamp_str.empty());\n    printf(\"got timestamp: %s\\n\", info.timestamp_str.c_str());\n    ASSERT_EQ(4 + 1, info.counters.size()); // add 1 for p999 counter\n\n    std::map<std::string, std::string> expected = {\n        {\"a*s*test_counter\", dsn_counter_type_to_string(COUNTER_TYPE_NUMBER)},\n        {\"b*s*test_counter\", dsn_counter_type_to_string(COUNTER_TYPE_VOLATILE_NUMBER)},\n        {\"c*s*test_counter\", dsn_counter_type_to_string(COUNTER_TYPE_RATE)},\n        {\"d*s*test_counter\", dsn_counter_type_to_string(COUNTER_TYPE_NUMBER_PERCENTILES)},\n        {\"d*s*test_counter.p999\", dsn_counter_type_to_string(COUNTER_TYPE_NUMBER_PERCENTILES)},\n    };\n    std::map<std::string, std::string> actual;\n    for (const dsn::perf_counter_metric &m : info.counters) {\n        actual.emplace(m.name, m.type);\n    }\n    ASSERT_EQ(expected, actual);\n\n    result = perf_counters::instance().list_snapshot_by_regexp({\"hahaha\"});\n    dsn::json::json_forwarder<dsn::perf_counter_info>::decode(\n        dsn::blob(result.c_str(), 0, result.size()), info);\n    ASSERT_STREQ(\"OK\", 
info.result.c_str());\n    ASSERT_GT(info.timestamp, 0);\n    ASSERT_TRUE(!info.timestamp_str.empty());\n    printf(\"got timestamp: %s\\n\", info.timestamp_str.c_str());\n    ASSERT_TRUE(info.counters.empty());\n\n    result = perf_counters::instance().list_snapshot_by_regexp({\"\"});\n    dsn::json::json_forwarder<dsn::perf_counter_info>::decode(\n        dsn::blob(result.c_str(), 0, result.size()), info);\n    ASSERT_STREQ(\"OK\", info.result.c_str());\n    ASSERT_GT(info.timestamp, 0);\n    ASSERT_TRUE(!info.timestamp_str.empty());\n    printf(\"got timestamp: %s\\n\", info.timestamp_str.c_str());\n    ASSERT_TRUE(info.counters.empty());\n}\n\nTEST(perf_counters_test, get_by_fullname)\n{\n    struct test_case\n    {\n        const char *app;\n        const char *section;\n        const char *name;\n        dsn_perf_counter_type_t type;\n        const char *dsptr;\n        bool create;\n    } tests[] = {{\"replica\", \"eon\", \"get_by_fullname1\", COUNTER_TYPE_NUMBER, \"pf1\", false},\n                 {\"replica\", \"eon\", \"get_by_fullname2\", COUNTER_TYPE_NUMBER, \"pf2\", true}};\n\n    for (auto test : tests) {\n        // precondition: make sure the perf counter doesn't exist\n        std::string perf_counter_name;\n        perf_counter::build_full_name(test.app, test.section, test.name, perf_counter_name);\n        perf_counters::instance().remove_counter(perf_counter_name.c_str());\n\n        if (test.create) {\n            // create perf counter\n            perf_counter_wrapper counter;\n            counter.init_global_counter(test.app, test.section, test.name, test.type, test.dsptr);\n            ASSERT_NE(nullptr, perf_counters::instance().get_counter(perf_counter_name));\n        } else {\n            ASSERT_EQ(nullptr, perf_counters::instance().get_counter(perf_counter_name));\n        }\n    }\n}\n"
  },
  {
    "path": "src/perf_counter/test/run.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nif [ -z \"${REPORT_DIR}\" ]; then\n    REPORT_DIR=\".\"\nfi\n\n./clear.sh\noutput_xml=\"${REPORT_DIR}/dsn_perf_counter_test.xml\"\nGTEST_OUTPUT=\"xml:${output_xml}\" ./dsn_perf_counter_test\n"
  },
  {
    "path": "src/remote_cmd/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_dist_cmd)\n\nthrift_generate_cpp(\n    COMMAND_THRIFT_SRCS\n    COMMAND_THRIFT_HDRS\n    ${CMAKE_CURRENT_SOURCE_DIR}/command.thrift\n)\n\nset(MY_PROJ_SRC ${COMMAND_THRIFT_SRCS})\n\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\ndsn_add_static_library()\n"
  },
  {
    "path": "src/remote_cmd/command.thrift",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\nnamespace cpp dsn.dist.cmd\n\nstruct command\n{\n    1:string       cmd;\n    2:list<string> arguments;\n}\n"
  },
  {
    "path": "src/remote_cmd/remote_command.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/dist/remote_command.h>\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/cpp/rpc_holder.h>\n#include <dsn/c/api_layer1.h>\n#include <dsn/utility/smart_pointers.h>\n\n#include \"command_types.h\"\n\nnamespace dsn {\nnamespace dist {\nnamespace cmd {\n\nDEFINE_TASK_CODE_RPC(RPC_CLI_CLI_CALL, TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT)\n\ntypedef rpc_holder<command, std::string> remote_command_rpc;\n\ntask_ptr async_call_remote(rpc_address remote,\n                           const std::string &cmd,\n                           const std::vector<std::string> &arguments,\n                           std::function<void(error_code, const std::string &)> callback,\n                           std::chrono::milliseconds timeout)\n{\n    std::unique_ptr<command> request = make_unique<command>();\n    request->cmd = cmd;\n    request->arguments = arguments;\n    remote_command_rpc rpc(std::move(request), RPC_CLI_CLI_CALL, timeout);\n    return rpc.call(remote, nullptr, [ cb = std::move(callback), rpc ](error_code ec) {\n        cb(ec, rpc.response());\n    });\n}\n\nbool register_remote_command_rpc()\n{\n    rpc_request_handler 
cb = [](dsn::message_ex *msg) {\n        auto rpc = remote_command_rpc::auto_reply(msg);\n        command_manager::instance().run_command(\n            rpc.request().cmd, rpc.request().arguments, rpc.response());\n    };\n\n    return dsn_rpc_register_handler(RPC_CLI_CLI_CALL, \"call\", cb);\n}\n\n} // namespace cmd\n} // namespace dist\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_replica_server)\n\nset(DUPLICATION_SRC\n        duplication/replica_duplicator_manager.cpp\n        duplication/duplication_sync_timer.cpp\n        duplication/replica_duplicator.cpp\n        duplication/replica_follower.cpp\n        duplication/duplication_pipeline.cpp\n        duplication/load_from_private_log.cpp\n        duplication/mutation_batch.cpp\n)\n\nset(BACKUP_SRC backup/replica_backup_manager.cpp\n               backup/cold_backup_context.cpp\n               backup/replica_backup_server.cpp\n)\n\nset(BULK_LOAD_SRC bulk_load/replica_bulk_loader.cpp)\n\nset(SPLIT_SRC split/replica_split_manager.cpp)\n\n# Source files under CURRENT project directory will be automatically included.\n# You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC\n        ${DUPLICATION_SRC}\n        ${BACKUP_SRC}\n        ${BULK_LOAD_SRC}\n        ${SPLIT_SRC}\n)\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS \n    dsn_replication_common\n    dsn.failure_detector\n    dsn.block_service\n    dsn.block_service.local\n    dsn.block_service.fds\n    dsn.block_service.hdfs\n    dsn_nfs\n    dsn_dist_cmd\n    dsn_http\n    dsn_runtime\n    dsn_aio\n    galaxy-fds-sdk-cpp\n    PocoNet\n    PocoFoundation\n    PocoNetSSL\n    PocoJSON\n    )\n\nset(MY_BOOST_LIBS Boost::filesystem Boost::regex)\n\n# Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_shared_library()\n\nadd_subdirectory(duplication/test)\nadd_subdirectory(backup/test)\nadd_subdirectory(bulk_load/test)\nadd_subdirectory(split/test)\nadd_subdirectory(storage)\nadd_subdirectory(test)\n"
  },
  {
    "path": "src/replica/backup/cold_backup_context.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"cold_backup_context.h\"\n#include \"common/backup_common.h\"\n#include \"replica/replica.h\"\n#include \"replica/replica_stub.h\"\n#include \"block_service/block_service_manager.h\"\n\n#include <dsn/utility/filesystem.h>\n\nnamespace dsn {\nnamespace replication {\n\nconst char *cold_backup_status_to_string(cold_backup_status status)\n{\n    switch (status) {\n    case ColdBackupInvalid:\n        return \"ColdBackupInvalid\";\n    case ColdBackupChecking:\n        return \"ColdBackupChecking\";\n    case ColdBackupChecked:\n        return \"ColdBackupChecked\";\n    case ColdBackupCheckpointing:\n        return \"ColdBackupCheckpointing\";\n    case ColdBackupCheckpointed:\n        return \"ColdBackupCheckpointed\";\n    case ColdBackupUploading:\n        return \"ColdBackupUploading\";\n    case ColdBackupPaused:\n        return \"ColdBackupPaused\";\n    case ColdBackupCanceled:\n        return \"ColdBackupCanceled\";\n    case ColdBackupCompleted:\n        return \"ColdBackupCompleted\";\n    case ColdBackupFailed:\n        return \"ColdBackupFailed\";\n    default:\n        dassert(false, \"\");\n    }\n    return 
\"ColdBackupXXX\";\n}\n\nvoid cold_backup_context::cancel()\n{\n    _status.store(ColdBackupCanceled);\n    if (_owner_replica != nullptr) {\n        _owner_replica->get_replica_stub()->_counter_cold_backup_recent_cancel_count->increment();\n    }\n}\n\nbool cold_backup_context::start_check()\n{\n    int invalid = ColdBackupInvalid;\n    if (_status.compare_exchange_strong(invalid, ColdBackupChecking)) {\n        _start_time_ms = dsn_now_ms();\n        return true;\n    } else {\n        return false;\n    }\n}\n\nbool cold_backup_context::fail_check(const char *failure_reason)\n{\n    int checking = ColdBackupChecking;\n    if (_status.compare_exchange_strong(checking, ColdBackupFailed)) {\n        strncpy(_reason, failure_reason, sizeof(_reason) - 1);\n        _reason[sizeof(_reason) - 1] = '\\0';\n        if (_owner_replica != nullptr) {\n            _owner_replica->get_replica_stub()->_counter_cold_backup_recent_fail_count->increment();\n        }\n        return true;\n    } else {\n        return false;\n    }\n}\n\nbool cold_backup_context::complete_check(bool uploaded)\n{\n    int checking = ColdBackupChecking;\n    if (uploaded) {\n        _progress.store(cold_backup_constant::PROGRESS_FINISHED);\n        if (_owner_replica != nullptr) {\n            _owner_replica->get_replica_stub()->_counter_cold_backup_recent_succ_count->increment();\n        }\n        return _status.compare_exchange_strong(checking, ColdBackupCompleted);\n    } else {\n        return _status.compare_exchange_strong(checking, ColdBackupChecked);\n    }\n}\n\nbool cold_backup_context::start_checkpoint()\n{\n    int checked = ColdBackupChecked;\n    if (_status.compare_exchange_strong(checked, ColdBackupCheckpointing)) {\n        return true;\n    } else {\n        return false;\n    }\n}\n\nbool cold_backup_context::fail_checkpoint(const char *failure_reason)\n{\n    int checkpointing = ColdBackupCheckpointing;\n    if (_status.compare_exchange_strong(checkpointing, ColdBackupFailed)) 
{\n        strncpy(_reason, failure_reason, sizeof(_reason) - 1);\n        _reason[sizeof(_reason) - 1] = '\\0';\n        if (_owner_replica != nullptr) {\n            _owner_replica->get_replica_stub()->_counter_cold_backup_recent_fail_count->increment();\n        }\n        return true;\n    } else {\n        return false;\n    }\n}\n\nbool cold_backup_context::complete_checkpoint()\n{\n    int checkpointing = ColdBackupCheckpointing;\n    if (_status.compare_exchange_strong(checkpointing, ColdBackupCheckpointed)) {\n        return true;\n    } else {\n        return false;\n    }\n}\nbool cold_backup_context::fail_upload(const char *failure_reason)\n{\n    int uploading = ColdBackupUploading;\n    int paused = ColdBackupPaused;\n    if (_status.compare_exchange_strong(uploading, ColdBackupFailed) ||\n        _status.compare_exchange_strong(paused, ColdBackupFailed)) {\n        strncpy(_reason, failure_reason, sizeof(_reason) - 1);\n        _reason[sizeof(_reason) - 1] = '\\0';\n        if (_owner_replica != nullptr) {\n            _owner_replica->get_replica_stub()->_counter_cold_backup_recent_fail_count->increment();\n        }\n        return true;\n    } else {\n        return false;\n    }\n}\n\nbool cold_backup_context::complete_upload()\n{\n    int uploading = ColdBackupUploading;\n    int paused = ColdBackupPaused;\n    if (_status.compare_exchange_strong(uploading, ColdBackupCompleted) ||\n        _status.compare_exchange_strong(paused, ColdBackupCompleted)) {\n        _progress.store(cold_backup_constant::PROGRESS_FINISHED);\n        if (_owner_replica != nullptr) {\n            _owner_replica->get_replica_stub()->_counter_cold_backup_recent_succ_count->increment();\n        }\n        return true;\n    } else {\n        return false;\n    }\n}\n\n// run in REPLICATION_LONG thread\nvoid cold_backup_context::check_backup_on_remote()\n{\n    // check whether current checkpoint file is exist on remote, and verify whether the checkpoint\n    // directory is 
exist\n    std::string current_chkpt_file = cold_backup::get_current_chkpt_file(\n        backup_root, request.app_name, request.pid, request.backup_id);\n    dist::block_service::create_file_request req;\n    req.file_name = current_chkpt_file;\n    req.ignore_metadata = false;\n\n    // incr the ref counter, and must release_ref() after callback is execute\n    add_ref();\n\n    block_service->create_file(\n        std::move(req),\n        LPC_BACKGROUND_COLD_BACKUP,\n        [this, current_chkpt_file](const dist::block_service::create_file_response &resp) {\n            if (!is_ready_for_check()) {\n                ddebug(\"%s: backup status has changed to %s, ignore checking backup on remote\",\n                       name,\n                       cold_backup_status_to_string(status()));\n                ignore_check();\n            } else if (resp.err == ERR_OK) {\n                const dist::block_service::block_file_ptr &file_handle = resp.file_handle;\n                dassert(file_handle != nullptr, \"\");\n                if (file_handle->get_md5sum().empty() && file_handle->get_size() <= 0) {\n                    ddebug(\"%s: check backup on remote, current_checkpoint file %s is not exist\",\n                           name,\n                           current_chkpt_file.c_str());\n                    complete_check(false);\n                } else {\n                    ddebug(\"%s: check backup on remote, current_checkpoint file %s is exist\",\n                           name,\n                           current_chkpt_file.c_str());\n                    read_current_chkpt_file(file_handle);\n                }\n            } else if (resp.err == ERR_TIMEOUT) {\n                derror(\"%s: block service create file timeout, retry after 10 seconds, file = %s\",\n                       name,\n                       current_chkpt_file.c_str());\n\n                // before retry, should add_ref(), and must release_ref() after retry\n                
add_ref();\n\n                tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,\n                                 nullptr,\n                                 [this]() {\n                                     // before retry, should check whether the status is ready for\n                                     // check\n                                     if (!is_ready_for_check()) {\n                                         ddebug(\"%s: backup status has changed to %s, ignore \"\n                                                \"checking backup on remote\",\n                                                name,\n                                                cold_backup_status_to_string(status()));\n                                         ignore_check();\n                                     } else {\n                                         check_backup_on_remote();\n                                     }\n                                     release_ref();\n                                 },\n                                 0,\n                                 std::chrono::seconds(10));\n            } else {\n                derror(\"%s: block service create file failed, file = %s, err = %s\",\n                       name,\n                       current_chkpt_file.c_str(),\n                       resp.err.to_string());\n                fail_check(\"block service create file failed\");\n            }\n            release_ref();\n        });\n}\n\nvoid cold_backup_context::read_current_chkpt_file(\n    const dist::block_service::block_file_ptr &file_handle)\n{\n    dist::block_service::read_request req;\n    req.remote_pos = 0;\n    req.remote_length = -1;\n\n    add_ref();\n\n    file_handle->read(\n        std::move(req),\n        LPC_BACKGROUND_COLD_BACKUP,\n        [this, file_handle](const dist::block_service::read_response &resp) {\n            if (!is_ready_for_check()) {\n                ddebug(\"%s: backup status has changed to %s, ignore checking backup on 
remote\",\n                       name,\n                       cold_backup_status_to_string(status()));\n                ignore_check();\n            } else if (resp.err == ERR_OK) {\n                std::string chkpt_dirname(resp.buffer.data(), resp.buffer.length());\n                if (chkpt_dirname.empty()) {\n                    complete_check(false);\n                } else {\n                    ddebug(\"%s: after read current_checkpoint_file, check whether remote \"\n                           \"checkpoint dir = %s is exist\",\n                           name,\n                           chkpt_dirname.c_str());\n                    remote_chkpt_dir_exist(chkpt_dirname);\n                }\n            } else if (resp.err == ERR_TIMEOUT) {\n                derror(\"%s: read remote file timeout, retry after 10s, file = %s\",\n                       name,\n                       file_handle->file_name().c_str());\n                add_ref();\n\n                tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,\n                                 nullptr,\n                                 [this, file_handle]() {\n                                     if (!is_ready_for_check()) {\n                                         ddebug(\"%s: backup status has changed to %s, ignore \"\n                                                \"checking backup on remote\",\n                                                name,\n                                                cold_backup_status_to_string(status()));\n                                         ignore_check();\n                                     } else {\n                                         read_current_chkpt_file(file_handle);\n                                     }\n                                     release_ref();\n                                 },\n                                 0,\n                                 std::chrono::seconds(10));\n            } else {\n                derror(\"%s: read remote file 
failed, file = %s, err = %s\",\n                       name,\n                       file_handle->file_name().c_str(),\n                       resp.err.to_string());\n                fail_check(\"read remote file failed\");\n            }\n            release_ref();\n            return;\n        });\n}\n\nvoid cold_backup_context::remote_chkpt_dir_exist(const std::string &chkpt_dirname)\n{\n    dist::block_service::ls_request req;\n    req.dir_name = cold_backup::get_replica_backup_path(\n        backup_root, request.app_name, request.pid, request.backup_id);\n\n    add_ref();\n\n    block_service->list_dir(\n        std::move(req),\n        LPC_BACKGROUND_COLD_BACKUP,\n        [this, chkpt_dirname](const dist::block_service::ls_response &resp) {\n            if (!is_ready_for_check()) {\n                ddebug(\"%s: backup status has changed to %s, ignore checking backup on remote\",\n                       name,\n                       cold_backup_status_to_string(status()));\n                ignore_check();\n            } else if (resp.err == ERR_OK) {\n                bool found_chkpt_dir = false;\n                for (const auto &entry : (*resp.entries)) {\n                    if (entry.is_directory && entry.entry_name == chkpt_dirname) {\n                        found_chkpt_dir = true;\n                        break;\n                    }\n                }\n                if (found_chkpt_dir) {\n                    ddebug(\"%s: remote checkpoint dir is already exist, so upload have already \"\n                           \"complete, remote_checkpoint_dirname = %s\",\n                           name,\n                           chkpt_dirname.c_str());\n                    complete_check(true);\n                } else {\n                    ddebug(\"%s: remote checkpoint dir is not exist, should re-upload checkpoint \"\n                           \"dir, remote_checkpoint_dirname = %s\",\n                           name,\n                           
chkpt_dirname.c_str());\n                    complete_check(false);\n                }\n            } else if (resp.err == ERR_OBJECT_NOT_FOUND) {\n                ddebug(\"%s: remote checkpoint dir is not exist, should re-upload checkpoint dir, \"\n                       \"remote_checkpoint_dirname = %s\",\n                       name,\n                       chkpt_dirname.c_str());\n                complete_check(false);\n            } else if (resp.err == ERR_TIMEOUT) {\n                derror(\"%s: block service list remote dir timeout, retry after 10s, dirname = %s\",\n                       name,\n                       chkpt_dirname.c_str());\n                add_ref();\n\n                tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,\n                                 nullptr,\n                                 [this, chkpt_dirname]() {\n                                     if (!is_ready_for_check()) {\n                                         ddebug(\"%s: backup status has changed to %s, ignore \"\n                                                \"checking backup on remote\",\n                                                name,\n                                                cold_backup_status_to_string(status()));\n                                         ignore_check();\n                                     } else {\n                                         remote_chkpt_dir_exist(chkpt_dirname);\n                                     }\n                                     release_ref();\n                                 },\n                                 0,\n                                 std::chrono::seconds(10));\n            } else {\n                derror(\"%s: block service list remote dir failed, dirname = %s, err = %s\",\n                       name,\n                       chkpt_dirname.c_str(),\n                       resp.err.to_string());\n                fail_check(\"list remote dir failed\");\n            }\n            release_ref();\n  
          return;\n        });\n}\n\nvoid cold_backup_context::upload_checkpoint_to_remote()\n{\n    if (!is_ready_for_upload()) {\n        ddebug(\"%s: backup status has changed to %s, ignore upload checkpoint\",\n               name,\n               cold_backup_status_to_string(status()));\n        return;\n    }\n\n    bool old_status = false;\n    // here, just allow one task to check upload status, and it will set _upload_status base on\n    // the result it has checked; But, because of upload_checkpoint_to_remote maybe call multi-times\n    // (for pause - uploading), so we use the atomic variant to implement\n    if (!_have_check_upload_status.compare_exchange_strong(old_status, true)) {\n        ddebug(\"%s: upload status has already been checked, start upload checkpoint dir directly\",\n               name);\n        on_upload_chkpt_dir();\n        return;\n    }\n\n    // check whether cold_backup_metadata is exist and verify cold_backup_metadata if exist\n    std::string metadata = cold_backup::get_remote_chkpt_meta_file(\n        backup_root, request.app_name, request.pid, request.backup_id);\n    dist::block_service::create_file_request req;\n    req.file_name = metadata;\n    req.ignore_metadata = false;\n\n    add_ref();\n\n    block_service->create_file(\n        std::move(req),\n        LPC_BACKGROUND_COLD_BACKUP,\n        [this, metadata](const dist::block_service::create_file_response &resp) {\n            if (resp.err == ERR_OK) {\n                dassert(resp.file_handle != nullptr, \"\");\n                if (resp.file_handle->get_md5sum().empty() && resp.file_handle->get_size() <= 0) {\n                    _upload_status.store(UploadUncomplete);\n                    ddebug(\"%s: check upload_status complete, cold_backup_metadata isn't exist, \"\n                           \"start upload checkpoint dir\",\n                           name);\n                    on_upload_chkpt_dir();\n                } else {\n                    ddebug(\"%s: 
cold_backup_metadata is exist, read it's context\", name);\n                    read_backup_metadata(resp.file_handle);\n                }\n            } else if (resp.err == ERR_TIMEOUT) {\n                derror(\"%s: block service create file timeout, retry after 10s, file = %s\",\n                       name,\n                       metadata.c_str());\n                // when create backup_metadata timeout, should reset _have_check_upload_status\n                // false to allow re-check\n                _have_check_upload_status.store(false);\n                add_ref();\n\n                tasking::enqueue(\n                    LPC_BACKGROUND_COLD_BACKUP,\n                    nullptr,\n                    [this]() {\n                        if (!is_ready_for_upload()) {\n                            ddebug(\"%s: backup status has changed to %s, stop check upload status\",\n                                   name,\n                                   cold_backup_status_to_string(status()));\n                        } else {\n                            upload_checkpoint_to_remote();\n                        }\n                        release_ref();\n                    },\n                    0,\n                    std::chrono::seconds(10));\n            } else {\n                derror(\"%s: block service create file failed, file = %s, err = %s\",\n                       name,\n                       metadata.c_str(),\n                       resp.err.to_string());\n                _have_check_upload_status.store(false);\n                fail_upload(\"block service create file failed\");\n            }\n            release_ref();\n            return;\n        });\n}\n\nvoid cold_backup_context::read_backup_metadata(\n    const dist::block_service::block_file_ptr &file_handle)\n{\n    dist::block_service::read_request req;\n    req.remote_pos = 0;\n    req.remote_length = -1;\n\n    add_ref();\n\n    file_handle->read(\n        std::move(req),\n        
LPC_BACKGROUND_COLD_BACKUP,\n        [this, file_handle](const dist::block_service::read_response &resp) {\n            if (resp.err == ERR_OK) {\n                ddebug(\"%s: read cold_backup_metadata succeed, verify it's context, file = %s\",\n                       name,\n                       file_handle->file_name().c_str());\n                verify_backup_metadata(resp.buffer);\n                on_upload_chkpt_dir();\n            } else if (resp.err == ERR_TIMEOUT) {\n                derror(\"%s: read remote file timeout, retry after 10s, file = %s\",\n                       name,\n                       file_handle->file_name().c_str());\n                add_ref();\n\n                tasking::enqueue(\n                    LPC_BACKGROUND_COLD_BACKUP,\n                    nullptr,\n                    [this, file_handle] {\n                        if (!is_ready_for_upload()) {\n                            ddebug(\"%s: backup status has changed to %s, stop check upload status\",\n                                   name,\n                                   cold_backup_status_to_string(status()));\n                            _have_check_upload_status.store(false);\n                        } else {\n                            read_backup_metadata(file_handle);\n                        }\n                        release_ref();\n                    },\n                    0,\n                    std::chrono::seconds(10));\n            } else {\n                derror(\"%s: read remote file failed, file = %s, err = %s\",\n                       name,\n                       file_handle->file_name().c_str(),\n                       resp.err.to_string());\n                _have_check_upload_status.store(false);\n                fail_upload(\"read remote file failed\");\n            }\n            release_ref();\n            return;\n        });\n}\n\nvoid cold_backup_context::verify_backup_metadata(const blob &value)\n{\n    cold_backup_metadata tmp;\n    if 
(value.length() > 0 && json::json_forwarder<cold_backup_metadata>::decode(value, tmp)) {\n        ddebug(\"%s: check upload status complete, checkpoint dir uploading has already complete\",\n               name);\n        _upload_status.store(UploadComplete);\n    } else {\n        ddebug(\"%s: check upload status complete, checkpoint dir uploading isn't complete yet\",\n               name);\n        _upload_status.store(UploadUncomplete);\n    }\n}\n\nvoid cold_backup_context::on_upload_chkpt_dir()\n{\n    if (_upload_status.load() == UploadInvalid || !is_ready_for_upload()) {\n        ddebug(\"%s: replica is not ready for uploading, ignore upload, cold_backup_status(%s)\",\n               name,\n               cold_backup_status_to_string(status()));\n        return;\n    }\n\n    if (_upload_status.load() == UploadComplete) {\n        // TODO: if call upload_checkpint_to_remote multi times, maybe write_current_chkpt_file\n        // multi times\n        std::string chkpt_dirname = cold_backup::get_remote_chkpt_dirname();\n        write_current_chkpt_file(chkpt_dirname);\n        return;\n    }\n\n    prepare_upload();\n\n    // prepare_upload maybe fail, so here check status\n    if (!is_ready_for_upload()) {\n        derror(\"%s: backup status has changed to %s, stop upload checkpoint dir\",\n               name,\n               cold_backup_status_to_string(status()));\n        return;\n    }\n\n    if (checkpoint_files.size() <= 0) {\n        ddebug(\"%s: checkpoint dir is empty, so upload is complete and just start write \"\n               \"backup_metadata\",\n               name);\n        bool old_status = false;\n        // using atomic variant _have_write_backup_metadata is to allow one task to\n        // write backup_metadata because on_upload_chkpt_dir maybe call multi-time\n        if (_have_write_backup_metadata.compare_exchange_strong(old_status, true)) {\n            write_backup_metadata();\n        }\n    } else {\n        ddebug(\"%s: start 
upload checkpoint dir, checkpoint dir = %s, total checkpoint file = %d\",\n               name,\n               checkpoint_dir.c_str(),\n               checkpoint_files.size());\n        std::vector<std::string> files;\n        if (!upload_complete_or_fetch_uncomplete_files(files)) {\n            for (auto &file : files) {\n                ddebug(\"%s: start upload checkpoint file to remote, file = %s\", name, file.c_str());\n                upload_file(file);\n            }\n        } else {\n            ddebug(\"%s: upload checkpoint dir to remote complete, total_file_cnt = %d\",\n                   name,\n                   checkpoint_files.size());\n            bool old_status = false;\n            if (_have_write_backup_metadata.compare_exchange_strong(old_status, true)) {\n                write_backup_metadata();\n            }\n        }\n    }\n}\n\nvoid cold_backup_context::prepare_upload()\n{\n    zauto_lock l(_lock);\n    // only need initialize once\n    if (_metadata.files.size() > 0) {\n        return;\n    }\n    _file_remain_cnt = checkpoint_files.size();\n\n    _metadata.checkpoint_decree = checkpoint_decree;\n    _metadata.checkpoint_timestamp = checkpoint_timestamp;\n    _metadata.checkpoint_total_size = checkpoint_file_total_size;\n    for (int32_t idx = 0; idx < checkpoint_files.size(); idx++) {\n        std::string &file = checkpoint_files[idx];\n        file_meta f_meta;\n        f_meta.name = file;\n        std::string file_full_path = ::dsn::utils::filesystem::path_combine(checkpoint_dir, file);\n        int64_t file_size = checkpoint_file_sizes[idx];\n        std::string file_md5;\n        if (::dsn::utils::filesystem::md5sum(file_full_path, file_md5) != ERR_OK) {\n            derror(\"%s: get local file size or md5 fail, file = %s\", name, file_full_path.c_str());\n            fail_upload(\"compute local file size or md5 failed\");\n            return;\n        }\n        f_meta.md5 = file_md5;\n        f_meta.size = file_size;\n        
_metadata.files.emplace_back(f_meta);\n        _file_status.insert(std::make_pair(file, FileUploadUncomplete));\n        _file_infos.insert(std::make_pair(file, std::make_pair(file_size, file_md5)));\n    }\n    _upload_file_size.store(0);\n}\n\nvoid cold_backup_context::upload_file(const std::string &local_filename)\n{\n    std::string remote_chkpt_dir = cold_backup::get_remote_chkpt_dir(\n        backup_root, request.app_name, request.pid, request.backup_id);\n    dist::block_service::create_file_request req;\n    req.file_name = ::dsn::utils::filesystem::path_combine(remote_chkpt_dir, local_filename);\n    req.ignore_metadata = false;\n\n    add_ref();\n\n    block_service->create_file(\n        std::move(req),\n        LPC_BACKGROUND_COLD_BACKUP,\n        [this, local_filename](const dist::block_service::create_file_response &resp) {\n            if (resp.err == ERR_OK) {\n                const dist::block_service::block_file_ptr &file_handle = resp.file_handle;\n                dassert(file_handle != nullptr, \"\");\n                int64_t local_file_size = _file_infos.at(local_filename).first;\n                std::string md5 = _file_infos.at(local_filename).second;\n                std::string full_path_local_file =\n                    ::dsn::utils::filesystem::path_combine(checkpoint_dir, local_filename);\n                if (md5 == file_handle->get_md5sum() &&\n                    local_file_size == file_handle->get_size()) {\n                    ddebug(\"%s: checkpoint file already exist on remote, file = %s\",\n                           name,\n                           full_path_local_file.c_str());\n                    on_upload_file_complete(local_filename);\n                } else {\n                    ddebug(\"%s: start upload checkpoint file to remote, file = %s\",\n                           name,\n                           full_path_local_file.c_str());\n                    on_upload(file_handle, full_path_local_file);\n                }\n   
         } else if (resp.err == ERR_TIMEOUT) {\n                derror(\"%s: block service create file timeout, retry after 10s, file = %s\",\n                       name,\n                       local_filename.c_str());\n                add_ref();\n\n                tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,\n                                 nullptr,\n                                 [this, local_filename]() {\n                                     // TODO: status change from ColdBackupUploading to\n                                     // ColdBackupPaused, and upload file timeout, but when callback\n                                     // is executed it catches the status(ColdBackupPaused)\n                                     // now, if status back to ColdBackupUploading very soon, and\n                                     // call upload_checkpoint_to_remote() here,\n                                     // upload_checkpoint_to_remote() maybe acquire the _lock first,\n                                     // then stop give back file(upload timeout), the file is still\n                                     // in uploading this file will not be uploaded until you call\n                                     // upload_checkpoint_to_remote() after it's given back\n                                     if (!is_ready_for_upload()) {\n                                         std::string full_path_local_file =\n                                             ::dsn::utils::filesystem::path_combine(checkpoint_dir,\n                                                                                    local_filename);\n                                         ddebug(\"%s: backup status has changed to %s, stop upload \"\n                                                \"checkpoint file to remote, file = %s\",\n                                                name,\n                                                cold_backup_status_to_string(status()),\n                                   
             full_path_local_file.c_str());\n                                         file_upload_uncomplete(local_filename);\n                                     } else {\n                                         upload_file(local_filename);\n                                     }\n                                     release_ref();\n                                 },\n                                 0,\n                                 std::chrono::seconds(10));\n            } else {\n                derror(\"%s: block service create file failed, file = %s, err = %s\",\n                       name,\n                       local_filename.c_str(),\n                       resp.err.to_string());\n                fail_upload(\"create file failed\");\n            }\n            if (resp.err != ERR_OK && _owner_replica != nullptr) {\n                _owner_replica->get_replica_stub()\n                    ->_counter_cold_backup_recent_upload_file_fail_count->increment();\n            }\n            release_ref();\n            return;\n        });\n}\n\nvoid cold_backup_context::on_upload(const dist::block_service::block_file_ptr &file_handle,\n                                    const std::string &full_path_local_file)\n{\n    dist::block_service::upload_request req;\n    req.input_local_name = full_path_local_file;\n\n    add_ref();\n\n    file_handle->upload(\n        std::move(req),\n        LPC_BACKGROUND_COLD_BACKUP,\n        [this, file_handle, full_path_local_file](\n            const dist::block_service::upload_response &resp) {\n            if (resp.err == ERR_OK) {\n                std::string local_filename =\n                    ::dsn::utils::filesystem::get_file_name(full_path_local_file);\n                dassert(_file_infos.at(local_filename).first ==\n                            static_cast<int64_t>(resp.uploaded_size),\n                        \"\");\n                ddebug(\"%s: upload checkpoint file complete, file = %s\",\n                       
name,\n                       full_path_local_file.c_str());\n                on_upload_file_complete(local_filename);\n            } else if (resp.err == ERR_TIMEOUT) {\n                derror(\"%s: upload checkpoint file timeout, retry after 10s, file = %s\",\n                       name,\n                       full_path_local_file.c_str());\n                add_ref();\n\n                tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,\n                                 nullptr,\n                                 [this, file_handle, full_path_local_file]() {\n                                     if (!is_ready_for_upload()) {\n                                         derror(\"%s: backup status has changed to %s, stop upload \"\n                                                \"checkpoint file to remote, file = %s\",\n                                                name,\n                                                cold_backup_status_to_string(status()),\n                                                full_path_local_file.c_str());\n                                         std::string local_filename =\n                                             ::dsn::utils::filesystem::get_file_name(\n                                                 full_path_local_file);\n                                         file_upload_uncomplete(local_filename);\n                                     } else {\n                                         on_upload(file_handle, full_path_local_file);\n                                     }\n                                     release_ref();\n                                 },\n                                 0,\n                                 std::chrono::seconds(10));\n            } else {\n                derror(\"%s: upload checkpoint file to remote failed, file = %s, err = %s\",\n                       name,\n                       full_path_local_file.c_str(),\n                       resp.err.to_string());\n                
fail_upload(\"upload checkpoint file to remote failed\");\n            }\n            if (resp.err != ERR_OK && _owner_replica != nullptr) {\n                _owner_replica->get_replica_stub()\n                    ->_counter_cold_backup_recent_upload_file_fail_count->increment();\n            }\n            release_ref();\n            return;\n        });\n}\n\nvoid cold_backup_context::write_backup_metadata()\n{\n    if (_upload_status.load() == UploadComplete) {\n        ddebug(\"%s: upload have already done, no need write metadata again\", name);\n        return;\n    }\n    std::string metadata = cold_backup::get_remote_chkpt_meta_file(\n        backup_root, request.app_name, request.pid, request.backup_id);\n    dist::block_service::create_file_request req;\n    req.file_name = metadata;\n    req.ignore_metadata = true;\n\n    add_ref();\n\n    block_service->create_file(\n        std::move(req),\n        LPC_BACKGROUND_COLD_BACKUP,\n        [this, metadata](const dist::block_service::create_file_response &resp) {\n            if (resp.err == ERR_OK) {\n                dassert(resp.file_handle != nullptr, \"\");\n                blob buffer = json::json_forwarder<cold_backup_metadata>::encode(_metadata);\n                // hold itself until callback is executed\n                add_ref();\n                ddebug(\"%s: create backup metadata file succeed, start to write file, file = %s\",\n                       name,\n                       metadata.c_str());\n                this->on_write(resp.file_handle, buffer, [this](bool succeed) {\n                    if (succeed) {\n                        std::string chkpt_dirname = cold_backup::get_remote_chkpt_dirname();\n                        _upload_status.store(UploadComplete);\n                        ddebug(\"%s: write backup metadata complete, write current checkpoint file\",\n                               name);\n                        write_current_chkpt_file(chkpt_dirname);\n                    }\n    
                // NOTICE: write file fail will internal error be processed in on_write()\n                    release_ref();\n                });\n            } else if (resp.err == ERR_TIMEOUT) {\n                derror(\"%s: block service create file timeout, retry after 10s, file = %s\",\n                       name,\n                       metadata.c_str());\n                add_ref();\n\n                tasking::enqueue(\n                    LPC_BACKGROUND_COLD_BACKUP,\n                    nullptr,\n                    [this]() {\n                        if (!is_ready_for_upload()) {\n                            _have_write_backup_metadata.store(false);\n                            derror(\n                                \"%s: backup status has changed to %s, stop write backup_metadata\",\n                                name,\n                                cold_backup_status_to_string(status()));\n                        } else {\n                            write_backup_metadata();\n                        }\n                        release_ref();\n                    },\n                    0,\n                    std::chrono::seconds(10));\n            } else {\n                derror(\"%s: block service create file failed, file = %s, err = %s\",\n                       name,\n                       metadata.c_str(),\n                       resp.err.to_string());\n                _have_write_backup_metadata.store(false);\n                fail_upload(\"create file failed\");\n            }\n            release_ref();\n            return;\n        });\n}\n\nvoid cold_backup_context::write_current_chkpt_file(const std::string &value)\n{\n    // before we write current checkpoint file, we can release the memory occupied by _metadata,\n    // _file_status and _file_infos, because even if write current checkpoint file failed, the\n    // backup_metadata is uploading succeed, so we will not re-upload\n    _metadata.files.clear();\n    _file_infos.clear();\n   
 _file_status.clear();\n\n    if (!is_ready_for_upload()) {\n        ddebug(\"%s: backup status has changed to %s, stop write current checkpoint file\",\n               name,\n               cold_backup_status_to_string(status()));\n        return;\n    }\n\n    std::string current_chkpt_file = cold_backup::get_current_chkpt_file(\n        backup_root, request.app_name, request.pid, request.backup_id);\n    dist::block_service::create_file_request req;\n    req.file_name = current_chkpt_file;\n    req.ignore_metadata = true;\n\n    add_ref();\n\n    block_service->create_file(\n        std::move(req),\n        LPC_BACKGROUND_COLD_BACKUP,\n        [this, value, current_chkpt_file](const dist::block_service::create_file_response &resp) {\n            if (resp.err == ERR_OK) {\n                dassert(resp.file_handle != nullptr, \"\");\n                auto len = value.length();\n                std::shared_ptr<char> buf = utils::make_shared_array<char>(len);\n                ::memcpy(buf.get(), value.c_str(), len);\n                blob write_buf(std::move(buf), static_cast<unsigned int>(len));\n                ddebug(\"%s: create current checkpoint file succeed, start write file ,file = %s\",\n                       name,\n                       current_chkpt_file.c_str());\n                add_ref();\n                this->on_write(resp.file_handle, write_buf, [this](bool succeed) {\n                    if (succeed) {\n                        complete_upload();\n                    }\n                    release_ref();\n                });\n            } else if (resp.err == ERR_TIMEOUT) {\n                derror(\"%s: block file create file timeout, retry after 10s, file = %s\",\n                       name,\n                       current_chkpt_file.c_str());\n                add_ref();\n\n                tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,\n                                 nullptr,\n                                 [this, value]() {\n                   
                  if (!is_ready_for_upload()) {\n                                         ddebug(\"%s: backup status has changed to %s, stop write \"\n                                                \"current checkpoint file\",\n                                                name,\n                                                cold_backup_status_to_string(status()));\n                                     } else {\n                                         write_current_chkpt_file(value);\n                                     }\n\n                                     release_ref();\n                                 },\n                                 0,\n                                 std::chrono::seconds(10));\n            } else {\n                derror(\"%s: block service create file failed, file = %s, err = %s\",\n                       name,\n                       current_chkpt_file.c_str(),\n                       resp.err.to_string());\n                fail_upload(\"create file failed\");\n            }\n            release_ref();\n            return;\n        });\n}\n\nvoid cold_backup_context::on_write(const dist::block_service::block_file_ptr &file_handle,\n                                   const blob &value,\n                                   const std::function<void(bool)> &callback)\n{\n    dassert(file_handle != nullptr, \"\");\n    dist::block_service::write_request req;\n    req.buffer = value;\n\n    add_ref();\n\n    file_handle->write(\n        std::move(req),\n        LPC_BACKGROUND_COLD_BACKUP,\n        [this, value, file_handle, callback](const dist::block_service::write_response &resp) {\n            if (resp.err == ERR_OK) {\n                ddebug(\"%s: write remote file succeed, file = %s\",\n                       name,\n                       file_handle->file_name().c_str());\n                callback(true);\n            } else if (resp.err == ERR_TIMEOUT) {\n                ddebug(\"%s: write remote file timeout, retry after 
10s, file = %s\",\n                       name,\n                       file_handle->file_name().c_str());\n                add_ref();\n\n                tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP,\n                                 nullptr,\n                                 [this, file_handle, value, callback]() {\n                                     if (!is_ready_for_upload()) {\n                                         ddebug(\"%s: backup status has changed to %s, stop write \"\n                                                \"remote file, file = %s\",\n                                                name,\n                                                cold_backup_status_to_string(status()),\n                                                file_handle->file_name().c_str());\n                                     } else {\n                                         on_write(file_handle, value, callback);\n                                     }\n                                     release_ref();\n                                 },\n                                 0,\n                                 std::chrono::seconds(10));\n            } else {\n                // here, must call the callback to release_ref\n                callback(false);\n                derror(\"%s: write remote file failed, file = %s, err = %s\",\n                       name,\n                       file_handle->file_name().c_str(),\n                       resp.err.to_string());\n                fail_upload(\"write remote file failed\");\n            }\n            release_ref();\n            return;\n        });\n}\n\nvoid cold_backup_context::on_upload_file_complete(const std::string &local_filename)\n{\n    const int64_t &f_size = _file_infos.at(local_filename).first;\n    _upload_file_size.fetch_add(f_size);\n    file_upload_complete(local_filename);\n    if (_owner_replica != nullptr) {\n        _owner_replica->get_replica_stub()\n            
->_counter_cold_backup_recent_upload_file_succ_count->increment();\n        _owner_replica->get_replica_stub()->_counter_cold_backup_recent_upload_file_size->add(\n            f_size);\n    }\n    // update progress\n    // int a = 10; int b = 3; then  b/a = 0;\n    // double a = 10; double b = 3; then b/a = 0.3\n    auto total = static_cast<double>(checkpoint_file_total_size);\n    auto complete_size = static_cast<double>(_upload_file_size.load());\n\n    if (total <= complete_size) {\n        ddebug(\"%s: upload checkpoint to remote complete, checkpoint dir = %s, total file size = \"\n               \"%\" PRId64 \", file count = %d\",\n               name,\n               checkpoint_dir.c_str(),\n               static_cast<int64_t>(total),\n               checkpoint_files.size());\n        bool old_status = false;\n        if (_have_write_backup_metadata.compare_exchange_strong(old_status, true)) {\n            write_backup_metadata();\n        }\n        return;\n    } else {\n        dassert(total != 0.0, \"total = %\" PRId64 \"\", total);\n        update_progress(static_cast<int>(complete_size / total * 1000));\n        ddebug(\"%s: the progress of upload checkpoint is %d\", name, _progress.load());\n    }\n    if (is_ready_for_upload()) {\n        std::vector<std::string> upload_files;\n        upload_complete_or_fetch_uncomplete_files(upload_files);\n        for (auto &file : upload_files) {\n            ddebug(\"%s: start upload checkpoint file to remote, file = %s\", name, file.c_str());\n            upload_file(file);\n        }\n    }\n}\n\nbool cold_backup_context::upload_complete_or_fetch_uncomplete_files(std::vector<std::string> &files)\n{\n    bool upload_complete = false;\n\n    zauto_lock l(_lock);\n    if (_file_remain_cnt > 0 && _cur_upload_file_cnt < _max_concurrent_uploading_file_cnt) {\n        for (const auto &_pair : _file_status) {\n            if (_pair.second == file_status::FileUploadUncomplete) {\n                
files.emplace_back(_pair.first);\n                _file_remain_cnt -= 1;\n                _file_status[_pair.first] = file_status::FileUploading;\n                _cur_upload_file_cnt += 1;\n            }\n            if (_file_remain_cnt <= 0 ||\n                _cur_upload_file_cnt >= _max_concurrent_uploading_file_cnt) {\n                break;\n            }\n        }\n    }\n    if (_file_remain_cnt <= 0 && _cur_upload_file_cnt <= 0) {\n        upload_complete = true;\n    }\n    return upload_complete;\n}\n\nvoid cold_backup_context::file_upload_uncomplete(const std::string &filename)\n{\n    zauto_lock l(_lock);\n\n    dassert(_cur_upload_file_cnt >= 1, \"cur_upload_file_cnt = %d\", _cur_upload_file_cnt);\n    _cur_upload_file_cnt -= 1;\n    _file_remain_cnt += 1;\n    _file_status[filename] = file_status::FileUploadUncomplete;\n}\n\nvoid cold_backup_context::file_upload_complete(const std::string &filename)\n{\n    zauto_lock l(_lock);\n\n    dassert(_cur_upload_file_cnt >= 1, \"cur_upload_file_cnt = %d\", _cur_upload_file_cnt);\n    _cur_upload_file_cnt -= 1;\n    _file_status[filename] = file_status::FileUploadComplete;\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/backup/cold_backup_context.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/cpp/json_helper.h>\n#include <dsn/dist/block_service.h>\n\n#include \"common/backup_common.h\"\n\nclass replication_service_test_app;\n\nnamespace dsn {\nnamespace replication {\n\nclass replica;\n\n//\n//                                  ColdBackupInvalid\n//                                           |\n//                                           V\n//                         |<------ ColdBackupChecking ---------------------------------->|\n//                         |                 |                                            |\n//                         |                 V                                            |\n//                         |        ColdBackupChecked ----------------------------------->|\n//                         |                 |                                            |\n//                         |                 V                                            |\n// ColdBackupCompleted <---|        ColdBackupCheckpointing ----------------------------->|\n//          |              |                 |                                            
|\n//          |              |                 V                                            |--->\n//          ColdBackupCanceled\n//          |              |        ColdBackupCheckpointed ------------------------------>|\n//          |              |                 |                                            |\n//          |              |                 V                                            |\n//          |              |<------ ColdBackupUploading  <======> ColdBackupPaused ------>|\n//          |                                |                            |               |\n//          |                                |____________________________|               |\n//          |                                               |                             |\n//          |                                               V                             |\n//          |                                       ColdBackupFailed -------------------->|\n//          |                                                                             |\n//          |---------------------------------------------------------------------------->|\n//\nenum cold_backup_status\n{\n    ColdBackupInvalid = 0,\n    ColdBackupChecking,\n    ColdBackupChecked,\n    ColdBackupCheckpointing,\n    ColdBackupCheckpointed,\n    ColdBackupUploading,\n    ColdBackupPaused,\n    ColdBackupCanceled,\n    ColdBackupCompleted,\n    ColdBackupFailed\n};\nconst char *cold_backup_status_to_string(cold_backup_status status);\n\nstruct cold_backup_metadata\n{\n    int64_t checkpoint_decree;\n    int64_t checkpoint_timestamp;\n    std::vector<file_meta> files;\n    int64_t checkpoint_total_size;\n    DEFINE_JSON_SERIALIZATION(checkpoint_decree, checkpoint_timestamp, files, checkpoint_total_size)\n};\n\n//\n// the process of uploading the checkpoint directory to block filesystem:\n//      1, upload all the file of the checkpoint to block filesystem\n//      2, write a cold_backup_metadata to block 
filesystem(which includes all the file's name, size\n//         and md5 and so on)\n//      3, write a current_checkpoint file to block filesystem, which is used to mark which\n//         checkpoint is invalid\n//\n\n//\n// the process of check whether uploading is finished on block filesystem:\n//      1, check whether the current checkpoint file exist, if exist continue, otherwise not finish\n//      2, read the context of the current checkpoint file, the context of this file is the valid\n//         checkpoint dirname on block filesystem\n//      3, verify whether the checkpoint dirname is exist, if exist uploading is already finished,\n//         otherwise uploading is not finished\n//\n\nclass cold_backup_context : public ref_counter\n{\npublic:\n    explicit cold_backup_context(replica *r_,\n                                 const backup_request &request_,\n                                 int max_upload_file_cnt)\n        : request(request_),\n          block_service(nullptr),\n          checkpoint_decree(0),\n          checkpoint_timestamp(0),\n          durable_decree_when_checkpoint(-1),\n          checkpoint_file_total_size(0),\n          _status(ColdBackupInvalid),\n          _progress(0),\n          _upload_file_size(0),\n          _have_check_upload_status(false),\n          _have_write_backup_metadata(false),\n          _upload_status(UploadInvalid),\n          _max_concurrent_uploading_file_cnt(max_upload_file_cnt),\n          _cur_upload_file_cnt(0),\n          _file_remain_cnt(0),\n          _owner_replica(r_),\n          _start_time_ms(0)\n    {\n        sprintf(name,\n                \"backup{%d.%d.%s.%\" PRId64 \"}\",\n                request.pid.get_app_id(),\n                request.pid.get_partition_index(),\n                request.policy.policy_name.c_str(),\n                request.backup_id);\n        memset(_reason, 0, sizeof(_reason));\n    }\n\n    ~cold_backup_context() {}\n\n    // cancel backup.\n    //   {*} --> 
ColdBackupCanceled\n    //\n    // Will be called in replication thread.\n    void cancel();\n\n    // start checking backup on remote.\n    //   ColdBackupInvalid --> ColdBackupChecking\n    // Returns:\n    //   - true if status is successfully changed to ColdBackupChecking.\n    bool start_check();\n\n    // ignore checking backup on remote and switch backward status.\n    //   ColdBackupChecking --> ColdBackupInvalid\n    // Returns:\n    //   - true if status is successfully changed to ColdBackupInvalid.\n    bool ignore_check()\n    {\n        int checking = ColdBackupChecking;\n        return _status.compare_exchange_strong(checking, ColdBackupInvalid);\n    }\n\n    // mark failed when checking backup on remote.\n    //   ColdBackupChecking --> ColdBackupFailed\n    // Returns:\n    //   - true if status is successfully changed to ColdBackupFailed.\n    bool fail_check(const char *failure_reason);\n\n    // complete checking backup on remote.\n    //   ColdBackupChecking --> { ColdBackupChecked | ColdBackupCompleted }\n    // Returns:\n    //   - true if status is successfully changed to ColdBackupChecked or ColdBackupCompleted.\n    bool complete_check(bool uploaded);\n\n    // start generating checkpoint.\n    //   ColdBackupChecked --> ColdBackupCheckpointing\n    // Returns:\n    //   - true if status is successfully changed to ColdBackupCheckpointing.\n    bool start_checkpoint();\n\n    // ignore generating checkpoint and switch backward status.\n    //   ColdBackupCheckpointing --> ColdBackupChecked\n    // Returns:\n    //   - true if status is successfully changed to ColdBackupChecked.\n    bool ignore_checkpoint()\n    {\n        int checkpointing = ColdBackupCheckpointing;\n        return _status.compare_exchange_strong(checkpointing, ColdBackupChecked);\n    }\n\n    // mark failed when generating checkpoint.\n    //   ColdBackupCheckpointing --> ColdBackupFailed\n    // Returns:\n    //   - true if status is successfully changed to 
ColdBackupFailed.\n    bool fail_checkpoint(const char *failure_reason);\n\n    // complete generating checkpoint.\n    //   ColdBackupCheckpointing --> ColdBackupCheckpointed\n    // Returns:\n    //   - true if status is successfully changed to ColdBackupCheckpointed.\n    bool complete_checkpoint();\n\n    // start uploading checkpoint to remote.\n    //   { ColdBackupCheckpointed | ColdBackupPaused } --> ColdBackupUploading\n    //\n    // Will be called in replication thread.\n    // Returns:\n    //   - true if status is successfully changed to ColdBackupUploading.\n    bool start_upload()\n    {\n        int checkpointed = ColdBackupCheckpointed;\n        int paused = ColdBackupPaused;\n        return _status.compare_exchange_strong(checkpointed, ColdBackupUploading) ||\n               _status.compare_exchange_strong(paused, ColdBackupUploading);\n    }\n\n    // mark failed when uploading checkpoint to remote.\n    //   { ColdBackupUploading | ColdBackupPaused } --> ColdBackupFailed\n    // Returns:\n    //   - true if status is successfully changed to ColdBackupFailed.\n    bool fail_upload(const char *failure_reason);\n\n    // complete uploading checkpoint to remote.\n    //   { ColdBackupUploading | ColdBackupPaused } --> ColdBackupCompleted\n    // Returns:\n    //   - true if status is successfully changed to ColdBackupCompleted.\n    bool complete_upload();\n\n    // update progress.\n    // Progress should be in range of [0, 1000].\n    void update_progress(int progress)\n    {\n        dassert(progress >= 0 && progress <= cold_backup_constant::PROGRESS_FINISHED,\n                \"invalid progress %d\",\n                progress);\n        _progress.store(progress);\n    }\n\n    // check if it is ready for checking.\n    bool is_ready_for_check() const { return _status.load() == ColdBackupChecking; }\n\n    // check if it is ready for checkpointing.\n    bool is_checkpointing() const { return _status.load() == ColdBackupCheckpointing; }\n\n    // 
check if it is ready for uploading.\n    bool is_ready_for_upload() const { return _status.load() == ColdBackupUploading; }\n\n    // get current status.\n    cold_backup_status status() const { return (cold_backup_status)_status.load(); }\n\n    // get current progress.\n    int progress() const { return _progress.load(); }\n\n    // get failure reason.\n    const char *reason() const { return _reason; }\n\n    // check if backup is aleady exist on remote.\n    // Preconditions:\n    //   - name/request are set\n    //   - checkpoint_dir/checkpoint_decree/checkpoint_files are not set\n    //   - status is one of { ColdBackupChecking, ColdBackupCanceled }\n    // Will be called in background thread.\n    void check_backup_on_remote();\n\n    // upload backup checkpoint to remote.\n    // Preconditions:\n    //   - name/request are set\n    //   - checkpoint_dir/checkpoint_decree/checkpoint_files are set\n    //   - status is one of { ColdBackupUploading, ColdBackupPaused, ColdBackupCanceled }\n    // Will be called in background thread.\n    void upload_checkpoint_to_remote();\n\n    uint64_t get_start_time_ms() { return _start_time_ms; }\n\n    uint64_t get_upload_file_size() { return _upload_file_size.load(); }\n\n    int64_t get_checkpoint_total_size() { return checkpoint_file_total_size; }\n\nprivate:\n    void read_current_chkpt_file(const dist::block_service::block_file_ptr &file_handle);\n    void remote_chkpt_dir_exist(const std::string &chkpt_dirname);\n\n    void read_backup_metadata(const dist::block_service::block_file_ptr &file_handle);\n    // value is a json string, verify it's validity\n    // validity means uploading checkpoint directory complete, so just write_current_chkpt_file\n    // otherwise, upload checkpoint directory\n    void verify_backup_metadata(const blob &value);\n    // after upload_checkpoint_directory ---> write_backup_metadata --> write_current_chkpt_file -->\n    // notify meta\n    void write_backup_metadata();\n\n    void 
write_current_chkpt_file(const std::string &value);\n    // write value to file, if succeed then callback(true), else callback(false)\n    void on_write(const dist::block_service::block_file_ptr &file_handle,\n                  const blob &value,\n                  const std::function<void(bool)> &callback);\n    void prepare_upload();\n    void on_upload_chkpt_dir();\n    void upload_file(const std::string &local_filename);\n    void on_upload(const dist::block_service::block_file_ptr &file_handle,\n                   const std::string &full_path_local_file);\n    void on_upload_file_complete(const std::string &local_filename);\n\n    // functions access the structure protected by _lock\n    // return:\n    //  -- true, uploading is complete\n    //  -- false, uploading is not complete; and put uncomplete file into 'files'\n    bool upload_complete_or_fetch_uncomplete_files(std::vector<std::string> &files);\n    void file_upload_uncomplete(const std::string &filename);\n    void file_upload_complete(const std::string &filename);\n\npublic:\n    /// the following variables are public, and will only be set once, and will not be changed once\n    /// set.\n    char name[256]; // backup{<app_id>.<partition_index>.<policy_name>.<backup_id>}\n    // all logging should print the name\n    backup_request request;\n    dist::block_service::block_filesystem *block_service;\n    std::string backup_root;\n    decree checkpoint_decree;\n    int64_t checkpoint_timestamp;\n    decree durable_decree_when_checkpoint;\n    std::string checkpoint_dir;\n    std::vector<std::string> checkpoint_files;\n    std::vector<int64_t> checkpoint_file_sizes;\n    int64_t checkpoint_file_total_size;\n\nprivate:\n    friend class ::replication_service_test_app;\n\n    /// state variables\n    std::atomic_int _status;\n    std::atomic_int _progress; // [0,1000], 1000 means completed\n    char _reason[1024];        // failure reason\n\n    std::atomic_llong _upload_file_size;\n    // TODO: if 
checkpoint directory has many files, cold_backup_metadata may\n    // occupy a large amount of memory\n    // for example, if a single file occupies 32B, then 1,000,000 files may occupy 32MB\n    cold_backup_metadata _metadata;\n\n    enum upload_status\n    {\n        UploadInvalid = 0,\n        UploadUncomplete,\n        UploadComplete\n    };\n    enum file_status\n    {\n        FileUploadUncomplete = 0,\n        FileUploading,\n        FileUploadComplete\n    };\n\n    // these two atomic flags ensure that check_upload_status and write_backup_metadata are\n    // each executed only once\n    std::atomic_bool _have_check_upload_status;\n    std::atomic_bool _have_write_backup_metadata;\n\n    std::atomic_int _upload_status;\n\n    int32_t _max_concurrent_uploading_file_cnt;\n    // filename -> <filesize, md5>\n    std::map<std::string, std::pair<int64_t, std::string>> _file_infos;\n\n    zlock _lock; // lock the structure below\n    std::map<std::string, file_status> _file_status;\n    int32_t _cur_upload_file_cnt;\n    int32_t _file_remain_cnt;\n\n    replica *_owner_replica;\n    uint64_t _start_time_ms;\n};\n\ntypedef dsn::ref_ptr<cold_backup_context> cold_backup_context_ptr;\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/backup/replica_backup_manager.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"replica_backup_manager.h\"\n#include \"cold_backup_context.h\"\n#include \"replica/replica.h\"\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/dist/replication/replication_app_base.h>\n\nnamespace dsn {\nnamespace replication {\n\n// returns true if this checkpoint dir belongs to the policy\nstatic bool is_policy_checkpoint(const std::string &chkpt_dirname, const std::string &policy_name)\n{\n    std::vector<std::string> strs;\n    utils::split_args(chkpt_dirname.c_str(), strs, '.');\n    // backup_tmp.<policy_name>.* or backup.<policy_name>.*\n    return strs.size() >= 2 &&\n           (strs[0] == std::string(\"backup_tmp\") || strs[0] == std::string(\"backup\")) &&\n           strs[1] == policy_name;\n}\n\n// get all backup checkpoint dirs which belong to the policy\nstatic bool get_policy_checkpoint_dirs(const std::string &dir,\n                                       const std::string &policy,\n                                       /*out*/ std::vector<std::string> &chkpt_dirs)\n{\n    chkpt_dirs.clear();\n    // list sub dirs\n    std::vector<std::string> sub_dirs;\n    if 
(!utils::filesystem::get_subdirectories(dir, sub_dirs, false)) {\n        derror_f(\"list sub dirs of dir {} failed\", dir.c_str());\n        return false;\n    }\n\n    for (std::string &d : sub_dirs) {\n        std::string dirname = utils::filesystem::get_file_name(d);\n        if (is_policy_checkpoint(dirname, policy)) {\n            chkpt_dirs.push_back(std::move(dirname));\n        }\n    }\n    return true;\n}\n\nreplica_backup_manager::replica_backup_manager(replica *r) : replica_base(r), _replica(r) {}\n\nreplica_backup_manager::~replica_backup_manager()\n{\n    if (_collect_info_timer != nullptr) {\n        _collect_info_timer->cancel(true);\n    }\n}\n\nvoid replica_backup_manager::on_clear_cold_backup(const backup_clear_request &request)\n{\n    _replica->_checker.only_one_thread_access();\n\n    auto find = _replica->_cold_backup_contexts.find(request.policy_name);\n    if (find != _replica->_cold_backup_contexts.end()) {\n        cold_backup_context_ptr backup_context = find->second;\n        if (backup_context->is_checkpointing()) {\n            ddebug_replica(\n                \"{}: delay clearing obsoleted cold backup context, cause backup_status == \"\n                \"ColdBackupCheckpointing\",\n                backup_context->name);\n            tasking::enqueue(LPC_REPLICATION_COLD_BACKUP,\n                             &_replica->_tracker,\n                             [this, request]() { on_clear_cold_backup(request); },\n                             get_gpid().thread_hash(),\n                             std::chrono::seconds(100));\n            return;\n        }\n\n        _replica->_cold_backup_contexts.erase(request.policy_name);\n    }\n\n    background_clear_backup_checkpoint(request.policy_name);\n}\n\nvoid replica_backup_manager::start_collect_backup_info()\n{\n    if (_collect_info_timer == nullptr) {\n        _collect_info_timer =\n            tasking::enqueue_timer(LPC_PER_REPLICA_COLLECT_INFO_TIMER,\n                                
   &_replica->_tracker,\n                                   [this]() { collect_backup_info(); },\n                                   std::chrono::milliseconds(_replica->options()->gc_interval_ms),\n                                   get_gpid().thread_hash());\n    }\n}\n\nvoid replica_backup_manager::collect_backup_info()\n{\n    uint64_t cold_backup_running_count = 0;\n    uint64_t cold_backup_max_duration_time_ms = 0;\n    uint64_t cold_backup_max_upload_file_size = 0;\n    uint64_t now_ms = dsn_now_ms();\n\n    // collect backup info from all of the cold backup contexts\n    for (const auto &p : _replica->_cold_backup_contexts) {\n        const cold_backup_context_ptr &backup_context = p.second;\n        cold_backup_status backup_status = backup_context->status();\n        if (_replica->status() == partition_status::type::PS_PRIMARY) {\n            if (backup_status > ColdBackupInvalid && backup_status < ColdBackupCanceled) {\n                cold_backup_running_count++;\n            }\n        } else if (_replica->status() == partition_status::type::PS_SECONDARY) {\n            // secondary end backup with status ColdBackupCheckpointed\n            if (backup_status > ColdBackupInvalid && backup_status < ColdBackupCheckpointed) {\n                cold_backup_running_count++;\n            }\n        }\n\n        if (backup_status == ColdBackupUploading) {\n            cold_backup_max_duration_time_ms = std::max(\n                cold_backup_max_duration_time_ms, now_ms - backup_context->get_start_time_ms());\n            cold_backup_max_upload_file_size =\n                std::max(cold_backup_max_upload_file_size, backup_context->get_upload_file_size());\n        }\n    }\n\n    _replica->_cold_backup_running_count.store(cold_backup_running_count);\n    _replica->_cold_backup_max_duration_time_ms.store(cold_backup_max_duration_time_ms);\n    _replica->_cold_backup_max_upload_file_size.store(cold_backup_max_upload_file_size);\n}\n\nvoid 
replica_backup_manager::background_clear_backup_checkpoint(const std::string &policy_name)\n{\n    ddebug_replica(\"schedule to clear all checkpoint dirs of policy({}) after {} minutes\",\n                   policy_name,\n                   _replica->options()->cold_backup_checkpoint_reserve_minutes);\n    tasking::enqueue(\n        LPC_BACKGROUND_COLD_BACKUP,\n        &_replica->_tracker,\n        [this, policy_name]() { clear_backup_checkpoint(policy_name); },\n        get_gpid().thread_hash(),\n        std::chrono::minutes(_replica->options()->cold_backup_checkpoint_reserve_minutes));\n}\n\n// clear all checkpoint dirs of the policy\nvoid replica_backup_manager::clear_backup_checkpoint(const std::string &policy_name)\n{\n    ddebug_replica(\"clear all checkpoint dirs of policy({})\", policy_name);\n    auto backup_dir = _replica->_app->backup_dir();\n    if (!utils::filesystem::directory_exists(backup_dir)) {\n        return;\n    }\n\n    // Find the corresponding checkpoint dirs with policy name\n    std::vector<std::string> chkpt_dirs;\n    if (!get_policy_checkpoint_dirs(backup_dir, policy_name, chkpt_dirs)) {\n        dwarn_replica(\"get checkpoint dirs in backup dir({}) failed\", backup_dir);\n        return;\n    }\n\n    // Remove these checkpoint dirs\n    for (const std::string &dirname : chkpt_dirs) {\n        std::string full_path = utils::filesystem::path_combine(backup_dir, dirname);\n        if (utils::filesystem::remove_path(full_path)) {\n            ddebug_replica(\"remove backup checkpoint dir({}) succeed\", full_path);\n        } else {\n            dwarn_replica(\"remove backup checkpoint dir({}) failed\", full_path);\n        }\n    }\n}\n\nvoid replica_backup_manager::send_clear_request_to_secondaries(const gpid &pid,\n                                                               const std::string &policy_name)\n{\n    backup_clear_request request;\n    request.__set_pid(pid);\n    request.__set_policy_name(policy_name);\n\n    for (const 
auto &target_address : _replica->_primary_states.membership.secondaries) {\n        rpc::call_one_way_typed(\n            target_address, RPC_CLEAR_COLD_BACKUP, request, get_gpid().thread_hash());\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/backup/replica_backup_manager.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/dist/replication/replica_base.h>\n#include <dsn/dist/replication/replication_types.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass replica;\nclass replica_backup_manager : replica_base\n{\npublic:\n    explicit replica_backup_manager(replica *r);\n    ~replica_backup_manager();\n\n    void on_clear_cold_backup(const backup_clear_request &request);\n    void start_collect_backup_info();\n\nprivate:\n    void clear_backup_checkpoint(const std::string &policy_name);\n    void send_clear_request_to_secondaries(const gpid &pid, const std::string &policy_name);\n    void background_clear_backup_checkpoint(const std::string &policy_name);\n    void collect_backup_info();\n\n    replica *_replica;\n    dsn::task_ptr _collect_info_timer;\n\n    friend class replica;\n    friend class replica_backup_manager_test;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/backup/replica_backup_server.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"replica_backup_server.h\"\n#include \"replica_backup_manager.h\"\n#include \"replica/replica.h\"\n#include \"replica/replica_stub.h\"\n\nnamespace dsn {\nnamespace replication {\n\nreplica_backup_server::replica_backup_server(const replica_stub *rs) : _stub(rs)\n{\n    dsn_rpc_register_handler(RPC_COLD_BACKUP, \"cold_backup\", [this](message_ex *msg) {\n        on_cold_backup(backup_rpc::auto_reply(msg));\n    });\n    dsn_rpc_register_handler(RPC_CLEAR_COLD_BACKUP, \"clear_cold_backup\", [this](message_ex *msg) {\n        backup_clear_request clear_req;\n        unmarshall(msg, clear_req);\n        on_clear_cold_backup(clear_req);\n    });\n}\n\nvoid replica_backup_server::on_cold_backup(backup_rpc rpc)\n{\n    const backup_request &request = rpc.request();\n    backup_response &response = rpc.response();\n\n    ddebug(\"received cold backup request: backup{%s.%s.%\" PRId64 \"}\",\n           request.pid.to_string(),\n           request.policy.policy_name.c_str(),\n           request.backup_id);\n    response.pid = request.pid;\n    response.policy_name = request.policy.policy_name;\n    response.backup_id = request.backup_id;\n\n 
   if (_stub->options().cold_backup_root.empty()) {\n        derror(\"backup{%s.%s.%\" PRId64\n               \"}: cold_backup_root is empty, response ERR_OPERATION_DISABLED\",\n               request.pid.to_string(),\n               request.policy.policy_name.c_str(),\n               request.backup_id);\n        response.err = ERR_OPERATION_DISABLED;\n        return;\n    }\n\n    replica_ptr rep = _stub->get_replica(request.pid);\n    if (rep != nullptr) {\n        rep->on_cold_backup(request, response);\n    } else {\n        derror(\"backup{%s.%s.%\" PRId64 \"}: replica not found, response ERR_OBJECT_NOT_FOUND\",\n               request.pid.to_string(),\n               request.policy.policy_name.c_str(),\n               request.backup_id);\n        response.err = ERR_OBJECT_NOT_FOUND;\n    }\n}\n\nvoid replica_backup_server::on_clear_cold_backup(const backup_clear_request &request)\n{\n    ddebug_f(\"receive clear cold backup request: backup({}.{})\",\n             request.pid.to_string(),\n             request.policy_name.c_str());\n\n    replica_ptr rep = _stub->get_replica(request.pid);\n    if (rep != nullptr) {\n        rep->get_backup_manager()->on_clear_cold_backup(request);\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/backup/replica_backup_server.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/dist/replication/replication_types.h>\n#include <dsn/cpp/rpc_holder.h>\n\n#include \"common/backup_common.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_stub;\n\n// A server distributes the cold-backup task to the targeted replica.\nclass replica_backup_server\n{\npublic:\n    explicit replica_backup_server(const replica_stub *rs);\n\nprivate:\n    void on_cold_backup(backup_rpc rpc);\n\n    void on_clear_cold_backup(const backup_clear_request &request);\n\nprivate:\n    const replica_stub *_stub;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/backup/test/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_replica_backup_test)\n\nset(MY_PROJ_SRC \"\")\n\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS dsn_meta_server\n        dsn_replica_server\n        dsn_replication_common\n        dsn.block_service\n        dsn.block_service.local\n        dsn.block_service.fds\n        dsn_utils\n        hashtable\n        gtest\n)\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem)\n\nset(MY_BINPLACES\n        config-test.ini\n        run.sh\n)\n\ndsn_add_test()\n"
  },
  {
    "path": "src/replica/backup/test/config-test.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\n\n[apps.replica]\ntype = replica\npools = THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[core]\ntool = nativerun\n\n[tools.simple_logger]\nstderr_start_level = LOG_LEVEL_WARNING\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 2\n\n[threadpool.THREAD_POOL_DEFAULT]\nname = default\npartitioned = false\n\n[threadpool.THREAD_POOL_REPLICATION]\nname = replica\npartitioned = true\n\n[threadpool.THREAD_POOL_REPLICATION_LONG]\nname = replica_long\n\n[replication]\ncluster_name = master-cluster\n\n[duplication-group]\nmaster-cluster = 1\nslave-cluster  = 2\n"
  },
  {
    "path": "src/replica/backup/test/main.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n\n#include <dsn/service_api_cpp.h>\n\nint g_test_count = 0;\nint g_test_ret = 0;\n\nclass gtest_app : public dsn::service_app\n{\npublic:\n    gtest_app(const dsn::service_app_info *info) : ::dsn::service_app(info) {}\n\n    dsn::error_code start(const std::vector<std::string> &args) override\n    {\n        g_test_ret = RUN_ALL_TESTS();\n        g_test_count = 1;\n        return dsn::ERR_OK;\n    }\n\n    dsn::error_code stop(bool) override { return dsn::ERR_OK; }\n};\n\nGTEST_API_ int main(int argc, char **argv)\n{\n    testing::InitGoogleTest(&argc, argv);\n\n    dsn::service_app::register_factory<gtest_app>(\"replica\");\n\n    dsn_run_config(\"config-test.ini\", false);\n    while (g_test_count == 0) {\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n    }\n\n    dsn_exit(g_test_ret);\n}\n"
  },
  {
    "path": "src/replica/backup/test/replica_backup_manager_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"replica/test/replica_test_base.h\"\n#include \"replica/backup/replica_backup_manager.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_backup_manager_test : public replica_test_base\n{\npublic:\n    void clear_backup_checkpoint(const std::string &policy_name)\n    {\n        _replica->get_backup_manager()->clear_backup_checkpoint(policy_name);\n    }\n};\n\nTEST_F(replica_backup_manager_test, clear_cold_backup)\n{\n    std::string policy_name = \"test_policy\";\n\n    // create policy dir: <backup_dir>/backup.<policy_name>.*\n    std::string policy_dir = _replica->get_app()->backup_dir() + \"/backup.\" + policy_name;\n    utils::filesystem::create_directory(policy_dir);\n\n    // clear policy dir\n    clear_backup_checkpoint(policy_name);\n    ASSERT_FALSE(utils::filesystem::directory_exists(policy_dir));\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/backup/test/run.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\n./dsn_replica_backup_test\n\nif [ $? -ne 0 ]; then\n    tail -n 100 data/log/log.1.txt\n    if [ -f core ]; then\n        gdb ./dsn_replica_backup_test core -ex \"bt\"\n    fi\n    exit 1\nfi\n"
  },
  {
    "path": "src/replica/bulk_load/replica_bulk_loader.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/dist/block_service.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/utility/fail_point.h>\n#include <dsn/utility/filesystem.h>\n\n#include \"replica_bulk_loader.h\"\n#include \"replica/disk_cleaner.h\"\n\nnamespace dsn {\nnamespace replication {\n\nreplica_bulk_loader::replica_bulk_loader(replica *r)\n    : replica_base(r), _replica(r), _stub(r->get_replica_stub())\n{\n}\n\nreplica_bulk_loader::~replica_bulk_loader() {}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::on_bulk_load(const bulk_load_request &request,\n                                       /*out*/ bulk_load_response &response)\n{\n    _replica->_checker.only_one_thread_access();\n\n    response.pid = request.pid;\n    response.app_name = request.app_name;\n    response.err = ERR_OK;\n\n    if (status() != partition_status::PS_PRIMARY) {\n        dwarn_replica(\"receive bulk load request with wrong status {}\", enum_to_string(status()));\n        response.err = ERR_INVALID_STATE;\n        return;\n    }\n\n    if (request.ballot != get_ballot()) {\n        dwarn_replica(\n            
\"receive bulk load request with wrong version, remote ballot={}, local ballot={}\",\n            request.ballot,\n            get_ballot());\n        response.err = ERR_INVALID_STATE;\n        return;\n    }\n\n    ddebug_replica(\"receive bulk load request, remote provider = {}, remote_root_path = {}, \"\n                   \"cluster_name = {}, app_name = {}, \"\n                   \"meta_bulk_load_status = {}, local bulk_load_status = {}\",\n                   request.remote_provider_name,\n                   request.remote_root_path,\n                   request.cluster_name,\n                   request.app_name,\n                   enum_to_string(request.meta_bulk_load_status),\n                   enum_to_string(_status));\n\n    error_code ec = do_bulk_load(request.app_name,\n                                 request.meta_bulk_load_status,\n                                 request.cluster_name,\n                                 request.remote_provider_name,\n                                 request.remote_root_path);\n    if (ec != ERR_OK) {\n        response.err = ec;\n        response.primary_bulk_load_status = _status;\n        return;\n    }\n\n    report_bulk_load_states_to_meta(\n        request.meta_bulk_load_status, request.query_bulk_load_metadata, response);\n    if (response.err != ERR_OK) {\n        return;\n    }\n\n    broadcast_group_bulk_load(request);\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::broadcast_group_bulk_load(const bulk_load_request &meta_req)\n{\n    if (!_replica->_primary_states.learners.empty()) {\n        dwarn_replica(\"has learners, skip broadcast group bulk load request\");\n        return;\n    }\n\n    if (!_replica->_primary_states.group_bulk_load_pending_replies.empty()) {\n        dwarn_replica(\"{} group bulk_load replies are still pending, cancel it firstly\",\n                      _replica->_primary_states.group_bulk_load_pending_replies.size());\n        for (auto &kv : 
_replica->_primary_states.group_bulk_load_pending_replies) {\n            CLEANUP_TASK_ALWAYS(kv.second);\n        }\n        _replica->_primary_states.group_bulk_load_pending_replies.clear();\n    }\n\n    ddebug_replica(\"start to broadcast group bulk load\");\n\n    for (const auto &addr : _replica->_primary_states.membership.secondaries) {\n        if (addr == _stub->_primary_address)\n            continue;\n\n        auto request = make_unique<group_bulk_load_request>();\n        request->app_name = _replica->_app_info.app_name;\n        request->target_address = addr;\n        _replica->_primary_states.get_replica_config(partition_status::PS_SECONDARY,\n                                                     request->config);\n        request->cluster_name = meta_req.cluster_name;\n        request->provider_name = meta_req.remote_provider_name;\n        request->meta_bulk_load_status = meta_req.meta_bulk_load_status;\n        request->remote_root_path = meta_req.remote_root_path;\n\n        ddebug_replica(\"send group_bulk_load_request to {}\", addr.to_string());\n\n        group_bulk_load_rpc rpc(\n            std::move(request), RPC_GROUP_BULK_LOAD, 0_ms, 0, get_gpid().thread_hash());\n        auto callback_task = rpc.call(addr, tracker(), [this, rpc](error_code err) mutable {\n            on_group_bulk_load_reply(err, rpc.request(), rpc.response());\n        });\n        _replica->_primary_states.group_bulk_load_pending_replies[addr] = callback_task;\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::on_group_bulk_load(const group_bulk_load_request &request,\n                                             /*out*/ group_bulk_load_response &response)\n{\n    _replica->_checker.only_one_thread_access();\n\n    response.err = ERR_OK;\n\n    if (request.config.ballot < get_ballot()) {\n        response.err = ERR_VERSION_OUTDATED;\n        dwarn_replica(\n            \"receive outdated group_bulk_load request, request ballot({}) VS local 
ballot({})\",\n            request.config.ballot,\n            get_ballot());\n        return;\n    }\n    if (request.config.ballot > get_ballot()) {\n        response.err = ERR_INVALID_STATE;\n        dwarn_replica(\"receive group_bulk_load request, local ballot is outdated, request \"\n                      \"ballot({}) VS local ballot({})\",\n                      request.config.ballot,\n                      get_ballot());\n        return;\n    }\n    if (status() != request.config.status) {\n        response.err = ERR_INVALID_STATE;\n        dwarn_replica(\"status changed, status should be {}, but {}\",\n                      enum_to_string(request.config.status),\n                      enum_to_string(status()));\n        return;\n    }\n\n    ddebug_replica(\"receive group_bulk_load request, primary address = {}, ballot = {}, \"\n                   \"meta bulk_load_status = {}, local bulk_load_status = {}\",\n                   request.config.primary.to_string(),\n                   request.config.ballot,\n                   enum_to_string(request.meta_bulk_load_status),\n                   enum_to_string(_status));\n\n    error_code ec = do_bulk_load(request.app_name,\n                                 request.meta_bulk_load_status,\n                                 request.cluster_name,\n                                 request.provider_name,\n                                 request.remote_root_path);\n    if (ec != ERR_OK) {\n        response.err = ec;\n        response.status = _status;\n        return;\n    }\n\n    report_bulk_load_states_to_primary(request.meta_bulk_load_status, response);\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::on_group_bulk_load_reply(error_code err,\n                                                   const group_bulk_load_request &req,\n                                                   const group_bulk_load_response &resp)\n{\n    _replica->_checker.only_one_thread_access();\n\n    if 
(partition_status::PS_PRIMARY != status()) {\n        derror_replica(\"replica status={}, should be {}\",\n                       enum_to_string(status()),\n                       enum_to_string(partition_status::PS_PRIMARY));\n        return;\n    }\n\n    _replica->_primary_states.group_bulk_load_pending_replies.erase(req.target_address);\n\n    if (err != ERR_OK) {\n        derror_replica(\"failed to receive group_bulk_load_reply from {}, error = {}\",\n                       req.target_address.to_string(),\n                       err.to_string());\n        _replica->_primary_states.reset_node_bulk_load_states(req.target_address);\n        return;\n    }\n\n    if (resp.err != ERR_OK) {\n        derror_replica(\"receive group_bulk_load response from {} failed, error = {}\",\n                       req.target_address.to_string(),\n                       resp.err.to_string());\n        _replica->_primary_states.reset_node_bulk_load_states(req.target_address);\n        return;\n    }\n\n    if (req.config.ballot != get_ballot()) {\n        derror_replica(\"recevied wrong group_bulk_load response from {}, request ballot = {}, \"\n                       \"current ballot = {}\",\n                       req.target_address.to_string(),\n                       req.config.ballot,\n                       get_ballot());\n        _replica->_primary_states.reset_node_bulk_load_states(req.target_address);\n        return;\n    }\n\n    _replica->_primary_states.secondary_bulk_load_states[req.target_address] = resp.bulk_load_state;\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nerror_code replica_bulk_loader::do_bulk_load(const std::string &app_name,\n                                             bulk_load_status::type meta_status,\n                                             const std::string &cluster_name,\n                                             const std::string &provider_name,\n                                             const std::string &remote_root_path)\n{\n    if 
(status() != partition_status::PS_PRIMARY && status() != partition_status::PS_SECONDARY) {\n        return ERR_INVALID_STATE;\n    }\n\n    bulk_load_status::type local_status = _status;\n    error_code ec = validate_status(meta_status, local_status);\n    if (ec != ERR_OK) {\n        derror_replica(\"invalid bulk load status, remote = {}, local = {}\",\n                       enum_to_string(meta_status),\n                       enum_to_string(local_status));\n        return ec;\n    }\n\n    switch (meta_status) {\n    case bulk_load_status::BLS_DOWNLOADING:\n        if (local_status == bulk_load_status::BLS_INVALID ||\n            local_status == bulk_load_status::BLS_PAUSED ||\n            local_status == bulk_load_status::BLS_INGESTING ||\n            local_status == bulk_load_status::BLS_SUCCEED) {\n            const std::string remote_dir = get_remote_bulk_load_dir(\n                app_name, cluster_name, remote_root_path, get_gpid().get_partition_index());\n            ec = start_download(remote_dir, provider_name);\n        }\n        break;\n    case bulk_load_status::BLS_INGESTING:\n        if (local_status == bulk_load_status::BLS_DOWNLOADED) {\n            start_ingestion();\n        } else if (local_status == bulk_load_status::BLS_INGESTING &&\n                   status() == partition_status::PS_PRIMARY) {\n            check_ingestion_finish();\n        }\n        break;\n    case bulk_load_status::BLS_SUCCEED:\n        if (local_status == bulk_load_status::BLS_DOWNLOADED ||\n            local_status == bulk_load_status::BLS_INGESTING) {\n            handle_bulk_load_succeed();\n        } else if (local_status == bulk_load_status::BLS_SUCCEED ||\n                   local_status == bulk_load_status::BLS_INVALID) {\n            handle_bulk_load_finish(meta_status);\n        }\n        break;\n    case bulk_load_status::BLS_PAUSING:\n        pause_bulk_load();\n        break;\n    case bulk_load_status::BLS_CANCELED:\n        
handle_bulk_load_finish(bulk_load_status::BLS_CANCELED);\n        break;\n    case bulk_load_status::BLS_FAILED:\n        handle_bulk_load_finish(bulk_load_status::BLS_FAILED);\n        _stub->_counter_bulk_load_failed_count->increment();\n        break;\n    default:\n        break;\n    }\n    return ec;\n}\n\n/*static*/ error_code\nreplica_bulk_loader::validate_status(const bulk_load_status::type meta_status,\n                                     const bulk_load_status::type local_status)\n{\n    error_code err = ERR_OK;\n    switch (meta_status) {\n    case bulk_load_status::BLS_DOWNLOADING:\n        if (local_status == bulk_load_status::BLS_FAILED ||\n            local_status == bulk_load_status::BLS_PAUSING ||\n            local_status == bulk_load_status::BLS_CANCELED) {\n            err = ERR_INVALID_STATE;\n        }\n        break;\n    case bulk_load_status::BLS_DOWNLOADED:\n        if (local_status != bulk_load_status::BLS_DOWNLOADED) {\n            err = ERR_INVALID_STATE;\n        }\n        break;\n    case bulk_load_status::BLS_INGESTING:\n        if (local_status != bulk_load_status::BLS_DOWNLOADED &&\n            local_status != bulk_load_status::BLS_INGESTING) {\n            err = ERR_INVALID_STATE;\n        }\n        break;\n    case bulk_load_status::BLS_SUCCEED:\n        if (local_status != bulk_load_status::BLS_DOWNLOADED &&\n            local_status != bulk_load_status::BLS_INGESTING &&\n            local_status != bulk_load_status::BLS_SUCCEED &&\n            local_status != bulk_load_status::BLS_INVALID) {\n            err = ERR_INVALID_STATE;\n        }\n        break;\n    case bulk_load_status::BLS_PAUSING:\n        if (local_status != bulk_load_status::BLS_INVALID &&\n            local_status != bulk_load_status::BLS_DOWNLOADING &&\n            local_status != bulk_load_status::BLS_DOWNLOADED &&\n            local_status != bulk_load_status::BLS_PAUSING &&\n            local_status != bulk_load_status::BLS_PAUSED) {\n            err = 
ERR_INVALID_STATE;\n        }\n        break;\n    case bulk_load_status::BLS_PAUSED:\n        err = ERR_INVALID_STATE;\n        break;\n    default:\n        // no limit in other status\n        break;\n    }\n    return err;\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nerror_code replica_bulk_loader::start_download(const std::string &remote_dir,\n                                               const std::string &provider_name)\n{\n    if (_stub->_bulk_load_downloading_count.load() >=\n        _stub->_max_concurrent_bulk_load_downloading_count) {\n        dwarn_replica(\"node[{}] already has {} replica downloading, wait for next round\",\n                      _stub->_primary_address_str,\n                      _stub->_bulk_load_downloading_count.load());\n        return ERR_BUSY;\n    }\n\n    // reset local bulk load context and state\n    if (_status == bulk_load_status::BLS_INVALID) {\n        // try to remove possible garbage bulk load data when actually starting bulk load\n        remove_local_bulk_load_dir(utils::filesystem::path_combine(\n            _replica->_dir, bulk_load_constant::BULK_LOAD_LOCAL_ROOT_DIR));\n    }\n    if (status() == partition_status::PS_PRIMARY) {\n        _replica->_primary_states.cleanup_bulk_load_states();\n    }\n    clear_bulk_load_states();\n\n    _status = bulk_load_status::BLS_DOWNLOADING;\n    ++_stub->_bulk_load_downloading_count;\n    ddebug_replica(\"node[{}] has {} replica executing downloading\",\n                   _stub->_primary_address_str,\n                   _stub->_bulk_load_downloading_count.load());\n    _bulk_load_start_time_ms = dsn_now_ms();\n    _stub->_counter_bulk_load_downloading_count->increment();\n\n    // create local bulk load dir\n    if (!utils::filesystem::directory_exists(_replica->_dir)) {\n        derror_replica(\"_dir({}) is not existed\", _replica->_dir);\n        return ERR_FILE_OPERATION_FAILED;\n    }\n    const std::string local_dir = utils::filesystem::path_combine(\n        
_replica->_dir, bulk_load_constant::BULK_LOAD_LOCAL_ROOT_DIR);\n    if (!utils::filesystem::directory_exists(local_dir) &&\n        !utils::filesystem::create_directory(local_dir)) {\n        derror_replica(\"create bulk_load_dir({}) failed\", local_dir);\n        return ERR_FILE_OPERATION_FAILED;\n    }\n\n    // start download\n    _is_downloading.store(true);\n    _download_task = tasking::enqueue(\n        LPC_BACKGROUND_BULK_LOAD,\n        tracker(),\n        std::bind(\n            &replica_bulk_loader::download_files, this, provider_name, remote_dir, local_dir));\n    return ERR_OK;\n}\n\n// ThreadPool: THREAD_POOL_DEFAULT\nvoid replica_bulk_loader::download_files(const std::string &provider_name,\n                                         const std::string &remote_dir,\n                                         const std::string &local_dir)\n{\n    FAIL_POINT_INJECT_F(\"replica_bulk_loader_download_files\", [](string_view) {});\n\n    ddebug_replica(\"start to download files\");\n    dist::block_service::block_filesystem *fs =\n        _stub->_block_service_manager.get_or_create_block_filesystem(provider_name);\n\n    // download metadata file synchronously\n    uint64_t file_size = 0;\n    error_code err = _stub->_block_service_manager.download_file(\n        remote_dir, local_dir, bulk_load_constant::BULK_LOAD_METADATA, fs, file_size);\n    {\n        zauto_write_lock l(_lock);\n        if (err != ERR_OK && err != ERR_PATH_ALREADY_EXIST) {\n            try_decrease_bulk_load_download_count();\n            _download_status.store(err);\n            derror_replica(\"download bulk load metadata file failed, error = {}\", err.to_string());\n            return;\n        }\n\n        // parse metadata\n        const std::string &local_metadata_file_name =\n            utils::filesystem::path_combine(local_dir, bulk_load_constant::BULK_LOAD_METADATA);\n        err = parse_bulk_load_metadata(local_metadata_file_name);\n        if (err != ERR_OK) {\n            
try_decrease_bulk_load_download_count();\n            _download_status.store(err);\n            derror_replica(\"parse bulk load metadata failed, error = {}\", err.to_string());\n            return;\n        }\n    }\n\n    // download sst files asynchronously\n    if (!_metadata.files.empty()) {\n        const file_meta &f_meta = _metadata.files[0];\n        _download_files_task[f_meta.name] = tasking::enqueue(\n            LPC_BACKGROUND_BULK_LOAD,\n            tracker(),\n            std::bind(&replica_bulk_loader::download_sst_file, this, remote_dir, local_dir, 0, fs));\n    }\n}\n\n// ThreadPool: THREAD_POOL_DEFAULT\nvoid replica_bulk_loader::download_sst_file(const std::string &remote_dir,\n                                            const std::string &local_dir,\n                                            int32_t file_index,\n                                            dist::block_service::block_filesystem *fs)\n{\n    const file_meta &f_meta = _metadata.files[file_index];\n    uint64_t f_size = 0;\n    std::string f_md5;\n    error_code ec = _stub->_block_service_manager.download_file(\n        remote_dir, local_dir, f_meta.name, fs, f_size, f_md5);\n    const std::string &file_name = utils::filesystem::path_combine(local_dir, f_meta.name);\n    bool verified = false;\n    if (ec == ERR_PATH_ALREADY_EXIST) {\n        // We are not sure if the file was cached by system. 
And we couldn't\n        // afford the io overhead which is cased by reading file in verify_file(),\n        // so if file exist we just verify file size\n        if (utils::filesystem::verify_file_size(file_name, f_meta.size)) {\n            // local file exist and is verified\n            ec = ERR_OK;\n            f_size = f_meta.size;\n            verified = true;\n        } else {\n            derror_replica(\"file({}) exists, but not verified, try to remove local file \"\n                           \"and redownload it\",\n                           file_name);\n            if (!utils::filesystem::remove_path(file_name)) {\n                derror_replica(\"failed to remove file({})\", file_name);\n                ec = ERR_FILE_OPERATION_FAILED;\n            } else {\n                ec = _stub->_block_service_manager.download_file(\n                    remote_dir, local_dir, f_meta.name, fs, f_size, f_md5);\n            }\n        }\n    }\n    // Here we verify md5 and file size, md5 was calculated\n    // from download buffer, file size is get from filesystem\n    if (ec == ERR_OK && !verified) {\n        if (!f_meta.md5.empty() && f_md5 != f_meta.md5) {\n            ec = ERR_CORRUPTION;\n        } else if (!utils::filesystem::verify_file_size(file_name, f_meta.size)) {\n            ec = ERR_CORRUPTION;\n        }\n    }\n    if (ec != ERR_OK) {\n        {\n            zauto_write_lock l(_lock);\n            try_decrease_bulk_load_download_count();\n            _download_status.store(ec);\n        }\n        derror_replica(\"failed to download file({}), error = {}\", f_meta.name, ec.to_string());\n        _stub->_counter_bulk_load_download_file_fail_count->increment();\n        return;\n    }\n    // download file succeed, update progress\n    update_bulk_load_download_progress(f_size, f_meta.name);\n    _stub->_counter_bulk_load_download_file_succ_count->increment();\n    _stub->_counter_bulk_load_download_file_size->add(f_size);\n\n    // download next 
file\n    if (file_index + 1 < _metadata.files.size()) {\n        const file_meta &f_meta = _metadata.files[file_index + 1];\n        _download_files_task[f_meta.name] =\n            tasking::enqueue(LPC_BACKGROUND_BULK_LOAD,\n                             tracker(),\n                             std::bind(&replica_bulk_loader::download_sst_file,\n                                       this,\n                                       remote_dir,\n                                       local_dir,\n                                       file_index + 1,\n                                       fs));\n    }\n}\n\n// ThreadPool: THREAD_POOL_DEFAULT\n// need to acquire write lock while calling it\nerror_code replica_bulk_loader::parse_bulk_load_metadata(const std::string &fname)\n{\n    std::string buf;\n    error_code ec = utils::filesystem::read_file(fname, buf);\n    if (ec != ERR_OK) {\n        derror_replica(\"read file {} failed, error = {}\", fname, ec);\n        return ec;\n    }\n\n    blob bb = blob::create_from_bytes(std::move(buf));\n    if (!json::json_forwarder<bulk_load_metadata>::decode(bb, _metadata)) {\n        derror_replica(\"file({}) is damaged\", fname);\n        return ERR_CORRUPTION;\n    }\n\n    if (_metadata.file_total_size <= 0) {\n        derror_replica(\"bulk_load_metadata has invalid file_total_size({})\",\n                       _metadata.file_total_size);\n        return ERR_CORRUPTION;\n    }\n\n    return ERR_OK;\n}\n\n// ThreadPool: THREAD_POOL_DEFAULT\nvoid replica_bulk_loader::update_bulk_load_download_progress(uint64_t file_size,\n                                                             const std::string &file_name)\n{\n    {\n        zauto_write_lock l(_lock);\n        if (_metadata.file_total_size <= 0) {\n            derror_replica(\"update downloading file({}) progress failed, metadata has invalid \"\n                           \"file_total_size({}), current status = {}\",\n                           file_name,\n                  
         _metadata.file_total_size,\n                           enum_to_string(_status));\n            return;\n        }\n\n        ddebug_replica(\"update progress after downloading file({})\", file_name);\n        _cur_downloaded_size.fetch_add(file_size);\n        auto total_size = static_cast<double>(_metadata.file_total_size);\n        auto cur_downloaded_size = static_cast<double>(_cur_downloaded_size.load());\n        auto cur_progress = static_cast<int32_t>((cur_downloaded_size / total_size) * 100);\n        _download_progress.store(cur_progress);\n        ddebug_replica(\"total_size = {}, cur_downloaded_size = {}, progress = {}\",\n                       total_size,\n                       cur_downloaded_size,\n                       cur_progress);\n    }\n\n    tasking::enqueue(LPC_REPLICATION_COMMON,\n                     tracker(),\n                     std::bind(&replica_bulk_loader::check_download_finish, this),\n                     get_gpid().thread_hash());\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION, THREAD_POOL_DEFAULT\n// need to acquire write lock while calling it\nvoid replica_bulk_loader::try_decrease_bulk_load_download_count()\n{\n    if (!_is_downloading.load()) {\n        return;\n    }\n    --_stub->_bulk_load_downloading_count;\n    _is_downloading.store(false);\n    ddebug_replica(\"node[{}] has {} replica executing downloading\",\n                   _stub->_primary_address_str,\n                   _stub->_bulk_load_downloading_count.load());\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::check_download_finish()\n{\n    if (_download_progress.load() == bulk_load_constant::PROGRESS_FINISHED &&\n        _status == bulk_load_status::BLS_DOWNLOADING) {\n        ddebug_replica(\"download all files succeed\");\n        _status = bulk_load_status::BLS_DOWNLOADED;\n        {\n            zauto_write_lock l(_lock);\n            try_decrease_bulk_load_download_count();\n            cleanup_download_tasks();\n        }\n 
   }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::start_ingestion()\n{\n    _status = bulk_load_status::BLS_INGESTING;\n    _stub->_counter_bulk_load_ingestion_count->increment();\n    if (status() == partition_status::PS_PRIMARY) {\n        _replica->_primary_states.ingestion_is_empty_prepare_sent = false;\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::check_ingestion_finish()\n{\n    if (_replica->_app->get_ingestion_status() == ingestion_status::IS_SUCCEED &&\n        !_replica->_primary_states.ingestion_is_empty_prepare_sent) {\n        // send an empty prepare to gurantee secondary commit ingestion request, and set\n        // `pop_all_committed_mutations` as true\n        // ingestion is a special write request, replay this mutation can not learn data from\n        // external files, so when ingestion succeed, we should create a checkpoint\n        // if learn is evoked after ingestion, we should gurantee that learner should learn from\n        // checkpoint, to gurantee the condition above, we should pop all committed mutations in\n        // prepare list to gurantee learn type is LT_APP\n        mutation_ptr mu = _replica->new_mutation(invalid_decree);\n        mu->add_client_request(RPC_REPLICATION_WRITE_EMPTY, nullptr);\n        _replica->init_prepare(mu, false, true);\n        _replica->_primary_states.ingestion_is_empty_prepare_sent = true;\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::handle_bulk_load_succeed()\n{\n    // generate checkpoint\n    _replica->init_checkpoint(true);\n\n    _replica->_app->set_ingestion_status(ingestion_status::IS_INVALID);\n    _status = bulk_load_status::BLS_SUCCEED;\n    _stub->_counter_bulk_load_succeed_count->increment();\n\n    // send an empty prepare again to gurantee that learner should learn from checkpoint\n    if (status() == partition_status::PS_PRIMARY) {\n        mutation_ptr mu = 
_replica->new_mutation(invalid_decree);\n        mu->add_client_request(RPC_REPLICATION_WRITE_EMPTY, nullptr);\n        _replica->init_prepare(mu, false, true);\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::handle_bulk_load_finish(bulk_load_status::type new_status)\n{\n    if (is_cleaned_up()) {\n        ddebug_replica(\"bulk load states have been cleaned up\");\n        return;\n    }\n\n    if (status() == partition_status::PS_PRIMARY) {\n        for (const auto &target_address : _replica->_primary_states.membership.secondaries) {\n            _replica->_primary_states.reset_node_bulk_load_states(target_address);\n        }\n    }\n\n    ddebug_replica(\"bulk load finished, old_status = {}, new_status = {}\",\n                   enum_to_string(_status),\n                   enum_to_string(new_status));\n\n    // remove local bulk load dir\n    std::string bulk_load_dir = utils::filesystem::path_combine(\n        _replica->_dir, bulk_load_constant::BULK_LOAD_LOCAL_ROOT_DIR);\n    remove_local_bulk_load_dir(bulk_load_dir);\n    clear_bulk_load_states();\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::remove_local_bulk_load_dir(const std::string &bulk_load_dir)\n{\n    if (!utils::filesystem::directory_exists(bulk_load_dir)) {\n        return;\n    }\n    // Rename bulk_load_dir to ${replica_dir}.bulk_load.timestamp.gar before remove it.\n    // Because we download sst files asynchronously and couldn't remove a directory while writing\n    // files in it.\n    std::string garbage_dir = fmt::format(\"{}.{}.{}{}\",\n                                          _replica->_dir,\n                                          bulk_load_constant::BULK_LOAD_LOCAL_ROOT_DIR,\n                                          std::to_string(dsn_now_ms()),\n                                          kFolderSuffixGar);\n    if (!utils::filesystem::rename_path(bulk_load_dir, garbage_dir)) {\n        derror_replica(\"rename bulk_load dir({}) 
failed.\", bulk_load_dir);\n        return;\n    }\n    if (!utils::filesystem::remove_path(garbage_dir)) {\n        derror_replica(\n            \"remove bulk_load gar dir({}) failed, disk cleaner would retry to remove it.\",\n            garbage_dir);\n    }\n    ddebug_replica(\"remove bulk_load dir({}) succeed.\", garbage_dir);\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\n// need to acquire write lock while calling it\nvoid replica_bulk_loader::cleanup_download_tasks()\n{\n    for (auto &kv : _download_files_task) {\n        cleanup_download_task(kv.second);\n    }\n    cleanup_download_task(_download_task);\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nbool replica_bulk_loader::cleanup_download_task(task_ptr task_)\n{\n    CLEANUP_TASK(task_, false)\n    return true;\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::clear_bulk_load_states()\n{\n    if (_status == bulk_load_status::BLS_DOWNLOADING) {\n        try_decrease_bulk_load_download_count();\n    }\n\n    {\n        zauto_write_lock l(_lock);\n        cleanup_download_tasks();\n        _download_files_task.clear();\n        _download_task = nullptr;\n        _metadata.files.clear();\n        _metadata.file_total_size = 0;\n        _cur_downloaded_size.store(0);\n        _download_progress.store(0);\n        _download_status.store(ERR_OK);\n    }\n\n    _replica->_is_bulk_load_ingestion = false;\n    _replica->_app->set_ingestion_status(ingestion_status::IS_INVALID);\n\n    _bulk_load_start_time_ms = 0;\n    _replica->_bulk_load_ingestion_start_time_ms = 0;\n\n    _status = bulk_load_status::BLS_INVALID;\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nbool replica_bulk_loader::is_cleaned_up()\n{\n    if (_status != bulk_load_status::BLS_INVALID) {\n        return false;\n    }\n    {\n        // download context not cleaned up\n        zauto_read_lock l(_lock);\n        if (_cur_downloaded_size.load() != 0 || _download_progress.load() != 0 ||\n            _download_status.load() != 
ERR_OK || _download_files_task.size() != 0 ||\n            _download_task != nullptr || _metadata.files.size() != 0 ||\n            _metadata.file_total_size != 0) {\n            return false;\n        }\n    }\n    // ingestion context not cleaned up\n    if (_replica->_is_bulk_load_ingestion ||\n        _replica->_app->get_ingestion_status() != ingestion_status::IS_INVALID) {\n        return false;\n    }\n    // local dir exists\n    std::string bulk_load_dir = utils::filesystem::path_combine(\n        _replica->_dir, bulk_load_constant::BULK_LOAD_LOCAL_ROOT_DIR);\n    return !utils::filesystem::directory_exists(bulk_load_dir);\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::pause_bulk_load()\n{\n    if (_status == bulk_load_status::BLS_PAUSED) {\n        ddebug_replica(\"bulk load has been paused\");\n        return;\n    }\n    if (_status == bulk_load_status::BLS_DOWNLOADING) {\n        zauto_write_lock l(_lock);\n        cleanup_download_tasks();\n        try_decrease_bulk_load_download_count();\n    }\n    _status = bulk_load_status::BLS_PAUSED;\n    ddebug_replica(\"bulk load is paused\");\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::report_bulk_load_states_to_meta(bulk_load_status::type remote_status,\n                                                          bool report_metadata,\n                                                          /*out*/ bulk_load_response &response)\n{\n    if (status() != partition_status::PS_PRIMARY) {\n        response.err = ERR_INVALID_STATE;\n        return;\n    }\n\n    if (report_metadata) {\n        zauto_read_lock l(_lock);\n        if (!_metadata.files.empty()) {\n            response.__set_metadata(_metadata);\n        }\n    }\n\n    switch (remote_status) {\n    case bulk_load_status::BLS_DOWNLOADING:\n    case bulk_load_status::BLS_DOWNLOADED:\n        report_group_download_progress(response);\n        break;\n    case bulk_load_status::BLS_INGESTING:\n        
report_group_ingestion_status(response);\n        break;\n    case bulk_load_status::BLS_SUCCEED:\n    case bulk_load_status::BLS_CANCELED:\n    case bulk_load_status::BLS_FAILED:\n        report_group_cleaned_up(response);\n        break;\n    case bulk_load_status::BLS_PAUSING:\n        report_group_is_paused(response);\n        break;\n    default:\n        break;\n    }\n\n    response.primary_bulk_load_status = _status;\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::report_group_download_progress(/*out*/ bulk_load_response &response)\n{\n    if (status() != partition_status::PS_PRIMARY) {\n        dwarn_replica(\"replica status={}, should be {}\",\n                      enum_to_string(status()),\n                      enum_to_string(partition_status::PS_PRIMARY));\n        response.err = ERR_INVALID_STATE;\n        return;\n    }\n\n    partition_bulk_load_state primary_state;\n    {\n        zauto_read_lock l(_lock);\n        primary_state.__set_download_progress(_download_progress.load());\n        primary_state.__set_download_status(_download_status.load());\n    }\n    response.group_bulk_load_state[_replica->_primary_states.membership.primary] = primary_state;\n    ddebug_replica(\"primary = {}, download progress = {}%, status = {}\",\n                   _replica->_primary_states.membership.primary.to_string(),\n                   primary_state.download_progress,\n                   primary_state.download_status);\n\n    int32_t total_progress = primary_state.download_progress;\n    for (const auto &target_address : _replica->_primary_states.membership.secondaries) {\n        const auto &secondary_state =\n            _replica->_primary_states.secondary_bulk_load_states[target_address];\n        int32_t s_progress =\n            secondary_state.__isset.download_progress ? secondary_state.download_progress : 0;\n        error_code s_status =\n            secondary_state.__isset.download_status ? 
secondary_state.download_status : ERR_OK;\n        ddebug_replica(\"secondary = {}, download progress = {}%, status={}\",\n                       target_address.to_string(),\n                       s_progress,\n                       s_status);\n        response.group_bulk_load_state[target_address] = secondary_state;\n        total_progress += s_progress;\n    }\n\n    total_progress /= _replica->_primary_states.membership.max_replica_count;\n    ddebug_replica(\"total download progress = {}%\", total_progress);\n    response.__set_total_download_progress(total_progress);\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::report_group_ingestion_status(/*out*/ bulk_load_response &response)\n{\n    if (status() != partition_status::PS_PRIMARY) {\n        dwarn_replica(\"replica status={}, should be {}\",\n                      enum_to_string(status()),\n                      enum_to_string(partition_status::PS_PRIMARY));\n        response.err = ERR_INVALID_STATE;\n        return;\n    }\n\n    partition_bulk_load_state primary_state;\n    primary_state.__set_ingest_status(_replica->_app->get_ingestion_status());\n    response.group_bulk_load_state[_replica->_primary_states.membership.primary] = primary_state;\n    ddebug_replica(\"primary = {}, ingestion status = {}\",\n                   _replica->_primary_states.membership.primary.to_string(),\n                   enum_to_string(primary_state.ingest_status));\n\n    bool is_group_ingestion_finish =\n        (primary_state.ingest_status == ingestion_status::IS_SUCCEED) &&\n        (_replica->_primary_states.membership.secondaries.size() + 1 ==\n         _replica->_primary_states.membership.max_replica_count);\n    for (const auto &target_address : _replica->_primary_states.membership.secondaries) {\n        const auto &secondary_state =\n            _replica->_primary_states.secondary_bulk_load_states[target_address];\n        ingestion_status::type ingest_status = 
secondary_state.__isset.ingest_status\n                                                   ? secondary_state.ingest_status\n                                                   : ingestion_status::IS_INVALID;\n        ddebug_replica(\"secondary = {}, ingestion status={}\",\n                       target_address.to_string(),\n                       enum_to_string(ingest_status));\n        response.group_bulk_load_state[target_address] = secondary_state;\n        is_group_ingestion_finish &= (ingest_status == ingestion_status::IS_SUCCEED);\n    }\n    response.__set_is_group_ingestion_finished(is_group_ingestion_finish);\n\n    // if group ingestion finish, recover write immediately\n    if (is_group_ingestion_finish) {\n        ddebug_replica(\"finish ingestion, recover write\");\n        _replica->_is_bulk_load_ingestion = false;\n        _replica->_bulk_load_ingestion_start_time_ms = 0;\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::report_group_cleaned_up(bulk_load_response &response)\n{\n    if (status() != partition_status::PS_PRIMARY) {\n        dwarn_replica(\"replica status={}, should be {}\",\n                      enum_to_string(status()),\n                      enum_to_string(partition_status::PS_PRIMARY));\n        response.err = ERR_INVALID_STATE;\n        return;\n    }\n\n    partition_bulk_load_state primary_state;\n    primary_state.__set_is_cleaned_up(is_cleaned_up());\n    response.group_bulk_load_state[_replica->_primary_states.membership.primary] = primary_state;\n    ddebug_replica(\"primary = {}, bulk load states cleaned_up = {}\",\n                   _replica->_primary_states.membership.primary.to_string(),\n                   primary_state.is_cleaned_up);\n\n    bool group_flag = (primary_state.is_cleaned_up) &&\n                      (_replica->_primary_states.membership.secondaries.size() + 1 ==\n                       _replica->_primary_states.membership.max_replica_count);\n    for (const auto &target_address 
: _replica->_primary_states.membership.secondaries) {\n        const auto &secondary_state =\n            _replica->_primary_states.secondary_bulk_load_states[target_address];\n        bool is_cleaned_up =\n            secondary_state.__isset.is_cleaned_up ? secondary_state.is_cleaned_up : false;\n        ddebug_replica(\"secondary = {}, bulk load states cleaned_up = {}\",\n                       target_address.to_string(),\n                       is_cleaned_up);\n        response.group_bulk_load_state[target_address] = secondary_state;\n        group_flag &= is_cleaned_up;\n    }\n    ddebug_replica(\"group bulk load states cleaned_up = {}\", group_flag);\n    response.__set_is_group_bulk_load_context_cleaned_up(group_flag);\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::report_group_is_paused(bulk_load_response &response)\n{\n    if (status() != partition_status::PS_PRIMARY) {\n        dwarn_replica(\"replica status={}, should be {}\",\n                      enum_to_string(status()),\n                      enum_to_string(partition_status::PS_PRIMARY));\n        response.err = ERR_INVALID_STATE;\n        return;\n    }\n\n    partition_bulk_load_state primary_state;\n    primary_state.__set_is_paused(_status == bulk_load_status::BLS_PAUSED);\n    response.group_bulk_load_state[_replica->_primary_states.membership.primary] = primary_state;\n    ddebug_replica(\"primary = {}, bulk_load is_paused = {}\",\n                   _replica->_primary_states.membership.primary.to_string(),\n                   primary_state.is_paused);\n\n    bool group_is_paused =\n        primary_state.is_paused && (_replica->_primary_states.membership.secondaries.size() + 1 ==\n                                    _replica->_primary_states.membership.max_replica_count);\n    for (const auto &target_address : _replica->_primary_states.membership.secondaries) {\n        partition_bulk_load_state secondary_state =\n            
_replica->_primary_states.secondary_bulk_load_states[target_address];\n        bool is_paused = secondary_state.__isset.is_paused ? secondary_state.is_paused : false;\n        ddebug_replica(\n            \"secondary = {}, bulk_load is_paused = {}\", target_address.to_string(), is_paused);\n        response.group_bulk_load_state[target_address] = secondary_state;\n        group_is_paused &= is_paused;\n    }\n    ddebug_replica(\"group bulk load is_paused = {}\", group_is_paused);\n    response.__set_is_group_bulk_load_paused(group_is_paused);\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_bulk_loader::report_bulk_load_states_to_primary(\n    bulk_load_status::type remote_status,\n    /*out*/ group_bulk_load_response &response)\n{\n    if (status() != partition_status::PS_SECONDARY) {\n        response.err = ERR_INVALID_STATE;\n        return;\n    }\n\n    partition_bulk_load_state bulk_load_state;\n    auto local_status = _status;\n    switch (remote_status) {\n    case bulk_load_status::BLS_DOWNLOADING:\n    case bulk_load_status::BLS_DOWNLOADED: {\n        zauto_read_lock l(_lock);\n        bulk_load_state.__set_download_progress(_download_progress.load());\n        bulk_load_state.__set_download_status(_download_status.load());\n    } break;\n    case bulk_load_status::BLS_INGESTING:\n        bulk_load_state.__set_ingest_status(_replica->_app->get_ingestion_status());\n        break;\n    case bulk_load_status::BLS_SUCCEED:\n    case bulk_load_status::BLS_CANCELED:\n    case bulk_load_status::BLS_FAILED:\n        bulk_load_state.__set_is_cleaned_up(is_cleaned_up());\n        break;\n    case bulk_load_status::BLS_PAUSING:\n        bulk_load_state.__set_is_paused(local_status == bulk_load_status::BLS_PAUSED);\n        break;\n    default:\n        break;\n    }\n\n    response.status = local_status;\n    response.bulk_load_state = bulk_load_state;\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid 
replica_bulk_loader::clear_bulk_load_states_if_needed(partition_status::type old_status,\n                                                           partition_status::type new_status)\n{\n    if ((new_status == partition_status::PS_PRIMARY ||\n         new_status == partition_status::PS_SECONDARY) &&\n        new_status != old_status) {\n        if (_status == bulk_load_status::BLS_SUCCEED || _status == bulk_load_status::BLS_CANCELED ||\n            _status == bulk_load_status::BLS_FAILED || _status == bulk_load_status::BLS_INVALID) {\n            return;\n        }\n        ddebug_replica(\"prepare to clear bulk load states, current status = {}\",\n                       enum_to_string(_status));\n        clear_bulk_load_states();\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/bulk_load/replica_bulk_loader.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/tool-api/zlocks.h>\n\n#include \"replica/replica.h\"\n#include \"replica/replica_context.h\"\n#include \"replica/replica_stub.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_bulk_loader : replica_base\n{\npublic:\n    explicit replica_bulk_loader(replica *r);\n    ~replica_bulk_loader();\n\n    void on_bulk_load(const bulk_load_request &request, /*out*/ bulk_load_response &response);\n\n    void on_group_bulk_load(const group_bulk_load_request &request,\n                            /*out*/ group_bulk_load_response &response);\n\nprivate:\n    void broadcast_group_bulk_load(const bulk_load_request &meta_req);\n    void on_group_bulk_load_reply(error_code err,\n                                  const group_bulk_load_request &req,\n                                  const group_bulk_load_response &resp);\n\n    error_code do_bulk_load(const std::string &app_name,\n                            bulk_load_status::type meta_status,\n                            const std::string &cluster_name,\n                            const std::string &provider_name,\n                            const std::string 
&remote_root_path);\n\n    // compare meta bulk load status and local bulk load status\n    // \\return ERR_INVALID_STATE if local status is invalid\n    // for example, if meta status is ingestion, replica local status can only be downloaded or\n    // ingestion, if local status is other status, will return ERR_INVALID_STATE\n    static error_code validate_status(const bulk_load_status::type meta_status,\n                                      const bulk_load_status::type local_status);\n\n    // replica start or restart download sst files from remote provider\n    // \\return ERR_BUSY if node has already had enough replica executing downloading\n    // \\return ERR_FILE_OPERATION_FAILED: create local bulk load dir failed\n    error_code start_download(const std::string &remote_dir, const std::string &provider_name);\n\n    // download metadata file and create sst download tasks\n    // metadata and sst files will be downloaded in {_dir}/.bulk_load directory\n    void download_files(const std::string &provider_name,\n                        const std::string &remote_dir,\n                        const std::string &local_dir);\n\n    // download sst files from remote provider\n    void download_sst_file(const std::string &remote_dir,\n                           const std::string &local_dir,\n                           int32_t file_index,\n                           dist::block_service::block_filesystem *fs);\n\n    // \\return ERR_FILE_OPERATION_FAILED: file not exist, get size failed, open file failed\n    // \\return ERR_CORRUPTION: parse failed\n    // need to acquire write lock while calling it\n    error_code parse_bulk_load_metadata(const std::string &fname);\n\n    // update download progress after downloading sst files succeed\n    void update_bulk_load_download_progress(uint64_t file_size, const std::string &file_name);\n\n    // need to acquire write lock while calling it\n    void try_decrease_bulk_load_download_count();\n    void 
check_download_finish();\n    void start_ingestion();\n    void check_ingestion_finish();\n    void handle_bulk_load_succeed();\n    // called when bulk load succeed or failed or canceled\n    void handle_bulk_load_finish(bulk_load_status::type new_status);\n    void pause_bulk_load();\n\n    void remove_local_bulk_load_dir(const std::string &bulk_load_dir);\n    // need to acquire write lock while calling it\n    void cleanup_download_tasks();\n    bool cleanup_download_task(task_ptr task_);\n    void clear_bulk_load_states();\n    bool is_cleaned_up();\n\n    void report_bulk_load_states_to_meta(bulk_load_status::type remote_status,\n                                         bool report_metadata,\n                                         /*out*/ bulk_load_response &response);\n    void report_group_download_progress(/*out*/ bulk_load_response &response);\n    void report_group_ingestion_status(/*out*/ bulk_load_response &response);\n    void report_group_cleaned_up(/*out*/ bulk_load_response &response);\n    void report_group_is_paused(/*out*/ bulk_load_response &response);\n\n    void report_bulk_load_states_to_primary(bulk_load_status::type remote_status,\n                                            /*out*/ group_bulk_load_response &response);\n\n    // called by `update_local_configuration` to do possible states cleaning up\n    void clear_bulk_load_states_if_needed(partition_status::type old_status,\n                                          partition_status::type new_status);\n\n    ///\n    /// bulk load path on remote file provider:\n    /// <remote_root_path>/<cluster_name>/<app_name>/{bulk_load_info}\n    /// <remote_root_path>/<cluster_name>/<app_name>/<partition_index>/<file_name>\n    /// <remote_root_path>/<cluster_name>/<app_name>/<partition_index>/bulk_load_metadata\n    ///\n    // get partition's file dir on remote file provider\n    inline std::string get_remote_bulk_load_dir(const std::string &app_name,\n                                          
      const std::string &cluster_name,\n                                                const std::string &remote_root_path,\n                                                uint32_t pidx) const\n    {\n        std::ostringstream oss;\n        oss << remote_root_path << \"/\" << cluster_name << \"/\" << app_name << \"/\" << pidx;\n        return oss.str();\n    }\n\n    inline bulk_load_status::type get_bulk_load_status() const { return _status; }\n\n    inline void set_bulk_load_status(bulk_load_status::type status) { _status = status; }\n\n    inline uint64_t duration_ms() const\n    {\n        return _bulk_load_start_time_ms > 0 ? (dsn_now_ms() - _bulk_load_start_time_ms) : 0;\n    }\n\n    inline uint64_t ingestion_duration_ms() const\n    {\n        return _replica->_bulk_load_ingestion_start_time_ms > 0\n                   ? (dsn_now_ms() - _replica->_bulk_load_ingestion_start_time_ms)\n                   : 0;\n    }\n\n    //\n    // helper functions\n    //\n    partition_status::type status() const { return _replica->status(); }\n    ballot get_ballot() const { return _replica->get_ballot(); }\n    task_tracker *tracker() { return _replica->tracker(); }\n\nprivate:\n    replica *_replica;\n    replica_stub *_stub;\n\n    friend class replica;\n    friend class replica_stub;\n    friend class replica_bulk_loader_test;\n\n    // bulk load states lock\n    zrwlock_nr _lock; // {\n    bulk_load_status::type _status{bulk_load_status::BLS_INVALID};\n    bulk_load_metadata _metadata;\n    std::atomic<bool> _is_downloading{false};\n    std::atomic<uint64_t> _cur_downloaded_size{0};\n    std::atomic<int32_t> _download_progress{0};\n    std::atomic<error_code> _download_status{ERR_OK};\n    // }\n    // file_name -> downloading task\n    std::map<std::string, task_ptr> _download_files_task;\n    // download metadata and create download file tasks\n    task_ptr _download_task;\n    // Used for perf-counter\n    uint64_t _bulk_load_start_time_ms{0};\n};\n\n} // 
namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/bulk_load/test/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_replica_bulk_load_test)\n\nset(MY_PROJ_SRC \"\")\n\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS dsn_meta_server\n        dsn_replica_server\n        dsn_replication_common\n        dsn_runtime\n        hashtable\n        gtest\n)\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\nset(MY_BINPLACES\n        config-test.ini\n        run.sh\n)\n\ndsn_add_test()\n"
  },
  {
    "path": "src/replica/bulk_load/test/config-test.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n[apps.replica]\ntype = replica\nrun = true\ncount = 1\nports = 54321\npools = THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_BLOCK_SERVICE,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[core]\n;tool = simulator\ntool = nativerun\n\n;toollets = tracer, profiler\n;fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_DEBUG\nlogging_factory_name = dsn::tools::simple_logger\n\n\n[tools.simple_logger]\nfast_flush = true\nshort_header = 
false\nstderr_start_level = LOG_LEVEL_WARNING\n\n[tools.simulator]\nrandom_seed = 1465902258\n\n[tools.screen_logger]\nshort_header = false\n\n[network]\n; how many network threads for network library (used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 4\n\n[threadpool.THREAD_POOL_DEFAULT]\nname = default\npartitioned = false\nworker_priority = THREAD_xPRIORITY_NORMAL\nworker_count = 2\n\n[threadpool.THREAD_POOL_REPLICATION]\nname = replica\npartitioned = true\nworker_priority = THREAD_xPRIORITY_NORMAL\nworker_count = 3\n\n[threadpool.THREAD_POOL_REPLICATION_LONG]\nname = replica_long\n\n[threadpool.THREAD_POOL_BLOCK_SERVICE]\nname = block_service\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\n[block_service.local_service]\ntype = local_service\nargs =\n"
  },
  {
    "path": "src/replica/bulk_load/test/main.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n\n#include <dsn/service_api_cpp.h>\n\nint g_test_count = 0;\nint g_test_ret = 0;\n\nclass gtest_app : public dsn::service_app\n{\npublic:\n    gtest_app(const dsn::service_app_info *info) : ::dsn::service_app(info) {}\n\n    dsn::error_code start(const std::vector<std::string> &args) override\n    {\n        g_test_ret = RUN_ALL_TESTS();\n        g_test_count = 1;\n        return dsn::ERR_OK;\n    }\n\n    dsn::error_code stop(bool) override { return dsn::ERR_OK; }\n};\n\nGTEST_API_ int main(int argc, char **argv)\n{\n    testing::InitGoogleTest(&argc, argv);\n\n    dsn::service_app::register_factory<gtest_app>(\"replica\");\n\n    dsn_run_config(\"config-test.ini\", false);\n    while (g_test_count == 0) {\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n    }\n\n    dsn_exit(g_test_ret);\n}\n"
  },
  {
    "path": "src/replica/bulk_load/test/replica_bulk_loader_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"replica/bulk_load/replica_bulk_loader.h\"\n#include \"replica/test/replica_test_base.h\"\n\n#include <fstream>\n\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/utility/fail_point.h>\n#include <gtest/gtest.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_bulk_loader_test : public replica_test_base\n{\npublic:\n    replica_bulk_loader_test()\n    {\n        _replica = create_mock_replica(stub.get());\n        _bulk_loader = make_unique<replica_bulk_loader>(_replica.get());\n        fail::setup();\n    }\n\n    ~replica_bulk_loader_test() { fail::teardown(); }\n\n    /// bulk load functions\n\n    error_code test_on_bulk_load()\n    {\n        bulk_load_response resp;\n        _bulk_loader->on_bulk_load(_req, resp);\n        return resp.err;\n    }\n\n    error_code test_on_group_bulk_load(bulk_load_status::type status, ballot b)\n    {\n        create_group_bulk_load_request(status, b);\n        group_bulk_load_response resp;\n        _bulk_loader->on_group_bulk_load(_group_req, resp);\n        return resp.err;\n    }\n\n    error_code test_start_downloading()\n    {\n        const std::string remote_dir = 
_bulk_loader->get_remote_bulk_load_dir(\n            APP_NAME, CLUSTER, ROOT_PATH, PID.get_partition_index());\n        auto err = _bulk_loader->start_download(remote_dir, PROVIDER);\n        _bulk_loader->tracker()->wait_outstanding_tasks();\n        return err;\n    }\n\n    void test_rollback_to_downloading(bulk_load_status::type cur_status)\n    {\n        switch (cur_status) {\n        case bulk_load_status::BLS_PAUSED:\n            mock_group_progress(bulk_load_status::BLS_DOWNLOADING, 30, 100, 100);\n            break;\n        case bulk_load_status::BLS_INGESTING:\n            mock_group_ingestion_states(\n                ingestion_status::IS_SUCCEED, ingestion_status::IS_SUCCEED, true);\n            break;\n        case bulk_load_status::BLS_SUCCEED:\n            mock_group_cleanup_flag(bulk_load_status::BLS_SUCCEED);\n            break;\n        default:\n            return;\n        }\n        create_bulk_load_request(bulk_load_status::BLS_DOWNLOADING);\n        test_start_downloading();\n    }\n\n    error_code test_parse_bulk_load_metadata(const std::string &file_path)\n    {\n        return _bulk_loader->parse_bulk_load_metadata(file_path);\n    }\n\n    void test_update_download_progress(uint64_t file_size)\n    {\n        _bulk_loader->_is_downloading.store(true);\n        _bulk_loader->update_bulk_load_download_progress(file_size, \"test_file_name\");\n        _bulk_loader->tracker()->wait_outstanding_tasks();\n    }\n\n    void test_start_ingestion() { _bulk_loader->start_ingestion(); }\n\n    void test_handle_bulk_load_finish(bulk_load_status::type status,\n                                      int32_t download_progress,\n                                      ingestion_status::type istatus,\n                                      bool is_bulk_load_ingestion,\n                                      bulk_load_status::type req_status)\n    {\n        mock_replica_bulk_load_varieties(\n            status, download_progress, istatus, 
is_bulk_load_ingestion);\n        _bulk_loader->handle_bulk_load_finish(req_status);\n    }\n\n    void test_pause_bulk_load(bulk_load_status::type status, int32_t progress, bool is_downloading)\n    {\n        mock_replica_bulk_load_varieties(\n            status, progress, ingestion_status::IS_INVALID, false, is_downloading);\n        _bulk_loader->pause_bulk_load();\n    }\n\n    int32_t test_report_group_download_progress(bulk_load_status::type status,\n                                                int32_t p_progress,\n                                                int32_t s1_progress,\n                                                int32_t s2_progress)\n    {\n        mock_group_progress(status, p_progress, s1_progress, s2_progress);\n        bulk_load_response response;\n        _bulk_loader->report_group_download_progress(response);\n        return response.total_download_progress;\n    }\n\n    bool test_report_group_ingestion_status(ingestion_status::type primary,\n                                            ingestion_status::type secondary1,\n                                            ingestion_status::type secondary2,\n                                            bool is_empty_prepare_sent,\n                                            bool replica_is_ingestion)\n    {\n        _replica->set_is_ingestion(replica_is_ingestion);\n        _replica->set_ingestion_status(primary);\n        mock_secondary_ingestion_states(secondary1, secondary2, is_empty_prepare_sent);\n        bulk_load_response response;\n        _bulk_loader->report_group_ingestion_status(response);\n        return response.is_group_ingestion_finished;\n    }\n\n    bool test_report_group_cleaned_up()\n    {\n        bulk_load_response response;\n        _bulk_loader->report_group_cleaned_up(response);\n        return response.is_group_bulk_load_context_cleaned_up;\n    }\n\n    bool test_report_group_is_paused(bulk_load_status::type status)\n    {\n        mock_group_progress(status, 
10, 50, 50);\n        partition_bulk_load_state state;\n        state.__set_is_paused(true);\n        _replica->set_secondary_bulk_load_state(SECONDARY, state);\n        _replica->set_secondary_bulk_load_state(SECONDARY2, state);\n\n        bulk_load_response response;\n        _bulk_loader->report_group_is_paused(response);\n        return response.is_group_bulk_load_paused;\n    }\n\n    void test_on_group_bulk_load_reply(bulk_load_status::type req_status,\n                                       ballot req_ballot,\n                                       error_code resp_error = ERR_OK,\n                                       error_code rpc_error = ERR_OK)\n    {\n        create_group_bulk_load_request(req_status, req_ballot);\n        group_bulk_load_response resp;\n        resp.err = resp_error;\n        _bulk_loader->on_group_bulk_load_reply(rpc_error, _group_req, resp);\n    }\n\n    bool validate_status(const bulk_load_status::type meta_status,\n                         const bulk_load_status::type local_status)\n    {\n        return replica_bulk_loader::validate_status(meta_status, local_status) == ERR_OK;\n    }\n\n    /// mock structure functions\n\n    void\n    create_bulk_load_request(bulk_load_status::type status, ballot b, int32_t downloading_count = 0)\n    {\n        _req.app_name = APP_NAME;\n        _req.ballot = b;\n        _req.cluster_name = CLUSTER;\n        _req.meta_bulk_load_status = status;\n        _req.pid = PID;\n        _req.remote_provider_name = PROVIDER;\n        _req.remote_root_path = ROOT_PATH;\n        stub->set_bulk_load_downloading_count(downloading_count);\n    }\n\n    void create_bulk_load_request(bulk_load_status::type status, int32_t downloading_count = 0)\n    {\n        if (status != bulk_load_status::BLS_DOWNLOADING) {\n            downloading_count = 0;\n        }\n        create_bulk_load_request(status, BALLOT, downloading_count);\n    }\n\n    void create_group_bulk_load_request(bulk_load_status::type status, 
ballot b)\n    {\n        _group_req.app_name = APP_NAME;\n        _group_req.meta_bulk_load_status = status;\n        _group_req.config.status = partition_status::PS_SECONDARY;\n        _group_req.config.ballot = b;\n        _group_req.target_address = SECONDARY;\n    }\n\n    void mock_replica_config(partition_status::type status)\n    {\n        replica_configuration rconfig;\n        rconfig.ballot = BALLOT;\n        rconfig.pid = PID;\n        rconfig.primary = PRIMARY;\n        rconfig.status = status;\n        _replica->set_replica_config(rconfig);\n    }\n\n    void mock_primary_states()\n    {\n        mock_replica_config(partition_status::PS_PRIMARY);\n        partition_configuration config;\n        config.max_replica_count = 3;\n        config.pid = PID;\n        config.ballot = BALLOT;\n        config.primary = PRIMARY;\n        config.secondaries.emplace_back(SECONDARY);\n        config.secondaries.emplace_back(SECONDARY2);\n        _replica->set_primary_partition_configuration(config);\n    }\n\n    void create_local_file(const std::string &file_name)\n    {\n        std::string whole_name = utils::filesystem::path_combine(LOCAL_DIR, file_name);\n        utils::filesystem::create_file(whole_name);\n        std::ofstream test_file;\n        test_file.open(whole_name);\n        test_file << \"write some data.\\n\";\n        test_file.close();\n\n        _file_meta.name = whole_name;\n        utils::filesystem::md5sum(whole_name, _file_meta.md5);\n        utils::filesystem::file_size(whole_name, _file_meta.size);\n    }\n\n    error_code create_local_metadata_file()\n    {\n        create_local_file(FILE_NAME);\n        _metadata.files.emplace_back(_file_meta);\n        _metadata.file_total_size = _file_meta.size;\n\n        std::string whole_name = utils::filesystem::path_combine(LOCAL_DIR, METADATA);\n        utils::filesystem::create_file(whole_name);\n        std::ofstream os(whole_name.c_str(),\n                         (std::ofstream::out | 
std::ios::binary | std::ofstream::trunc));\n        if (!os.is_open()) {\n            derror(\"open file %s failed\", whole_name.c_str());\n            return ERR_FILE_OPERATION_FAILED;\n        }\n\n        blob bb = json::json_forwarder<bulk_load_metadata>::encode(_metadata);\n        os.write((const char *)bb.data(), (std::streamsize)bb.length());\n        if (os.bad()) {\n            derror(\"write file %s failed\", whole_name.c_str());\n            return ERR_FILE_OPERATION_FAILED;\n        }\n        os.close();\n\n        return ERR_OK;\n    }\n\n    bool validate_metadata()\n    {\n        auto target = _bulk_loader->_metadata;\n        if (target.file_total_size != _metadata.file_total_size) {\n            return false;\n        }\n        if (target.files.size() != _metadata.files.size()) {\n            return false;\n        }\n        for (int i = 0; i < target.files.size(); ++i) {\n            if (target.files[i].name != _metadata.files[i].name) {\n                return false;\n            }\n            if (target.files[i].size != _metadata.files[i].size) {\n                return false;\n            }\n            if (target.files[i].md5 != _metadata.files[i].md5) {\n                return false;\n            }\n        }\n        return true;\n    }\n\n    void mock_downloading_progress(uint64_t file_total_size,\n                                   uint64_t cur_downloaded_size,\n                                   int32_t download_progress)\n    {\n        _bulk_loader->_status = bulk_load_status::type::BLS_DOWNLOADING;\n        _bulk_loader->_metadata.file_total_size = file_total_size;\n        _bulk_loader->_cur_downloaded_size = cur_downloaded_size;\n        _bulk_loader->_download_progress = download_progress;\n    }\n\n    void mock_replica_bulk_load_varieties(bulk_load_status::type status,\n                                          int32_t download_progress,\n                                          ingestion_status::type istatus,\n            
                              bool is_ingestion = false,\n                                          bool is_downloading = false)\n    {\n        _bulk_loader->_status = status;\n        _bulk_loader->_download_progress = download_progress;\n        _bulk_loader->_is_downloading.store(is_downloading);\n        _replica->set_is_ingestion(is_ingestion);\n        _replica->set_ingestion_status(istatus);\n    }\n\n    void mock_secondary_progress(int32_t secondary_progress1, int32_t secondary_progress2)\n    {\n        mock_primary_states();\n        partition_bulk_load_state state1, state2;\n        state1.__set_download_status(ERR_OK);\n        state1.__set_download_progress(secondary_progress1);\n        state2.__set_download_status(ERR_OK);\n        state2.__set_download_progress(secondary_progress2);\n        _replica->set_secondary_bulk_load_state(SECONDARY, state1);\n        _replica->set_secondary_bulk_load_state(SECONDARY2, state2);\n    }\n\n    void mock_group_progress(bulk_load_status::type p_status,\n                             int32_t p_progress,\n                             int32_t s1_progress,\n                             int32_t s2_progress)\n    {\n        if (p_status == bulk_load_status::BLS_INVALID) {\n            p_progress = 0;\n        } else if (p_status == bulk_load_status::BLS_DOWNLOADED) {\n            p_progress = 100;\n        }\n        mock_replica_bulk_load_varieties(p_status, p_progress, ingestion_status::IS_INVALID);\n        mock_secondary_progress(s1_progress, s2_progress);\n    }\n\n    void mock_group_progress(bulk_load_status::type p_status)\n    {\n        if (p_status == bulk_load_status::BLS_INVALID) {\n            mock_group_progress(p_status, 0, 0, 0);\n        } else if (p_status == bulk_load_status::BLS_DOWNLOADED) {\n            mock_group_progress(p_status, 100, 100, 100);\n        }\n    }\n\n    void mock_secondary_ingestion_states(ingestion_status::type status1,\n                                         
ingestion_status::type status2,\n                                         bool is_empty_prepare_sent = true)\n    {\n        mock_secondary_progress(100, 100);\n        _replica->set_is_empty_prepare_sent(is_empty_prepare_sent);\n\n        partition_bulk_load_state state1, state2;\n        state1.__set_ingest_status(status1);\n        state2.__set_ingest_status(status2);\n        _replica->set_secondary_bulk_load_state(SECONDARY, state1);\n        _replica->set_secondary_bulk_load_state(SECONDARY2, state2);\n    }\n\n    void mock_group_ingestion_states(ingestion_status::type s1_status,\n                                     ingestion_status::type s2_status,\n                                     bool is_empty_prepare_sent = true)\n    {\n        mock_replica_bulk_load_varieties(\n            bulk_load_status::BLS_INGESTING, 100, ingestion_status::IS_SUCCEED);\n        mock_secondary_ingestion_states(s1_status, s2_status, is_empty_prepare_sent);\n    }\n\n    void mock_group_cleanup_flag(bulk_load_status::type primary_status,\n                                 bool s1_cleaned_up = true,\n                                 bool s2_cleaned_up = true)\n    {\n        int32_t primary_progress = primary_status == bulk_load_status::BLS_SUCCEED ? 
100 : 0;\n        mock_replica_bulk_load_varieties(\n            primary_status, primary_progress, ingestion_status::IS_INVALID);\n        mock_secondary_ingestion_states(\n            ingestion_status::IS_INVALID, ingestion_status::IS_INVALID, true);\n\n        partition_bulk_load_state state1, state2;\n        state1.__set_is_cleaned_up(s1_cleaned_up);\n        state2.__set_is_cleaned_up(s2_cleaned_up);\n        _replica->set_secondary_bulk_load_state(SECONDARY, state1);\n        _replica->set_secondary_bulk_load_state(SECONDARY2, state2);\n    }\n\n    // helper functions\n    bulk_load_status::type get_bulk_load_status() const { return _bulk_loader->_status; }\n    bool is_cleaned_up() { return _bulk_loader->is_cleaned_up(); }\n    int32_t get_download_progress() { return _bulk_loader->_download_progress.load(); }\n    bool is_secondary_bulk_load_state_reset()\n    {\n        const partition_bulk_load_state &state = _replica->get_secondary_bulk_load_state(SECONDARY);\n        bool is_download_state_reset =\n            (state.__isset.download_progress && state.__isset.download_status &&\n             state.download_progress == 0 && state.download_status == ERR_OK);\n        bool is_ingestion_status_reset =\n            (state.__isset.ingest_status && state.ingest_status == ingestion_status::IS_INVALID);\n        bool is_cleanup_flag_reset = (state.__isset.is_cleaned_up && !state.is_cleaned_up);\n        bool is_paused_flag_reset = (state.__isset.is_paused && !state.is_paused);\n        return is_download_state_reset && is_ingestion_status_reset && is_cleanup_flag_reset &&\n               is_paused_flag_reset;\n    }\n\npublic:\n    std::unique_ptr<mock_replica> _replica;\n    std::unique_ptr<replica_bulk_loader> _bulk_loader;\n\n    bulk_load_request _req;\n    group_bulk_load_request _group_req;\n\n    file_meta _file_meta;\n    bulk_load_metadata _metadata;\n\n    std::string APP_NAME = \"replica\";\n    std::string CLUSTER = \"cluster\";\n    std::string 
PROVIDER = \"local_service\";\n    std::string ROOT_PATH = \"bulk_load_root\";\n    gpid PID = gpid(1, 0);\n    ballot BALLOT = 3;\n    rpc_address PRIMARY = rpc_address(\"127.0.0.2\", 34801);\n    rpc_address SECONDARY = rpc_address(\"127.0.0.3\", 34801);\n    rpc_address SECONDARY2 = rpc_address(\"127.0.0.4\", 34801);\n    int32_t MAX_DOWNLOADING_COUNT = 5;\n    std::string LOCAL_DIR = bulk_load_constant::BULK_LOAD_LOCAL_ROOT_DIR;\n    std::string METADATA = bulk_load_constant::BULK_LOAD_METADATA;\n    std::string FILE_NAME = \"test_sst_file\";\n};\n\n// on_bulk_load unit tests\nTEST_F(replica_bulk_loader_test, on_bulk_load_not_primary)\n{\n    create_bulk_load_request(bulk_load_status::BLS_DOWNLOADING);\n    ASSERT_EQ(test_on_bulk_load(), ERR_INVALID_STATE);\n}\n\nTEST_F(replica_bulk_loader_test, on_bulk_load_ballot_change)\n{\n    create_bulk_load_request(bulk_load_status::BLS_DOWNLOADING, BALLOT + 1);\n    mock_primary_states();\n    ASSERT_EQ(test_on_bulk_load(), ERR_INVALID_STATE);\n}\n\n// on_group_bulk_load unit tests\nTEST_F(replica_bulk_loader_test, on_group_bulk_load_test)\n{\n    struct test_struct\n    {\n        partition_status::type pstatus;\n        bulk_load_status::type bstatus;\n        ballot b;\n        error_code expected_err;\n    } tests[] = {\n        {partition_status::PS_SECONDARY,\n         bulk_load_status::BLS_DOWNLOADING,\n         BALLOT - 1,\n         ERR_VERSION_OUTDATED},\n        {partition_status::PS_SECONDARY,\n         bulk_load_status::BLS_DOWNLOADED,\n         BALLOT + 1,\n         ERR_INVALID_STATE},\n        {partition_status::PS_INACTIVE, bulk_load_status::BLS_INGESTING, BALLOT, ERR_INVALID_STATE},\n    };\n\n    for (auto test : tests) {\n        mock_replica_config(test.pstatus);\n        ASSERT_EQ(test_on_group_bulk_load(test.bstatus, test.b), test.expected_err);\n    }\n}\n\n// start_downloading unit tests\nTEST_F(replica_bulk_loader_test, start_downloading_test)\n{\n    // Test cases:\n    // - stub concurrent 
downloading count exceeded\n    // - downloading error\n    // - downloading succeed\n    struct test_struct\n    {\n        int32_t downloading_count;\n        error_code expected_err;\n        bulk_load_status::type expected_status;\n        int32_t expected_downloading_count;\n    } tests[]{\n        {MAX_DOWNLOADING_COUNT, ERR_BUSY, bulk_load_status::BLS_INVALID, MAX_DOWNLOADING_COUNT},\n        {1, ERR_OK, bulk_load_status::BLS_DOWNLOADING, 2}};\n    fail::cfg(\"replica_bulk_loader_download_files\", \"return()\");\n    for (auto test : tests) {\n        mock_group_progress(bulk_load_status::BLS_INVALID);\n        create_bulk_load_request(bulk_load_status::BLS_DOWNLOADING, test.downloading_count);\n\n        ASSERT_EQ(test_start_downloading(), test.expected_err);\n        ASSERT_EQ(get_bulk_load_status(), test.expected_status);\n        ASSERT_EQ(stub->get_bulk_load_downloading_count(), test.expected_downloading_count);\n    }\n}\n\n// rollback_to_downloading unit tests\nTEST_F(replica_bulk_loader_test, rollback_to_downloading_test)\n{\n    fail::cfg(\"replica_bulk_loader_download_files\", \"return()\");\n    struct test_struct\n    {\n        bulk_load_status::type status;\n    } tests[]{{bulk_load_status::BLS_PAUSED},\n              {bulk_load_status::BLS_INGESTING},\n              {bulk_load_status::BLS_SUCCEED}};\n\n    for (auto test : tests) {\n        test_rollback_to_downloading(test.status);\n        ASSERT_EQ(get_bulk_load_status(), bulk_load_status::BLS_DOWNLOADING);\n        ASSERT_TRUE(_replica->is_primary_bulk_load_states_cleaned());\n        ASSERT_EQ(_replica->get_ingestion_status(), ingestion_status::IS_INVALID);\n        ASSERT_FALSE(_replica->is_ingestion());\n    }\n}\n\n// parse_bulk_load_metadata unit tests\nTEST_F(replica_bulk_loader_test, bulk_load_metadata_not_exist)\n{\n    ASSERT_EQ(test_parse_bulk_load_metadata(\"path_not_exist\"), ERR_FILE_OPERATION_FAILED);\n}\n\nTEST_F(replica_bulk_loader_test, bulk_load_metadata_corrupt)\n{\n    // 
create file can not parse as bulk_load_metadata structure\n    utils::filesystem::create_directory(LOCAL_DIR);\n    create_local_file(METADATA);\n    std::string metadata_file_name = utils::filesystem::path_combine(LOCAL_DIR, METADATA);\n    error_code ec = test_parse_bulk_load_metadata(metadata_file_name);\n    ASSERT_EQ(ec, ERR_CORRUPTION);\n    utils::filesystem::remove_path(LOCAL_DIR);\n}\n\nTEST_F(replica_bulk_loader_test, bulk_load_metadata_parse_succeed)\n{\n    utils::filesystem::create_directory(LOCAL_DIR);\n    error_code ec = create_local_metadata_file();\n    ASSERT_EQ(ec, ERR_OK);\n\n    std::string metadata_file_name = utils::filesystem::path_combine(LOCAL_DIR, METADATA);\n    ec = test_parse_bulk_load_metadata(metadata_file_name);\n    ASSERT_EQ(ec, ERR_OK);\n    ASSERT_TRUE(validate_metadata());\n    utils::filesystem::remove_path(LOCAL_DIR);\n}\n\n// finish download test\nTEST_F(replica_bulk_loader_test, finish_download_test)\n{\n    mock_downloading_progress(100, 50, 50);\n    stub->set_bulk_load_downloading_count(3);\n\n    test_update_download_progress(50);\n    ASSERT_EQ(get_bulk_load_status(), bulk_load_status::BLS_DOWNLOADED);\n    ASSERT_EQ(stub->get_bulk_load_downloading_count(), 2);\n}\n\n// start ingestion test\nTEST_F(replica_bulk_loader_test, start_ingestion_test)\n{\n    mock_group_progress(bulk_load_status::BLS_DOWNLOADED);\n    test_start_ingestion();\n    ASSERT_EQ(get_bulk_load_status(), bulk_load_status::BLS_INGESTING);\n}\n\n// handle_bulk_load_finish unit tests\nTEST_F(replica_bulk_loader_test, bulk_load_finish_test)\n{\n    // Test cases\n    // - bulk load succeed\n    // - double bulk load finish\n    // - invalid with directory not removed\n    // - cancel during downloaded\n    // - cancel during ingestion\n    // - cancel during succeed\n    // - failed during downloading\n    // - failed during ingestion\n    // Tip: bulk load dir will be removed if bulk load finished, so we should create dir before some\n    // cases\n   
 struct test_struct\n    {\n        bulk_load_status::type local_status;\n        int32_t progress;\n        ingestion_status::type istatus;\n        bool is_ingestion;\n        bulk_load_status::type request_status;\n        bool create_dir;\n    } tests[]{{bulk_load_status::BLS_SUCCEED,\n               100,\n               ingestion_status::IS_INVALID,\n               false,\n               bulk_load_status::BLS_SUCCEED,\n               false},\n              {bulk_load_status::BLS_INVALID,\n               0,\n               ingestion_status::IS_INVALID,\n               false,\n               bulk_load_status::BLS_SUCCEED,\n               false},\n              {bulk_load_status::BLS_INVALID,\n               0,\n               ingestion_status::IS_INVALID,\n               false,\n               bulk_load_status::BLS_SUCCEED,\n               true},\n              {bulk_load_status::BLS_DOWNLOADED,\n               100,\n               ingestion_status::IS_INVALID,\n               false,\n               bulk_load_status::BLS_CANCELED,\n               true},\n              {bulk_load_status::BLS_INGESTING,\n               100,\n               ingestion_status::type::IS_RUNNING,\n               true,\n               bulk_load_status::BLS_CANCELED,\n               true},\n              {bulk_load_status::BLS_SUCCEED,\n               100,\n               ingestion_status::IS_INVALID,\n               false,\n               bulk_load_status::BLS_CANCELED,\n               true},\n              {bulk_load_status::BLS_DOWNLOADING,\n               10,\n               ingestion_status::IS_INVALID,\n               false,\n               bulk_load_status::BLS_FAILED,\n               true},\n              {bulk_load_status::BLS_INGESTING,\n               100,\n               ingestion_status::type::IS_FAILED,\n               false,\n               bulk_load_status::BLS_FAILED,\n               true}};\n\n    for (auto test : tests) {\n        if (test.create_dir) {\n            
utils::filesystem::create_directory(LOCAL_DIR);\n        }\n        test_handle_bulk_load_finish(\n            test.local_status, test.progress, test.istatus, test.is_ingestion, test.request_status);\n        ASSERT_EQ(_replica->get_ingestion_status(), ingestion_status::IS_INVALID);\n        ASSERT_FALSE(_replica->is_ingestion());\n        ASSERT_TRUE(is_cleaned_up());\n    }\n}\n\n// pause_bulk_load unit tests\nTEST_F(replica_bulk_loader_test, pause_bulk_load_test)\n{\n    const int32_t stub_downloading_count = 3;\n    // Test cases:\n    // pausing while not bulk load\n    // pausing during downloading\n    // pausing during downloaded\n    struct test_struct\n    {\n        bulk_load_status::type status;\n        int32_t progress;\n        bool is_downloading;\n        int32_t expected_progress;\n        int32_t expected_downloading_count;\n    } tests[]{\n        {bulk_load_status::BLS_INVALID, 0, false, 0, stub_downloading_count},\n        {bulk_load_status::BLS_DOWNLOADING, 10, true, 10, stub_downloading_count - 1},\n        {bulk_load_status::BLS_DOWNLOADING, 0, false, 0, stub_downloading_count},\n        {bulk_load_status::BLS_DOWNLOADED, 100, false, 100, stub_downloading_count},\n    };\n\n    for (auto test : tests) {\n        stub->set_bulk_load_downloading_count(stub_downloading_count);\n        test_pause_bulk_load(test.status, test.progress, test.is_downloading);\n        ASSERT_EQ(get_bulk_load_status(), bulk_load_status::BLS_PAUSED);\n        ASSERT_EQ(get_download_progress(), test.expected_progress);\n        ASSERT_EQ(stub->get_bulk_load_downloading_count(), test.expected_downloading_count);\n    }\n}\n\n// report_group_download_progress unit tests\nTEST_F(replica_bulk_loader_test, report_group_download_progress_test)\n{\n    struct test_struct\n    {\n        bulk_load_status::type primary_status;\n        int32_t primary_progress;\n        int32_t secondary1_progress;\n        int32_t secondary2_progress;\n        int32_t total_progress;\n    } 
tests[]{\n        {bulk_load_status::BLS_DOWNLOADING, 10, 10, 10, 10},\n        {bulk_load_status::BLS_DOWNLOADED, 100, 0, 0, 33},\n        {bulk_load_status::BLS_DOWNLOADED, 100, 100, 100, 100},\n    };\n\n    for (auto test : tests) {\n        ASSERT_EQ(test_report_group_download_progress(test.primary_status,\n                                                      test.primary_progress,\n                                                      test.secondary1_progress,\n                                                      test.secondary2_progress),\n                  test.total_progress);\n    }\n}\n\n// report_group_ingestion_status unit tests\nTEST_F(replica_bulk_loader_test, report_group_ingestion_status_test)\n{\n\n    struct ingestion_struct\n    {\n        ingestion_status::type primary;\n        ingestion_status::type secondary1;\n        ingestion_status::type secondary2;\n        bool is_empty_prepare_sent;\n        bool replica_is_ingestion;\n        bool is_group_ingestion_finished;\n    } tests[] = {\n        {ingestion_status::IS_INVALID,\n         ingestion_status::IS_INVALID,\n         ingestion_status::IS_INVALID,\n         false,\n         false,\n         false},\n        {ingestion_status::IS_RUNNING,\n         ingestion_status::IS_INVALID,\n         ingestion_status::IS_INVALID,\n         false,\n         false,\n         false},\n        {ingestion_status::IS_SUCCEED,\n         ingestion_status::IS_INVALID,\n         ingestion_status::IS_INVALID,\n         false,\n         false,\n         false},\n        {ingestion_status::IS_FAILED,\n         ingestion_status::IS_INVALID,\n         ingestion_status::IS_INVALID,\n         false,\n         false,\n         false},\n        {ingestion_status::IS_RUNNING,\n         ingestion_status::IS_RUNNING,\n         ingestion_status::IS_INVALID,\n         false,\n         false,\n         false},\n        {ingestion_status::IS_SUCCEED,\n         ingestion_status::IS_SUCCEED,\n         
ingestion_status::IS_RUNNING,\n         true,\n         false,\n         false},\n        {ingestion_status::IS_FAILED,\n         ingestion_status::IS_FAILED,\n         ingestion_status::IS_RUNNING,\n         false,\n         false,\n         false},\n        {ingestion_status::IS_SUCCEED,\n         ingestion_status::IS_SUCCEED,\n         ingestion_status::IS_SUCCEED,\n         true,\n         true,\n         true},\n    };\n\n    for (auto test : tests) {\n        ASSERT_EQ(test_report_group_ingestion_status(test.primary,\n                                                     test.secondary1,\n                                                     test.secondary2,\n                                                     test.is_empty_prepare_sent,\n                                                     test.replica_is_ingestion),\n                  test.is_group_ingestion_finished);\n        ASSERT_FALSE(_replica->is_ingestion());\n    }\n}\n\n// report_group_context_clean_flag unit tests\nTEST_F(replica_bulk_loader_test, report_group_cleanup_flag_in_unhealthy_state)\n{\n    // _primary_states.membership.secondaries is empty\n    mock_replica_config(partition_status::PS_PRIMARY);\n    ASSERT_FALSE(test_report_group_cleaned_up());\n}\n\nTEST_F(replica_bulk_loader_test, report_group_cleanup_flag_not_cleaned_up)\n{\n    mock_group_cleanup_flag(bulk_load_status::BLS_SUCCEED, true, false);\n    ASSERT_FALSE(test_report_group_cleaned_up());\n}\n\nTEST_F(replica_bulk_loader_test, report_group_cleanup_flag_all_cleaned_up)\n{\n    mock_group_cleanup_flag(bulk_load_status::BLS_INVALID, true, true);\n    ASSERT_TRUE(test_report_group_cleaned_up());\n}\n\n// report_group_is_paused unit tests\nTEST_F(replica_bulk_loader_test, report_group_is_paused_test)\n{\n    struct test_struct\n    {\n        bulk_load_status::type local_status;\n        bool expected;\n    } tests[]{{bulk_load_status::BLS_DOWNLOADING, false}, {bulk_load_status::BLS_PAUSED, true}};\n\n    for (auto test : tests) 
{\n        ASSERT_EQ(test_report_group_is_paused(test.local_status), test.expected);\n    }\n}\n\n// on_group_bulk_load_reply unit tests\nTEST_F(replica_bulk_loader_test, on_group_bulk_load_reply_downloading_error)\n{\n    mock_group_progress(bulk_load_status::BLS_DOWNLOADING, 30, 30, 60);\n    test_on_group_bulk_load_reply(bulk_load_status::BLS_DOWNLOADING, BALLOT, ERR_BUSY);\n    ASSERT_TRUE(is_secondary_bulk_load_state_reset());\n}\n\nTEST_F(replica_bulk_loader_test, on_group_bulk_load_reply_downloaded_error)\n{\n    mock_group_progress(bulk_load_status::BLS_DOWNLOADED);\n    test_on_group_bulk_load_reply(bulk_load_status::BLS_DOWNLOADED, BALLOT, ERR_INVALID_STATE);\n    ASSERT_TRUE(is_secondary_bulk_load_state_reset());\n}\n\nTEST_F(replica_bulk_loader_test, on_group_bulk_load_reply_ingestion_error)\n{\n    mock_group_ingestion_states(ingestion_status::IS_RUNNING, ingestion_status::IS_SUCCEED);\n    test_on_group_bulk_load_reply(\n        bulk_load_status::BLS_INGESTING, BALLOT - 1, ERR_OK, ERR_INVALID_STATE);\n    ASSERT_TRUE(is_secondary_bulk_load_state_reset());\n}\n\nTEST_F(replica_bulk_loader_test, on_group_bulk_load_reply_succeed_error)\n{\n    mock_group_cleanup_flag(bulk_load_status::BLS_SUCCEED);\n    test_on_group_bulk_load_reply(\n        bulk_load_status::BLS_SUCCEED, BALLOT - 1, ERR_OK, ERR_INVALID_STATE);\n    ASSERT_TRUE(is_secondary_bulk_load_state_reset());\n}\n\nTEST_F(replica_bulk_loader_test, on_group_bulk_load_reply_failed_error)\n{\n    mock_group_ingestion_states(ingestion_status::IS_RUNNING, ingestion_status::IS_SUCCEED);\n    test_on_group_bulk_load_reply(bulk_load_status::BLS_FAILED, BALLOT, ERR_OK, ERR_TIMEOUT);\n    ASSERT_TRUE(is_secondary_bulk_load_state_reset());\n}\n\nTEST_F(replica_bulk_loader_test, on_group_bulk_load_reply_pausing_error)\n{\n    mock_group_progress(bulk_load_status::BLS_PAUSED, 100, 50, 10);\n    test_on_group_bulk_load_reply(\n        bulk_load_status::BLS_PAUSING, BALLOT, ERR_OK, ERR_NETWORK_FAILURE);\n    
ASSERT_TRUE(is_secondary_bulk_load_state_reset());\n}\n\nTEST_F(replica_bulk_loader_test, on_group_bulk_load_reply_rpc_error)\n{\n    mock_group_cleanup_flag(bulk_load_status::BLS_INVALID, true, false);\n    test_on_group_bulk_load_reply(bulk_load_status::BLS_CANCELED, BALLOT, ERR_OBJECT_NOT_FOUND);\n    ASSERT_TRUE(is_secondary_bulk_load_state_reset());\n}\n\n// validate_status unit test\nTEST_F(replica_bulk_loader_test, validate_status_test)\n{\n    struct validate_struct\n    {\n        bulk_load_status::type meta_status;\n        bulk_load_status::type local_status;\n        bool expected_flag;\n    } tests[] = {{bulk_load_status::BLS_INVALID, bulk_load_status::BLS_INVALID, true},\n                 {bulk_load_status::BLS_PAUSED, bulk_load_status::BLS_PAUSED, false},\n                 {bulk_load_status::BLS_FAILED, bulk_load_status::BLS_INGESTING, true},\n                 {bulk_load_status::BLS_CANCELED, bulk_load_status::BLS_SUCCEED, true},\n                 {bulk_load_status::BLS_DOWNLOADING, bulk_load_status::BLS_INVALID, true},\n                 {bulk_load_status::BLS_DOWNLOADING, bulk_load_status::BLS_INGESTING, true},\n                 {bulk_load_status::BLS_DOWNLOADING, bulk_load_status::BLS_SUCCEED, true},\n                 {bulk_load_status::BLS_DOWNLOADING, bulk_load_status::BLS_FAILED, false},\n                 {bulk_load_status::BLS_DOWNLOADING, bulk_load_status::BLS_CANCELED, false},\n                 {bulk_load_status::BLS_DOWNLOADED, bulk_load_status::BLS_INVALID, false},\n                 {bulk_load_status::BLS_DOWNLOADED, bulk_load_status::BLS_DOWNLOADED, true},\n                 {bulk_load_status::BLS_INGESTING, bulk_load_status::BLS_DOWNLOADED, true},\n                 {bulk_load_status::BLS_INGESTING, bulk_load_status::BLS_SUCCEED, false},\n                 {bulk_load_status::BLS_SUCCEED, bulk_load_status::BLS_INVALID, true},\n                 {bulk_load_status::BLS_SUCCEED, bulk_load_status::BLS_DOWNLOADED, true},\n                 
{bulk_load_status::BLS_SUCCEED, bulk_load_status::BLS_INGESTING, true},\n                 {bulk_load_status::BLS_SUCCEED, bulk_load_status::BLS_DOWNLOADING, false},\n                 {bulk_load_status::BLS_PAUSING, bulk_load_status::BLS_INVALID, true},\n                 {bulk_load_status::BLS_PAUSING, bulk_load_status::BLS_DOWNLOADING, true},\n                 {bulk_load_status::BLS_PAUSING, bulk_load_status::BLS_DOWNLOADED, true},\n                 {bulk_load_status::BLS_PAUSING, bulk_load_status::BLS_PAUSED, true},\n                 {bulk_load_status::BLS_PAUSING, bulk_load_status::BLS_INGESTING, false}};\n\n    for (auto test : tests) {\n        ASSERT_EQ(validate_status(test.meta_status, test.local_status), test.expected_flag);\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/bulk_load/test/run.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\n./dsn_replica_bulk_load_test\n\nif [ $? -ne 0 ]; then\n    tail -n 100 data/log/log.1.txt\n    if [ -f core ]; then\n        gdb ./dsn_replica_bulk_load_test core -ex \"bt\"\n    fi\n    exit 1\nfi\n"
  },
  {
    "path": "src/replica/disk_cleaner.cpp",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#include <dsn/utility/flags.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/c/api_layer1.h>\n\n#include \"disk_cleaner.h\"\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DEFINE_uint64(\n    \"replication\",\n    gc_disk_error_replica_interval_seconds,\n    7 * 24 * 3600 /*7day*/,\n    \"Duration of error replica being removed, which is in a directory with '.err' suffixed\");\nDSN_TAG_VARIABLE(gc_disk_error_replica_interval_seconds, FT_MUTABLE);\n\nDSN_DEFINE_uint64(\n    \"replication\",\n    gc_disk_garbage_replica_interval_seconds,\n    24 * 3600 /*1day*/,\n    \"Duration of garbaged replica being removed, which is in a directory with '.gar' suffixed\");\nDSN_TAG_VARIABLE(gc_disk_garbage_replica_interval_seconds, FT_MUTABLE);\n\nDSN_DEFINE_uint64(\"replication\",\n                  gc_disk_migration_tmp_replica_interval_seconds,\n                  24 * 3600 /*1day*/,\n                  \"Duration of disk-migration tmp replica being removed, which is in a directory \"\n                  \"with '.tmp' suffixed\");\nDSN_TAG_VARIABLE(gc_disk_migration_tmp_replica_interval_seconds, 
FT_MUTABLE);\n\nDSN_DEFINE_uint64(\"replication\",\n                  gc_disk_migration_origin_replica_interval_seconds,\n                  7 * 24 * 3600 /*7day*/,\n                  \"Duration of disk-migration origin replica being removed, which is in a \"\n                  \"directory with '.ori' suffixed\");\nDSN_TAG_VARIABLE(gc_disk_migration_origin_replica_interval_seconds, FT_MUTABLE);\n\nconst std::string kFolderSuffixErr = \".err\";\nconst std::string kFolderSuffixGar = \".gar\";\nconst std::string kFolderSuffixBak = \".bak\";\nconst std::string kFolderSuffixOri = \".ori\";\nconst std::string kFolderSuffixTmp = \".tmp\";\n\nerror_s disk_remove_useless_dirs(const std::vector<std::string> &data_dirs,\n                                 /*output*/ disk_cleaning_report &report)\n{\n    std::vector<std::string> sub_list;\n    for (auto &dir : data_dirs) {\n        std::vector<std::string> tmp_list;\n        if (!dsn::utils::filesystem::get_subdirectories(dir, tmp_list, false)) {\n            dwarn_f(\"gc_disk: failed to get subdirectories in {}\", dir);\n            return error_s::make(ERR_OBJECT_NOT_FOUND, \"failed to get subdirectories\");\n        }\n        sub_list.insert(sub_list.end(), tmp_list.begin(), tmp_list.end());\n    }\n    for (auto &fpath : sub_list) {\n        auto name = dsn::utils::filesystem::get_file_name(fpath);\n        if (!is_data_dir_removable(name)) {\n            continue;\n        }\n        std::string folder_suffix = name.substr(name.length() - 4);\n\n        time_t mt;\n        if (!dsn::utils::filesystem::last_write_time(fpath, mt)) {\n            dwarn_f(\"gc_disk: failed to get last write time of {}\", fpath);\n            continue;\n        }\n\n        auto last_write_time = (uint64_t)mt;\n        uint64_t current_time_ms = dsn_now_ms();\n        uint64_t remove_interval_seconds = current_time_ms / 1000;\n\n        // don't delete \".bak\" directory because it is backed by administrator.\n        if (folder_suffix == 
kFolderSuffixErr) {\n            report.error_replica_count++;\n            remove_interval_seconds = FLAGS_gc_disk_error_replica_interval_seconds;\n        } else if (folder_suffix == kFolderSuffixGar) {\n            report.garbage_replica_count++;\n            remove_interval_seconds = FLAGS_gc_disk_garbage_replica_interval_seconds;\n        } else if (folder_suffix == kFolderSuffixTmp) {\n            report.disk_migrate_tmp_count++;\n            remove_interval_seconds = FLAGS_gc_disk_migration_tmp_replica_interval_seconds;\n        } else if (folder_suffix == kFolderSuffixOri) {\n            report.disk_migrate_origin_count++;\n            remove_interval_seconds = FLAGS_gc_disk_migration_origin_replica_interval_seconds;\n        }\n\n        if (last_write_time + remove_interval_seconds <= current_time_ms / 1000) {\n            if (!dsn::utils::filesystem::remove_path(fpath)) {\n                dwarn_f(\"gc_disk: failed to delete directory '{}', time_used_ms = {}\",\n                        fpath,\n                        dsn_now_ms() - current_time_ms);\n            } else {\n                dwarn_f(\"gc_disk: replica_dir_op succeed to delete directory '{}'\"\n                        \", time_used_ms = {}\",\n                        fpath,\n                        dsn_now_ms() - current_time_ms);\n                report.remove_dir_count++;\n            }\n        } else {\n            ddebug_f(\"gc_disk: reserve directory '{}', wait_seconds = {}\",\n                     fpath,\n                     last_write_time + remove_interval_seconds - current_time_ms / 1000);\n        }\n    }\n    return error_s::ok();\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/disk_cleaner.h",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n#pragma once\n\n#include <vector>\n#include <dsn/utility/errors.h>\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n\nnamespace dsn {\nnamespace replication {\nDSN_DECLARE_uint64(gc_disk_error_replica_interval_seconds);\nDSN_DECLARE_uint64(gc_disk_garbage_replica_interval_seconds);\nDSN_DECLARE_uint64(gc_disk_migration_tmp_replica_interval_seconds);\nDSN_DECLARE_uint64(gc_disk_migration_origin_replica_interval_seconds);\n\n// the invalid folder suffix, server will check disk folder and deal with them\nextern const std::string kFolderSuffixErr; // replica error dir\nextern const std::string kFolderSuffixGar; // replica closed and assign garbage dir\nextern const std::string kFolderSuffixBak; // replica backup dir which can be restored\nextern const std::string kFolderSuffixOri; // replica disk migration origin dir\nextern const std::string kFolderSuffixTmp; // replica disk migration temp dir\n\nstruct disk_cleaning_report\n{\n    int remove_dir_count{0};\n\n    int garbage_replica_count{0};\n    int error_replica_count{0};\n    int disk_migrate_tmp_count{0};\n    int disk_migrate_origin_count{0};\n};\n\n// Removes the useless data 
from data directories.\nextern error_s disk_remove_useless_dirs(const std::vector<std::string> &data_dirs,\n                                        /*output*/ disk_cleaning_report &report);\n\ninline bool is_data_dir_removable(const std::string &dir)\n{\n    if (dir.length() < 4) {\n        return false;\n    }\n    const std::string folder_suffix = dir.substr(dir.length() - 4);\n    return (folder_suffix == kFolderSuffixErr || folder_suffix == kFolderSuffixGar ||\n            folder_suffix == kFolderSuffixTmp || folder_suffix == kFolderSuffixOri);\n}\n\n// Note: a \".bak\" dir is invalid but must not be deleted, because it may have been backed up\n// by an administrator.\ninline bool is_data_dir_invalid(const std::string &dir)\n{\n    if (dir.length() < 4) {\n        return false;\n    }\n    const std::string folder_suffix = dir.substr(dir.length() - 4);\n    return is_data_dir_removable(dir) || folder_suffix == kFolderSuffixBak;\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/duplication_pipeline.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"duplication_pipeline.h\"\n\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/dist/fmt_logging.h>\n\n#include \"replica/replica_stub.h\"\n#include \"load_from_private_log.h\"\n\nnamespace dsn {\nnamespace replication {\n\n//                     //\n// mutation_duplicator //\n//                     //\n\n/*static*/ std::function<std::unique_ptr<mutation_duplicator>(\n    replica_base *, string_view /*remote cluster*/, string_view /*app*/)>\n    mutation_duplicator::creator;\n\n//               //\n// load_mutation //\n//               //\n\nvoid load_mutation::run()\n{\n    decree last_decree = _duplicator->progress().last_decree;\n    _start_decree = last_decree + 1;\n    if (_replica->private_log()->max_commit_on_disk() < _start_decree) {\n        // wait 100ms for next try if no mutation was added.\n        repeat(100_ms);\n        return;\n    }\n\n    _log_on_disk->set_start_decree(_start_decree);\n    _log_on_disk->async();\n}\n\nload_mutation::~load_mutation() = default;\n\nload_mutation::load_mutation(replica_duplicator *duplicator,\n                             replica *r,\n                             
load_from_private_log *load_private)\n    : replica_base(r), _log_on_disk(load_private), _replica(r), _duplicator(duplicator)\n{\n}\n\n//               //\n// ship_mutation //\n//               //\n\nvoid ship_mutation::ship(mutation_tuple_set &&in)\n{\n    _mutation_duplicator->duplicate(std::move(in), [this](size_t total_shipped_size) mutable {\n        update_progress();\n        _counter_dup_shipped_bytes_rate->add(total_shipped_size);\n        step_down_next_stage();\n    });\n}\n\nvoid ship_mutation::run(decree &&last_decree, mutation_tuple_set &&in)\n{\n    _last_decree = last_decree;\n\n    if (in.empty()) {\n        update_progress();\n        step_down_next_stage();\n        return;\n    }\n\n    ship(std::move(in));\n}\n\nvoid ship_mutation::update_progress()\n{\n    dcheck_eq_replica(\n        _duplicator->update_progress(duplication_progress().set_last_decree(_last_decree)),\n        error_s::ok());\n\n    // committed decree never decreases\n    decree last_committed_decree = _replica->last_committed_decree();\n    dcheck_ge_replica(last_committed_decree, _last_decree);\n}\n\nship_mutation::ship_mutation(replica_duplicator *duplicator)\n    : replica_base(duplicator),\n      _duplicator(duplicator),\n      _replica(duplicator->_replica),\n      _stub(duplicator->_replica->get_replica_stub())\n{\n    _mutation_duplicator = new_mutation_duplicator(\n        duplicator, _duplicator->remote_cluster_name(), _replica->get_app_info()->app_name);\n    _mutation_duplicator->set_task_environment(duplicator);\n\n    _counter_dup_shipped_bytes_rate.init_app_counter(\"eon.replica_stub\",\n                                                     \"dup.shipped_bytes_rate\",\n                                                     COUNTER_TYPE_RATE,\n                                                     \"shipping rate of private log in bytes\");\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/duplication_pipeline.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/cpp/pipeline.h>\n#include <dsn/dist/replication/replica_base.h>\n#include <dsn/dist/replication/mutation_duplicator.h>\n\n#include \"replica/replica.h\"\n#include \"replica_duplicator.h\"\n\nnamespace dsn {\nnamespace replication {\n\nusing namespace literals::chrono_literals;\n\n// load_mutation is a pipeline stage for loading mutations, aka mutation_tuple_set,\n// to the next stage, `ship_mutation`.\n// ThreadPool: THREAD_POOL_REPLICATION\nclass load_mutation final : public replica_base,\n                            public pipeline::when<>,\n                            public pipeline::result<decree, mutation_tuple_set>\n{\npublic:\n    void run() override;\n\n    /// ==== Implementation ==== ///\n\n    load_mutation(replica_duplicator *duplicator, replica *r, load_from_private_log *load_private);\n\n    ~load_mutation();\n\nprivate:\n    load_from_private_log *_log_on_disk;\n    decree _start_decree{0};\n\n    replica *_replica{nullptr};\n    replica_duplicator *_duplicator{nullptr};\n};\n\n// ship_mutation is a pipeline stage receiving a set of mutations,\n// sending them to the remote cluster. 
After finished, the pipeline\n// will restart from load_mutation.\n// ThreadPool: THREAD_POOL_REPLICATION\nclass ship_mutation final : public replica_base,\n                            public pipeline::when<decree, mutation_tuple_set>,\n                            public pipeline::result<>\n{\npublic:\n    void run(decree &&last_decree, mutation_tuple_set &&in) override;\n\n    /// ==== Implementation ==== ///\n\n    explicit ship_mutation(replica_duplicator *duplicator);\n\n    void ship(mutation_tuple_set &&in);\n\nprivate:\n    void update_progress();\n\n    friend class ship_mutation_test;\n    friend class replica_duplicator_test;\n\n    std::unique_ptr<mutation_duplicator> _mutation_duplicator;\n\n    replica_duplicator *_duplicator;\n    replica *_replica;\n    replica_stub *_stub;\n\n    decree _last_decree{invalid_decree};\n\n    perf_counter_wrapper _counter_dup_shipped_bytes_rate;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/duplication_sync_timer.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"replica/replica_stub.h\"\n#include \"replica/replica.h\"\n\n#include \"duplication_sync_timer.h\"\n#include \"replica_duplicator_manager.h\"\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/utility/output_utils.h>\n#include <dsn/utility/string_conv.h>\n\nnamespace dsn {\nnamespace replication {\n\nDEFINE_TASK_CODE(LPC_DUPLICATION_SYNC_TIMER, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\n\nvoid duplication_sync_timer::run()\n{\n    // ensure duplication sync never be concurrent\n    if (_rpc_task) {\n        ddebug_f(\"a duplication sync is already ongoing\");\n        return;\n    }\n\n    {\n        zauto_lock l(_stub->_state_lock);\n        if (_stub->_state == replica_stub::NS_Disconnected) {\n            ddebug_f(\"stop this round of duplication sync because this server is disconnected from \"\n                     \"meta server\");\n            return;\n        }\n    }\n\n    auto req = make_unique<duplication_sync_request>();\n    req->node = _stub->primary_address();\n\n    // collects confirm points from all primaries on this server\n    uint64_t pending_muts_cnt = 0;\n    for 
(const replica_ptr &r : get_all_primaries()) {\n        auto confirmed = r->get_duplication_manager()->get_duplication_confirms_to_update();\n        if (!confirmed.empty()) {\n            req->confirm_list[r->get_gpid()] = std::move(confirmed);\n        }\n        pending_muts_cnt += r->get_duplication_manager()->get_pending_mutations_count();\n    }\n    _stub->_counter_dup_pending_mutations_count->set(pending_muts_cnt);\n\n    duplication_sync_rpc rpc(std::move(req), RPC_CM_DUPLICATION_SYNC, 3_s);\n    rpc_address meta_server_address(_stub->get_meta_server_address());\n    ddebug_f(\"duplication_sync to meta({})\", meta_server_address.to_string());\n\n    zauto_lock l(_lock);\n    _rpc_task =\n        rpc.call(meta_server_address, &_stub->_tracker, [this, rpc](error_code err) mutable {\n            on_duplication_sync_reply(err, rpc.response());\n        });\n}\n\nvoid duplication_sync_timer::on_duplication_sync_reply(error_code err,\n                                                       const duplication_sync_response &resp)\n{\n    if (err == ERR_OK && resp.err != ERR_OK) {\n        err = resp.err;\n    }\n    if (err != ERR_OK) {\n        derror_f(\"on_duplication_sync_reply: err({})\", err.to_string());\n    } else {\n        update_duplication_map(resp.dup_map);\n    }\n\n    zauto_lock l(_lock);\n    _rpc_task = nullptr;\n}\n\nvoid duplication_sync_timer::update_duplication_map(\n    const std::map<int32_t, std::map<int32_t, duplication_entry>> &dup_map)\n{\n    for (replica_ptr &r : get_all_replicas()) {\n        auto it = dup_map.find(r->get_gpid().get_app_id());\n        if (it == dup_map.end()) {\n            // no duplication is assigned to this app\n            r->get_duplication_manager()->update_duplication_map({});\n        } else {\n            r->get_duplication_manager()->update_duplication_map(it->second);\n        }\n    }\n}\n\nduplication_sync_timer::duplication_sync_timer(replica_stub *stub) : _stub(stub) 
{}\n\nduplication_sync_timer::~duplication_sync_timer() {}\n\nstd::vector<replica_ptr> duplication_sync_timer::get_all_primaries()\n{\n    std::vector<replica_ptr> replica_vec;\n    {\n        zauto_read_lock l(_stub->_replicas_lock);\n        for (auto &kv : _stub->_replicas) {\n            replica_ptr r = kv.second;\n            if (r->status() != partition_status::PS_PRIMARY) {\n                continue;\n            }\n            replica_vec.emplace_back(std::move(r));\n        }\n    }\n    return replica_vec;\n}\n\nstd::vector<replica_ptr> duplication_sync_timer::get_all_replicas()\n{\n    std::vector<replica_ptr> replica_vec;\n    {\n        zauto_read_lock l(_stub->_replicas_lock);\n        for (auto &kv : _stub->_replicas) {\n            replica_ptr r = kv.second;\n            replica_vec.emplace_back(std::move(r));\n        }\n    }\n    return replica_vec;\n}\n\nvoid duplication_sync_timer::close()\n{\n    ddebug(\"stop duplication sync\");\n\n    {\n        zauto_lock l(_lock);\n        if (_rpc_task) {\n            _rpc_task->cancel(true);\n            _rpc_task = nullptr;\n        }\n    }\n\n    if (_timer_task) {\n        _timer_task->cancel(true);\n        _timer_task = nullptr;\n    }\n}\n\nvoid duplication_sync_timer::start()\n{\n    ddebug_f(\"run duplication sync periodically in {}s\", DUPLICATION_SYNC_PERIOD_SECOND);\n\n    _timer_task = tasking::enqueue_timer(LPC_DUPLICATION_SYNC_TIMER,\n                                         &_stub->_tracker,\n                                         [this]() { run(); },\n                                         DUPLICATION_SYNC_PERIOD_SECOND * 1_s,\n                                         0,\n                                         DUPLICATION_SYNC_PERIOD_SECOND * 1_s);\n}\n\nstd::multimap<dupid_t, duplication_sync_timer::replica_dup_state>\nduplication_sync_timer::get_dup_states(int app_id, /*out*/ bool *app_found)\n{\n    *app_found = false;\n    std::multimap<dupid_t, replica_dup_state> result;\n    
for (const replica_ptr &r : get_all_primaries()) {\n        gpid rid = r->get_gpid();\n        if (rid.get_app_id() != app_id) {\n            continue;\n        }\n        *app_found = true;\n        replica_dup_state state;\n        state.id = rid;\n        auto states = r->get_duplication_manager()->get_dup_states();\n        decree last_committed_decree = r->last_committed_decree();\n        for (const auto &s : states) {\n            state.duplicating = s.duplicating;\n            state.not_confirmed = std::max(decree(0), last_committed_decree - s.confirmed_decree);\n            state.not_duplicated = std::max(decree(0), last_committed_decree - s.last_decree);\n            state.fail_mode = s.fail_mode;\n            result.emplace(std::make_pair(s.dupid, state));\n        }\n    }\n    return result;\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/duplication_sync_timer.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <atomic>\n\n#include \"replica/replica_stub.h\"\n\n#include <dsn/dist/replication/duplication_common.h>\n#include <dsn/utility/chrono_literals.h>\n\nnamespace dsn {\nnamespace replication {\n\nusing namespace literals::chrono_literals;\n\nconstexpr int DUPLICATION_SYNC_PERIOD_SECOND = 10;\n\n// Per-server(replica_stub)-instance.\nclass duplication_sync_timer\n{\npublic:\n    explicit duplication_sync_timer(replica_stub *stub);\n\n    ~duplication_sync_timer();\n\n    void start();\n\n    void close();\n\n    struct replica_dup_state\n    {\n        gpid id;\n        bool duplicating{false};\n        decree not_duplicated{0};\n        decree not_confirmed{0};\n        duplication_fail_mode::type fail_mode{duplication_fail_mode::FAIL_SLOW};\n    };\n    std::multimap<dupid_t, replica_dup_state> get_dup_states(int app_id, /*out*/ bool *app_found);\n\nprivate:\n    // replica server periodically uploads current confirm points to meta server by sending\n    // `duplication_sync_request`.\n    // if success, meta server will respond with `duplication_sync_response`, which contains\n    // the entire set of duplications on 
this server.\n    void run();\n\n    /// \\param dup_map: <appid -> <dupid -> dup_entry>>\n    void\n    update_duplication_map(const std::map<app_id, std::map<dupid_t, duplication_entry>> &dup_map);\n\n    void on_duplication_sync_reply(error_code err, const duplication_sync_response &resp);\n\n    std::vector<replica_ptr> get_all_primaries();\n\n    std::vector<replica_ptr> get_all_replicas();\n\nprivate:\n    friend class duplication_sync_timer_test;\n\n    replica_stub *_stub{nullptr};\n\n    task_ptr _timer_task;\n    task_ptr _rpc_task;\n    mutable zlock _lock; // protect _rpc_task\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/load_from_private_log.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/dist/fmt_logging.h>\n\n#include \"replica/replica_stub.h\"\n#include \"replica/replica.h\"\n#include \"replica/mutation_log_utils.h\"\n#include \"load_from_private_log.h\"\n#include \"replica_duplicator.h\"\n#include <dsn/utility/fail_point.h>\n\nnamespace dsn {\nnamespace replication {\n\n/*static*/ constexpr int load_from_private_log::MAX_ALLOWED_BLOCK_REPEATS;\n/*static*/ constexpr int load_from_private_log::MAX_ALLOWED_FILE_REPEATS;\n\nbool load_from_private_log::will_fail_skip() const\n{\n    return _err_file_repeats_num >= MAX_ALLOWED_FILE_REPEATS &&\n           _duplicator->fail_mode() == duplication_fail_mode::FAIL_SKIP;\n}\n\nbool load_from_private_log::will_fail_fast() const\n{\n    return _err_file_repeats_num >= MAX_ALLOWED_FILE_REPEATS &&\n           _duplicator->fail_mode() == duplication_fail_mode::FAIL_FAST;\n}\n\n// Fast path to next file. 
If next file (_current->index + 1) is invalid,\n// we try to list all files and select a new one to start (find_log_file_to_start).\nbool load_from_private_log::switch_to_next_log_file()\n{\n    auto file_map = _private_log->get_log_file_map();\n    auto next_file_it = file_map.find(_current->index() + 1);\n    if (next_file_it != file_map.end()) {\n        log_file_ptr file;\n        error_s es = log_utils::open_read(next_file_it->second->path(), file);\n        if (!es.is_ok()) {\n            derror_replica(\"{}\", es);\n            _current = nullptr;\n            return false;\n        }\n        start_from_log_file(file);\n        return true;\n    } else {\n        ddebug_f(\"no next log file (log.{}) is found\", _current->index() + 1);\n        _current = nullptr;\n        return false;\n    }\n}\n\nvoid load_from_private_log::run()\n{\n    dassert_replica(_start_decree != invalid_decree, \"{}\", _start_decree);\n    _duplicator->verify_start_decree(_start_decree);\n\n    // last_decree() == invalid_decree is the init status of mutation_buffer when create\n    // _mutation_batch, which means the duplication sync hasn't been completed, so need wait sync\n    // complete and the  confirmed_decree  != invalid_decree, and then reset mutation_buffer to\n    // valid status\n    if (_mutation_batch.last_decree() == invalid_decree) {\n        if (_duplicator->progress().confirmed_decree == invalid_decree) {\n            dwarn_replica(\"duplication status hasn't been sync completed, try next for delay 1s, \"\n                          \"last_commit_decree={}, \"\n                          \"confirmed_decree={}\",\n                          _duplicator->progress().last_decree,\n                          _duplicator->progress().confirmed_decree);\n            repeat(1_s);\n\n            FAIL_POINT_INJECT_NOT_RETURN_F(\"duplication_sync_complete\", [&](string_view s) -> void {\n                if (_duplicator->progress().confirmed_decree == invalid_decree) {\n          
          // set_confirmed_decree(9), the value must be equal (decree_start of\n                    // `test_start_duplication` in `load_from_private_log_test.cpp`) -1\n                    _duplicator->update_progress(_duplicator->progress().set_confirmed_decree(9));\n                }\n            });\n            return;\n        } else {\n            _mutation_batch.reset_mutation_buffer(_duplicator->progress().confirmed_decree);\n        }\n    }\n\n    if (_current == nullptr) {\n        find_log_file_to_start();\n        if (_current == nullptr) {\n            ddebug_replica(\"no private log file is currently available\");\n            repeat(_repeat_delay);\n            return;\n        }\n    }\n\n    replay_log_block();\n}\n\nvoid load_from_private_log::find_log_file_to_start()\n{\n    // `file_map` has already excluded the useless log files during replica init.\n    auto file_map = _private_log->get_log_file_map();\n\n    // Reopen the files. Because the internal file handle of `file_map`\n    // is cleared once WAL replay finished. 
They are unable to read.\n    std::map<int, log_file_ptr> new_file_map;\n    for (const auto &pr : file_map) {\n        log_file_ptr file;\n        error_s es = log_utils::open_read(pr.second->path(), file);\n        if (!es.is_ok()) {\n            derror_replica(\"{}\", es);\n            return;\n        }\n        new_file_map.emplace(pr.first, file);\n    }\n\n    find_log_file_to_start(std::move(new_file_map));\n}\n\nvoid load_from_private_log::find_log_file_to_start(std::map<int, log_file_ptr> log_file_map)\n{\n    _current = nullptr;\n    if (dsn_unlikely(log_file_map.empty())) {\n        derror_replica(\"unable to start duplication since no log file is available\");\n        return;\n    }\n\n    for (auto it = log_file_map.begin(); it != log_file_map.end(); it++) {\n        auto next_it = std::next(it);\n        if (next_it == log_file_map.end()) {\n            // use the last file if no file to read\n            if (!_current) {\n                _current = it->second;\n            }\n            break;\n        }\n        if (it->second->previous_log_max_decree(get_gpid()) < _start_decree &&\n            _start_decree <= next_it->second->previous_log_max_decree(get_gpid())) {\n            // `start_decree` is within the range\n            _current = it->second;\n            // find the latest file that matches the condition\n        }\n    }\n    start_from_log_file(_current);\n}\n\nvoid load_from_private_log::replay_log_block()\n{\n    error_s err =\n        mutation_log::replay_block(_current,\n                                   [this](int log_bytes_length, mutation_ptr &mu) -> bool {\n                                       auto es = _mutation_batch.add(std::move(mu));\n                                       dassert_replica(es.is_ok(), es.description());\n                                       _counter_dup_log_read_bytes_rate->add(log_bytes_length);\n                                       _counter_dup_log_read_mutations_rate->increment();\n               
                        return true;\n                                   },\n                                   _start_offset,\n                                   _current_global_end_offset);\n    if (!err.is_ok() && err.code() != ERR_HANDLE_EOF) {\n        // Error handling on loading failure:\n        // - If block loading failed for `MAX_ALLOWED_BLOCK_REPEATS` times, it restarts reading\n        //   the file.\n        // - If file loading failed for `MAX_ALLOWED_FILE_REPEATS` times, which means it\n        //   met some permanent problem (maybe data corruption), the next move depends on\n        //   the fail mode of the duplication:\n        //   1. fail-skip: skip this file and abandon its data; acceptable for users who\n        //      tolerate minor data loss.\n        //   2. fail-fast: fail the assertion below so that the file gets checked.\n        //   3. otherwise (fail-slow): retry reading this file until human intervention.\n        _err_block_repeats_num++;\n        if (_err_block_repeats_num >= MAX_ALLOWED_BLOCK_REPEATS) {\n            derror_replica(\n                \"loading mutation logs failed for {} times: [err: {}, file: {}, start_offset: {}]\",\n                _err_block_repeats_num,\n                err,\n                _current->path(),\n                _start_offset);\n            _counter_dup_load_file_failed_count->increment();\n            _err_file_repeats_num++;\n            if (dsn_unlikely(will_fail_skip())) {\n                // skip this file\n                derror_replica(\"failed loading for {} times, abandon file {} and try next\",\n                               _err_file_repeats_num,\n                               _current->path());\n                _err_file_repeats_num = 0;\n\n                auto prev_offset = _current_global_end_offset;\n                if (switch_to_next_log_file()) {\n                    // successfully skip to next file\n                    auto skipped_bytes = _current_global_end_offset - prev_offset;\n                    _counter_dup_load_skipped_bytes_count->add(skipped_bytes);\n                    repeat(_repeat_delay);\n                    return;\n            
    }\n            } else if (dsn_unlikely(will_fail_fast())) {\n                dassert_replica(\n                    false,\n                    \"unable to load file {}, fail fast. please check if the file is corrupted\",\n                    _current->path());\n            }\n            // retry from file start\n            find_log_file_to_start();\n        }\n        repeat(_repeat_delay);\n        return;\n    }\n\n    if (err.is_ok()) {\n        _start_offset = static_cast<size_t>(_current_global_end_offset - _current->start_offset());\n        if (_mutation_batch.bytes() < FLAGS_duplicate_log_batch_bytes) {\n            repeat();\n            return;\n        }\n    } else if (switch_to_next_log_file()) {\n        // !err.is_ok() means that err.code() == ERR_HANDLE_EOF, the current file read completed and\n        // try next file\n        repeat();\n        return;\n    }\n    // update last_decree even for empty batch.\n    // case1: err.is_ok(err.code() != ERR_HANDLE_EOF), but _mutation_batch.bytes() >=\n    // FLAGS_duplicate_log_batch_bytes\n    // case2: !err.is_ok(err.code() == ERR_HANDLE_EOF) and no next file, need commit the last\n    // mutations()\n    step_down_next_stage(_mutation_batch.last_decree(), _mutation_batch.move_all_mutations());\n}\n\nload_from_private_log::load_from_private_log(replica *r, replica_duplicator *dup)\n    : replica_base(r),\n      _private_log(r->private_log()),\n      _duplicator(dup),\n      _stub(r->get_replica_stub()),\n      _mutation_batch(dup)\n{\n    _counter_dup_log_read_bytes_rate.init_app_counter(\"eon.replica_stub\",\n                                                      \"dup.log_read_bytes_rate\",\n                                                      COUNTER_TYPE_RATE,\n                                                      \"reading rate of private log in bytes\");\n    _counter_dup_log_read_mutations_rate.init_app_counter(\n        \"eon.replica_stub\",\n        \"dup.log_read_mutations_rate\",\n      
  COUNTER_TYPE_RATE,\n        \"reading rate of mutations from private log\");\n    _counter_dup_load_file_failed_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"dup.load_file_failed_count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"the number of failures loading a private log file during duplication\");\n    _counter_dup_load_skipped_bytes_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"dup.load_skipped_bytes_count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"bytes of mutations that were skipped because of failure during duplication\");\n}\n\nvoid load_from_private_log::set_start_decree(decree start_decree)\n{\n    _start_decree = start_decree;\n    _mutation_batch.set_start_decree(start_decree);\n}\n\nvoid load_from_private_log::start_from_log_file(log_file_ptr f)\n{\n    ddebug_replica(\"start loading from log file {}\", f->path());\n\n    _current = std::move(f);\n    _start_offset = 0;\n    _current_global_end_offset = _current->start_offset();\n    _err_block_repeats_num = 0;\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/load_from_private_log.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/cpp/pipeline.h>\n#include <dsn/utility/errors.h>\n#include <dsn/dist/replication/mutation_duplicator.h>\n#include <gtest/gtest_prod.h>\n\n#include \"replica/mutation_log.h\"\n#include \"mutation_batch.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_duplicator;\nclass replica_stub;\n\n/// Loads mutations from private log into memory.\n/// It works in THREAD_POOL_REPLICATION_LONG (LPC_DUPLICATION_LOAD_MUTATIONS),\n/// which permits tasks to be executed in a blocking way.\n/// NOTE: The resulted `mutation_tuple_set` may be empty.\nclass load_from_private_log final : public replica_base,\n                                    public pipeline::when<>,\n                                    public pipeline::result<decree, mutation_tuple_set>\n{\npublic:\n    load_from_private_log(replica *r, replica_duplicator *dup);\n\n    // Start loading block from private log file.\n    // The loaded mutations will be passed down to `ship_mutation`.\n    void run() override;\n\n    void set_start_decree(decree start_decree);\n\n    /// ==== Implementation ==== ///\n\n    /// Find the log file that contains 
`_start_decree`.\n    void find_log_file_to_start();\n    void find_log_file_to_start(std::map<int, log_file_ptr> log_files);\n\n    void replay_log_block();\n\n    // Switches to the log file with index = current_log_index + 1.\n    // Returns true if succeeds.\n    bool switch_to_next_log_file();\n\n    void start_from_log_file(log_file_ptr f);\n\n    bool will_fail_skip() const;\n    bool will_fail_fast() const;\n\n    void TEST_set_repeat_delay(std::chrono::milliseconds delay) { _repeat_delay = delay; }\n\n    static constexpr int MAX_ALLOWED_BLOCK_REPEATS{3};\n    static constexpr int MAX_ALLOWED_FILE_REPEATS{10};\n\nprivate:\n    friend class load_from_private_log_test;\n    friend class load_fail_mode_test;\n    FRIEND_TEST(load_fail_mode_test, fail_skip);\n    FRIEND_TEST(load_fail_mode_test, fail_slow);\n    FRIEND_TEST(load_fail_mode_test, fail_skip_real_corrupted_file);\n\n    mutation_log_ptr _private_log;\n    replica_duplicator *_duplicator;\n    replica_stub *_stub;\n\n    log_file_ptr _current;\n\n    size_t _start_offset{0};\n    int64_t _current_global_end_offset{0};\n    mutation_batch _mutation_batch;\n\n    // How many times it repeats reading from current block but failed.\n    int _err_block_repeats_num{0};\n    // How many times it repeats reading current log file but failed.\n    int _err_file_repeats_num{0};\n\n    decree _start_decree{0};\n\n    perf_counter_wrapper _counter_dup_load_file_failed_count;\n    perf_counter_wrapper _counter_dup_load_skipped_bytes_count;\n    perf_counter_wrapper _counter_dup_log_read_bytes_rate;\n    perf_counter_wrapper _counter_dup_log_read_mutations_rate;\n\n    std::chrono::milliseconds _repeat_delay{10_s};\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/mutation_batch.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/cpp/message_utils.h>\n\n#include \"replica_duplicator.h\"\n#include \"mutation_batch.h\"\n\nnamespace dsn {\nnamespace replication {\n\n/*static*/ constexpr int64_t mutation_batch::PREPARE_LIST_NUM_ENTRIES;\n\nmutation_buffer::mutation_buffer(replica_base *r,\n                                 decree init_decree,\n                                 int max_count,\n                                 mutation_committer committer)\n    : prepare_list(r, init_decree, max_count, committer)\n{\n    auto counter_str = fmt::format(\"dup_recent_mutation_loss_count@{}\", r->get_gpid());\n    _counter_dulication_mutation_loss_count.init_app_counter(\n        \"eon.replica\", counter_str.c_str(), COUNTER_TYPE_VOLATILE_NUMBER, counter_str.c_str());\n}\n\nvoid mutation_buffer::commit(decree d, commit_type ct)\n{\n    if (d <= last_committed_decree())\n        return;\n\n    if (ct != COMMIT_TO_DECREE_HARD) {\n        dassert_replica(false, \"invalid commit type {}\", (int)ct);\n    }\n\n    ballot last_bt = 0;\n    for (decree d0 = last_committed_decree() + 1; d0 <= d; d0++) {\n        mutation_ptr 
next_committed_mutation = get_mutation_by_decree(d0);\n        // The unexpected case as follow: next_committed_decree is out of prepare_list[start~end]\n        //\n        // last_committed_decree - next_committed_decree\n        //                         |                                                  |\n        //                        n                                              n+1\n        //\n        //  [min_decree------max_decree]\n        //                |                                |\n        //             n+m(m>1)            n+k(k>=m)\n        //\n        // just derror but not dassert if mutation loss or other problem, it's different from base\n        // class implement. And from the error and perf-counter, we can choose restart duplication\n        // or ignore the loss.\n        if (next_committed_mutation == nullptr || !next_committed_mutation->is_logged()) {\n            derror_replica(\"mutation[{}] is lost in prepare_list: \"\n                           \"prepare_last_committed_decree={}, prepare_min_decree={}, \"\n                           \"prepare_max_decree={}\",\n                           d0,\n                           last_committed_decree(),\n                           min_decree(),\n                           max_decree());\n            _counter_dulication_mutation_loss_count->set(min_decree() - last_committed_decree());\n            // if next_commit_mutation loss, let last_commit_decree catch up  with min_decree, and\n            // the next loop will commit from min_decree\n            _last_committed_decree = min_decree() - 1;\n            return;\n        }\n\n        dcheck_ge_replica(next_committed_mutation->data.header.ballot, last_bt);\n        _last_committed_decree++;\n        last_bt = next_committed_mutation->data.header.ballot;\n        _committer(next_committed_mutation);\n    }\n}\n\nerror_s mutation_batch::add(mutation_ptr mu)\n{\n    if (mu->get_decree() <= _mutation_buffer->last_committed_decree()) 
{\n        // ignore\n        return error_s::ok();\n    }\n\n    auto old = _mutation_buffer->get_mutation_by_decree(mu->get_decree());\n    if (old != nullptr && old->data.header.ballot >= mu->data.header.ballot) {\n        // ignore\n        return error_s::ok();\n    }\n\n    error_code ec = _mutation_buffer->prepare(mu, partition_status::PS_INACTIVE);\n    if (ec != ERR_OK) {\n        return FMT_ERR(\n            ERR_INVALID_DATA,\n            \"failed to add mutation [err:{}, logged:{}, decree:{}, committed:{}, start_decree:{}]\",\n            ec.to_string(),\n            mu->is_logged(),\n            mu->get_decree(),\n            mu->data.header.last_committed_decree,\n            _start_decree);\n    }\n\n    return error_s::ok();\n}\n\ndecree mutation_batch::last_decree() const { return _mutation_buffer->last_committed_decree(); }\n\nvoid mutation_batch::set_start_decree(decree d) { _start_decree = d; }\n\nvoid mutation_batch::reset_mutation_buffer(decree d) { _mutation_buffer->reset(d); }\n\nmutation_tuple_set mutation_batch::move_all_mutations()\n{\n    // free the internal space\n    _mutation_buffer->truncate(last_decree());\n    _total_bytes = 0;\n    return std::move(_loaded_mutations);\n}\n\nmutation_batch::mutation_batch(replica_duplicator *r) : replica_base(r)\n{\n    // Prepend a special tag identifying this is a mutation_batch,\n    // so `dxxx_replica` logging in prepare_list will print along with its real caller.\n    // This helps for debugging.\n    replica_base base(\n        r->get_gpid(), std::string(\"mutation_batch@\") + r->replica_name(), r->app_name());\n    _mutation_buffer =\n        make_unique<mutation_buffer>(&base, 0, PREPARE_LIST_NUM_ENTRIES, [this](mutation_ptr &mu) {\n            // committer\n            add_mutation_if_valid(mu, _start_decree);\n        });\n\n    // start duplication from confirmed_decree\n    _mutation_buffer->reset(r->progress().confirmed_decree);\n}\n\nvoid 
mutation_batch::add_mutation_if_valid(mutation_ptr &mu, decree start_decree)\n{\n    if (mu->get_decree() < start_decree) {\n        // ignore\n        return;\n    }\n    for (mutation_update &update : mu->data.updates) {\n        // ignore WRITE_EMPTY\n        if (update.code == RPC_REPLICATION_WRITE_EMPTY) {\n            continue;\n        }\n        // Ignore non-idempotent writes.\n        // Normally a duplicating replica will reply non-idempotent writes with\n        // ERR_OPERATION_DISABLED, but there could still be a mutation written\n        // before the duplication was added.\n        // To ignore means this write will be lost, which is acceptable under this rare case.\n        if (!task_spec::get(update.code)->rpc_request_is_write_idempotent) {\n            continue;\n        }\n        blob bb;\n        if (update.data.buffer() != nullptr) {\n            bb = std::move(update.data);\n        } else {\n            bb = blob::create_from_bytes(update.data.data(), update.data.length());\n        }\n\n        _total_bytes += bb.length();\n        _loaded_mutations.emplace(\n            std::make_tuple(mu->data.header.timestamp, update.code, std::move(bb)));\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/mutation_batch.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n#include <dsn/dist/replication/mutation_duplicator.h>\n\n#include \"replica/mutation.h\"\n#include \"replica/prepare_list.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_duplicator;\n\nclass mutation_buffer : public prepare_list\n{\npublic:\n    mutation_buffer(replica_base *r,\n                    decree init_decree,\n                    int max_count,\n                    mutation_committer committer);\n\n    void commit(decree d, commit_type ct);\n\nprivate:\n    perf_counter_wrapper _counter_dulication_mutation_loss_count;\n};\n\n// A sorted array of committed mutations that are ready for duplication.\n// Not thread-safe.\nclass mutation_batch : replica_base\n{\npublic:\n    static constexpr int64_t PREPARE_LIST_NUM_ENTRIES{200};\n\n    explicit mutation_batch(replica_duplicator *r);\n\n    error_s add(mutation_ptr mu);\n\n    void add_mutation_if_valid(mutation_ptr &, decree start_decree);\n\n    mutation_tuple_set move_all_mutations();\n\n    decree last_decree() const;\n\n    // mutations with decree < d will be ignored.\n    void 
set_start_decree(decree d);\n\n    void reset_mutation_buffer(decree d);\n\n    size_t size() const { return _loaded_mutations.size(); }\n\n    uint64_t bytes() const { return _total_bytes; }\n\nprivate:\n    friend class replica_duplicator_test;\n    friend class mutation_batch_test;\n\n    std::unique_ptr<prepare_list> _mutation_buffer;\n    mutation_tuple_set _loaded_mutations;\n    decree _start_decree{invalid_decree};\n    uint64_t _total_bytes{0};\n};\n\nusing mutation_batch_u_ptr = std::unique_ptr<mutation_batch>;\n\n/// Extract mutations into mutation_tuple_set if they are not WRITE_EMPTY.\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/replica_duplicator.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"replica_duplicator.h\"\n#include \"load_from_private_log.h\"\n#include \"duplication_pipeline.h\"\n#include \"replica/replica_stub.h\"\n\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/dist/fmt_logging.h>\n#include <rapidjson/writer.h>\n\nnamespace dsn {\nnamespace replication {\n\nreplica_duplicator::replica_duplicator(const duplication_entry &ent, replica *r)\n    : replica_base(r),\n      _id(ent.dupid),\n      _remote_cluster_name(ent.remote),\n      _replica(r),\n      _stub(r->get_replica_stub())\n{\n    _status = ent.status;\n\n    auto it = ent.progress.find(get_gpid().get_partition_index());\n    if (it->second == invalid_decree) {\n        // keep current max committed_decree as start point.\n        // todo(jiashuo1) _start_point_decree hasn't be ready to persist zk, so if master restart,\n        // the value will be reset 0\n        _start_point_decree = _progress.last_decree = _replica->private_log()->max_commit_on_disk();\n    } else {\n        _progress.last_decree = _progress.confirmed_decree = it->second;\n    }\n    ddebug_replica(\"initialize replica_duplicator[{}] [dupid:{}, 
meta_confirmed_decree:{}]\",\n                   duplication_status_to_string(_status),\n                   id(),\n                   it->second);\n    thread_pool(LPC_REPLICATION_LOW).task_tracker(tracker()).thread_hash(get_gpid().thread_hash());\n\n    if (_status == duplication_status::DS_PREPARE) {\n        prepare_dup();\n    } else if (_status == duplication_status::DS_LOG) {\n        start_dup_log();\n    }\n}\n\nvoid replica_duplicator::prepare_dup()\n{\n    ddebug_replica(\"start prepare checkpoint to catch up with latest durable decree: \"\n                   \"start_point_decree({}) < last_durable_decree({}) = {}\",\n                   _start_point_decree,\n                   _replica->last_durable_decree(),\n                   _start_point_decree < _replica->last_durable_decree());\n\n    tasking::enqueue(\n        LPC_REPLICATION_COMMON,\n        &_tracker,\n        [this]() { _replica->trigger_manual_emergency_checkpoint(_start_point_decree); },\n        get_gpid().thread_hash());\n}\n\nvoid replica_duplicator::start_dup_log()\n{\n    ddebug_replica(\"starting duplication {} [last_decree: {}, confirmed_decree: {}]\",\n                   to_string(),\n                   _progress.last_decree,\n                   _progress.confirmed_decree);\n\n    /// ===== pipeline declaration ===== ///\n\n    // load -> ship -> load\n    _ship = make_unique<ship_mutation>(this);\n    _load_private = make_unique<load_from_private_log>(_replica, this);\n    _load = make_unique<load_mutation>(this, _replica, _load_private.get());\n\n    from(*_load).link(*_ship).link(*_load);\n    fork(*_load_private, LPC_REPLICATION_LONG_LOW, 0).link(*_ship);\n\n    run_pipeline();\n}\n\nvoid replica_duplicator::pause_dup_log()\n{\n    ddebug_replica(\"pausing duplication: {}\", to_string());\n\n    pause();\n    cancel_all();\n\n    _load.reset();\n    _ship.reset();\n    _load_private.reset();\n\n    ddebug_replica(\"duplication paused: {}\", to_string());\n}\n\nstd::string 
replica_duplicator::to_string() const\n{\n    rapidjson::Document doc;\n    doc.SetObject();\n    auto &alloc = doc.GetAllocator();\n\n    doc.AddMember(\"dupid\", id(), alloc);\n    doc.AddMember(\"status\", rapidjson::StringRef(duplication_status_to_string(_status)), alloc);\n    doc.AddMember(\"remote\", rapidjson::StringRef(_remote_cluster_name.data()), alloc);\n    doc.AddMember(\"confirmed\", _progress.confirmed_decree, alloc);\n    doc.AddMember(\"app\",\n                  rapidjson::StringRef(_replica->get_app_info()->app_name.data(),\n                                       _replica->get_app_info()->app_name.size()),\n                  alloc);\n\n    rapidjson::StringBuffer sb;\n    rapidjson::Writer<rapidjson::StringBuffer> writer(sb);\n    doc.Accept(writer);\n    return sb.GetString();\n}\n\nvoid replica_duplicator::update_status_if_needed(duplication_status::type next_status)\n{\n    if (is_duplication_status_invalid(next_status)) {\n        derror_replica(\"unexpected duplication status ({})\",\n                       duplication_status_to_string(next_status));\n        return;\n    }\n\n    // DS_PREPARE means replica is checkpointing, it may need trigger multi time to catch\n    // _start_point_decree of the plog\n    if (_status == next_status && next_status != duplication_status::DS_PREPARE) {\n        return;\n    }\n\n    ddebug_replica(\n        \"update duplication status: {}=>{}[start_point={}, last_commit={}, last_durable={}]\",\n        duplication_status_to_string(_status),\n        duplication_status_to_string(next_status),\n        _start_point_decree,\n        _replica->last_committed_decree(),\n        _replica->last_durable_decree());\n\n    _status = next_status;\n    if (_status == duplication_status::DS_PREPARE) {\n        prepare_dup();\n        return;\n    }\n\n    // DS_APP means the replica follower is duplicate checkpoint from master, just return and wait\n    // next loop\n    if (_status == duplication_status::DS_APP) {\n    
    return;\n    }\n\n    if (_status == duplication_status::DS_LOG) {\n        start_dup_log();\n        return;\n    }\n\n    if (_status == duplication_status::DS_PAUSE) {\n        pause_dup_log();\n        return;\n    }\n}\n\nreplica_duplicator::~replica_duplicator()\n{\n    pause();\n    cancel_all();\n    ddebug_replica(\"closing duplication {}\", to_string());\n}\n\nerror_s replica_duplicator::update_progress(const duplication_progress &p)\n{\n    zauto_write_lock l(_lock);\n\n    if (p.confirmed_decree >= 0 && p.confirmed_decree < _progress.confirmed_decree) {\n        return FMT_ERR(ERR_INVALID_STATE,\n                       \"never decrease confirmed_decree: new({}) old({})\",\n                       p.confirmed_decree,\n                       _progress.confirmed_decree);\n    }\n\n    decree last_confirmed_decree = _progress.confirmed_decree;\n    _progress.confirmed_decree = std::max(_progress.confirmed_decree, p.confirmed_decree);\n    _progress.last_decree = std::max(_progress.last_decree, p.last_decree);\n    _progress.checkpoint_has_prepared = _start_point_decree <= _replica->last_durable_decree();\n\n    if (_progress.confirmed_decree > _progress.last_decree) {\n        return FMT_ERR(ERR_INVALID_STATE,\n                       \"last_decree({}) should always larger than confirmed_decree({})\",\n                       _progress.last_decree,\n                       _progress.confirmed_decree);\n    }\n    if (_progress.confirmed_decree > last_confirmed_decree) {\n        // has confirmed_decree updated.\n        _stub->_counter_dup_confirmed_rate->add(_progress.confirmed_decree - last_confirmed_decree);\n    }\n\n    return error_s::ok();\n}\n\nvoid replica_duplicator::verify_start_decree(decree start_decree)\n{\n    decree confirmed_decree = progress().confirmed_decree;\n    decree last_decree = progress().last_decree;\n    decree max_gced_decree = get_max_gced_decree();\n    dassert_f(max_gced_decree < start_decree,\n              \"the logs 
haven't yet duplicated were accidentally truncated \"\n              \"[max_gced_decree: {}, start_decree: {}, confirmed_decree: {}, last_decree: {}]\",\n              max_gced_decree,\n              start_decree,\n              confirmed_decree,\n              last_decree);\n}\n\ndecree replica_duplicator::get_max_gced_decree() const\n{\n    return _replica->private_log()->max_gced_decree(_replica->get_gpid());\n}\n\nuint64_t replica_duplicator::get_pending_mutations_count() const\n{\n    // it's not atomic to read last_committed_decree in not-REPLICATION thread pool,\n    // but enough for approximate statistic.\n    int64_t cnt = _replica->last_committed_decree() - progress().last_decree;\n    // since last_committed_decree() is not atomic, `cnt` could probably be negative.\n    return cnt > 0 ? static_cast<uint64_t>(cnt) : 0;\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/replica_duplicator.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/dist/replication/duplication_common.h>\n#include <dsn/cpp/pipeline.h>\n#include <dsn/dist/replication/replica_base.h>\n#include <dsn/dist/replication.h>\n#include <dsn/tool-api/zlocks.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass duplication_progress\n{\npublic:\n    // check if checkpoint has catch up with `_start_point_decree`\n    bool checkpoint_has_prepared{false};\n    // the maximum decree that's been persisted in meta server\n    decree confirmed_decree{invalid_decree};\n\n    // the maximum decree that's been duplicated to remote.\n    decree last_decree{invalid_decree};\n\n    duplication_progress &set_last_decree(decree d)\n    {\n        last_decree = d;\n        return *this;\n    }\n\n    duplication_progress &set_confirmed_decree(decree d)\n    {\n        confirmed_decree = d;\n        return *this;\n    }\n};\n\nclass load_mutation;\nclass ship_mutation;\nclass load_from_private_log;\nclass replica;\nclass replica_stub;\n\n// Each replica_duplicator is responsible for one duplication.\n// It works in THREAD_POOL_REPLICATION (LPC_DUPLICATE_MUTATIONS),\n// sharded by gpid, thus all 
functions are single-threaded,\n// no read lock required (of course write lock is necessary when\n// reader could be in other thread).\n//\n// TODO(wutao1): Optimization for multi-duplication\n//               Currently we create duplicator for each duplication.\n//               They're isolated even if they share the same private log.\nclass replica_duplicator : public replica_base, public pipeline::base\n{\npublic:\n    replica_duplicator(const duplication_entry &ent, replica *r);\n\n    // This is a blocking call.\n    // The thread may be seriously blocked under the destruction.\n    // Take care when running in THREAD_POOL_REPLICATION, though\n    // duplication removal is extremely rare.\n    ~replica_duplicator();\n\n    // Updates this duplication to `next_status`.\n    // Not thread-safe.\n    void update_status_if_needed(duplication_status::type next_status);\n\n    void update_fail_mode(duplication_fail_mode::type fmode)\n    {\n        _fail_mode.store(fmode, std::memory_order_relaxed);\n    }\n    duplication_fail_mode::type fail_mode() const\n    {\n        return _fail_mode.load(std::memory_order_relaxed);\n    }\n\n    dupid_t id() const { return _id; }\n\n    const std::string &remote_cluster_name() const { return _remote_cluster_name; }\n\n    // Thread-safe\n    duplication_progress progress() const\n    {\n        zauto_read_lock l(_lock);\n        return _progress;\n    }\n\n    // Thread-safe\n    error_s update_progress(const duplication_progress &p);\n\n    void prepare_dup();\n\n    void start_dup_log();\n\n    // Pausing duplication will clear all the internal volatile states, thus\n    // when next time it restarts, the states will be reinitialized like the\n    // server being restarted.\n    // It is useful when something went wrong internally.\n    void pause_dup_log();\n\n    // Holds its own tracker, so that other tasks\n    // won't be effected when this duplication is removed.\n    dsn::task_tracker *tracker() { return &_tracker; 
}\n\n    std::string to_string() const;\n\n    // To ensure mutation logs after start_decree is available\n    // for duplication. If not, it means the eventual consistency\n    // of duplication is no longer guaranteed due to the missing logs.\n    // For current implementation the system will fail fast.\n    void verify_start_decree(decree start_decree);\n\n    decree get_max_gced_decree() const;\n\n    // For metric \"dup.pending_mutations_count\"\n    uint64_t get_pending_mutations_count() const;\n\n    duplication_status::type status() const { return _status; };\n\nprivate:\n    friend class duplication_test_base;\n    friend class replica_duplicator_test;\n    friend class duplication_sync_timer_test;\n    friend class load_from_private_log_test;\n    friend class ship_mutation_test;\n\n    friend class load_mutation;\n    friend class ship_mutation;\n\n    const dupid_t _id;\n    const std::string _remote_cluster_name;\n\n    replica *_replica;\n    replica_stub *_stub;\n    dsn::task_tracker _tracker;\n\n    decree _start_point_decree = invalid_decree;\n    duplication_status::type _status{duplication_status::DS_INIT};\n    std::atomic<duplication_fail_mode::type> _fail_mode{duplication_fail_mode::FAIL_SLOW};\n\n    // protect the access of _progress.\n    mutable zrwlock_nr _lock;\n    duplication_progress _progress;\n\n    /// === pipeline === ///\n    std::unique_ptr<load_mutation> _load;\n    std::unique_ptr<ship_mutation> _ship;\n    std::unique_ptr<load_from_private_log> _load_private;\n};\n\ntypedef std::unique_ptr<replica_duplicator> replica_duplicator_u_ptr;\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/replica_duplicator_manager.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/dist/replication/duplication_common.h>\n#include <dsn/dist/fmt_logging.h>\n\n#include \"replica_duplicator_manager.h\"\n\nnamespace dsn {\nnamespace replication {\n\nstd::vector<duplication_confirm_entry>\nreplica_duplicator_manager::get_duplication_confirms_to_update() const\n{\n    zauto_lock l(_lock);\n\n    std::vector<duplication_confirm_entry> updates;\n    for (const auto &kv : _duplications) {\n        replica_duplicator *duplicator = kv.second.get();\n        duplication_progress p = duplicator->progress();\n        if (p.last_decree != p.confirmed_decree ||\n            (kv.second->status() == duplication_status::DS_PREPARE && p.checkpoint_has_prepared)) {\n            if (p.last_decree < p.confirmed_decree) {\n                derror_replica(\"invalid decree state: p.last_decree({}) < p.confirmed_decree({})\",\n                               p.last_decree,\n                               p.confirmed_decree);\n                continue;\n            }\n            duplication_confirm_entry entry;\n            entry.dupid = duplicator->id();\n            entry.confirmed_decree = p.last_decree;\n            
entry.__set_checkpoint_prepared(p.checkpoint_has_prepared);\n            updates.emplace_back(entry);\n        }\n    }\n    return updates;\n}\n\nvoid replica_duplicator_manager::sync_duplication(const duplication_entry &ent)\n{\n    // state is inconsistent with meta-server\n    auto it = ent.progress.find(get_gpid().get_partition_index());\n    if (it == ent.progress.end()) {\n        _duplications.erase(ent.dupid);\n        return;\n    }\n\n    zauto_lock l(_lock);\n\n    dupid_t dupid = ent.dupid;\n    duplication_status::type next_status = ent.status;\n\n    replica_duplicator_u_ptr &dup = _duplications[dupid];\n    if (dup == nullptr) {\n        if (!is_duplication_status_invalid(next_status)) {\n            dup = make_unique<replica_duplicator>(ent, _replica);\n        } else {\n            derror_replica(\"illegal duplication status: {}\",\n                           duplication_status_to_string(next_status));\n        }\n    } else {\n        // update progress\n        duplication_progress newp = dup->progress().set_confirmed_decree(it->second);\n        dcheck_eq_replica(dup->update_progress(newp), error_s::ok());\n        dup->update_status_if_needed(next_status);\n        if (ent.__isset.fail_mode) {\n            dup->update_fail_mode(ent.fail_mode);\n        }\n    }\n}\n\ndecree replica_duplicator_manager::min_confirmed_decree() const\n{\n    zauto_lock l(_lock);\n\n    decree min_decree = invalid_decree;\n    if (_replica->status() == partition_status::PS_PRIMARY) {\n        for (auto &kv : _duplications) {\n            const duplication_progress &p = kv.second->progress();\n            if (min_decree == invalid_decree) {\n                min_decree = p.confirmed_decree;\n            } else {\n                min_decree = std::min(min_decree, p.confirmed_decree);\n            }\n        }\n    } else if (_primary_confirmed_decree > 0) {\n        // if the replica is not primary, use the latest known (from primary)\n        // confirmed_decree 
instead.\n        min_decree = _primary_confirmed_decree;\n    }\n    return min_decree;\n}\n\n// Remove the duplications that are not in the `new_dup_map`.\n// NOTE: this function may be blocked when destroying replica_duplicator.\nvoid replica_duplicator_manager::remove_non_existed_duplications(\n    const std::map<dupid_t, duplication_entry> &new_dup_map)\n{\n    zauto_lock l(_lock);\n    std::vector<dupid_t> removal_set;\n    for (auto &pair : _duplications) {\n        dupid_t cur_dupid = pair.first;\n        if (new_dup_map.find(cur_dupid) == new_dup_map.end()) {\n            removal_set.emplace_back(cur_dupid);\n        }\n    }\n\n    for (dupid_t dupid : removal_set) {\n        _duplications.erase(dupid);\n    }\n}\n\nvoid replica_duplicator_manager::update_confirmed_decree_if_secondary(decree confirmed)\n{\n    // this function always runs in the same single thread with config-sync\n    if (_replica->status() != partition_status::PS_SECONDARY) {\n        return;\n    }\n\n    zauto_lock l(_lock);\n    remove_all_duplications();\n    if (confirmed >= 0) { // duplication ongoing\n        // confirmed decree never decreases\n        if (_primary_confirmed_decree < confirmed) {\n            _primary_confirmed_decree = confirmed;\n        }\n    } else { // duplication add with freeze but no start or no duplication(include removed)\n        _primary_confirmed_decree = confirmed;\n    }\n}\n\nint64_t replica_duplicator_manager::get_pending_mutations_count() const\n{\n    int64_t total = 0;\n    for (const auto &dup : _duplications) {\n        total += dup.second->get_pending_mutations_count();\n    }\n    return total;\n}\n\nstd::vector<replica_duplicator_manager::dup_state>\nreplica_duplicator_manager::get_dup_states() const\n{\n    zauto_lock l(_lock);\n\n    std::vector<dup_state> ret;\n    ret.reserve(_duplications.size());\n    for (const auto &dup : _duplications) {\n        dup_state state;\n        state.dupid = dup.first;\n        state.duplicating = 
!dup.second->paused();\n        auto progress = dup.second->progress();\n        state.last_decree = progress.last_decree;\n        state.confirmed_decree = progress.confirmed_decree;\n        state.fail_mode = dup.second->fail_mode();\n        ret.emplace_back(state);\n    }\n    return ret;\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/replica_duplicator_manager.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"replica_duplicator.h\"\n\n#include <dsn/dist/replication/replication_types.h>\n#include <dsn/dist/replication/duplication_common.h>\n\n#include \"replica/replica.h\"\n#include \"replica/mutation_log.h\"\n\nnamespace dsn {\nnamespace replication {\n\n/// replica_duplicator_manager manages the set of duplications on this replica.\n/// \\see duplication_sync_timer\n\n/// Ref-Issue: https://github.com/apache/incubator-pegasus/issues/892\nclass replica_duplicator_manager : public replica_base\n{\npublic:\n    explicit replica_duplicator_manager(replica *r) : replica_base(r), _replica(r) {}\n\n    // Immediately stop duplication in the following conditions:\n    // - replica is not primary on replica-server perspective (status != PRIMARY)\n    // - replica is not primary on meta-server perspective (progress.find(partition_id) == end())\n    // - the app is not assigned with duplication (dup_map.empty())\n    void update_duplication_map(const std::map<int32_t, duplication_entry> &new_dup_map)\n    {\n        if (_replica->status() != partition_status::PS_PRIMARY || new_dup_map.empty()) {\n            
remove_all_duplications();\n            return;\n        }\n\n        remove_non_existed_duplications(new_dup_map);\n\n        for (const auto &kv2 : new_dup_map) {\n            sync_duplication(kv2.second);\n        }\n    }\n\n    /// collect updated duplication confirm points from this replica.\n    std::vector<duplication_confirm_entry> get_duplication_confirms_to_update() const;\n\n    /// mutations <= min_confirmed_decree are assumed to be cleanable.\n    /// If there's no duplication,　invalid_decree is returned, mean that all logs are cleanable.\n    /// THREAD_POOL_REPLICATION\n    /// \\see replica::on_checkpoint_timer()\n    decree min_confirmed_decree() const;\n\n    /// Updates the latest known confirmed decree on this replica if it's secondary.\n    /// THREAD_POOL_REPLICATION\n    /// \\see replica_check.cpp\n    void update_confirmed_decree_if_secondary(decree confirmed);\n\n    /// Sums up the number of pending mutations for all duplications\n    /// on this replica, for metric \"dup.pending_mutations_count\".\n    int64_t get_pending_mutations_count() const;\n\n    struct dup_state\n    {\n        dupid_t dupid{0};\n        bool duplicating{false};\n        decree last_decree{invalid_decree};\n        decree confirmed_decree{invalid_decree};\n        duplication_fail_mode::type fail_mode{duplication_fail_mode::FAIL_SLOW};\n    };\n    std::vector<dup_state> get_dup_states() const;\n\nprivate:\n    void sync_duplication(const duplication_entry &ent);\n\n    void remove_non_existed_duplications(const std::map<dupid_t, duplication_entry> &);\n\n    void remove_all_duplications()\n    {\n        // fast path\n        if (_duplications.empty())\n            return;\n\n        dwarn_replica(\"remove all duplication, replica status = {}\",\n                      enum_to_string(_replica->status()));\n        _duplications.clear();\n    }\n\nprivate:\n    friend class duplication_sync_timer_test;\n    friend class duplication_test_base;\n    friend class 
replica_duplicator_manager_test;\n\n    replica *_replica;\n\n    std::map<dupid_t, replica_duplicator_u_ptr> _duplications;\n\n    decree _primary_confirmed_decree{invalid_decree};\n\n    // avoid thread conflict between replica::on_checkpoint_timer and\n    // duplication_sync_timer.\n    mutable zlock _lock;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/replica_follower.cpp",
    "content": "/*\n* Licensed to the Apache Software Foundation (ASF) under one\n* or more contributor license agreements.  See the NOTICE file\n* distributed with this work for additional information\n* regarding copyright ownership.  The ASF licenses this file\n* to you under the Apache License, Version 2.0 (the\n* \"License\"); you may not use this file except in compliance\n* with the License.  You may obtain a copy of the License at\n*\n*   http://www.apache.org/licenses/LICENSE-2.0\n*\n* Unless required by applicable law or agreed to in writing,\n* software distributed under the License is distributed on an\n* \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n* KIND, either express or implied.  See the License for the\n* specific language governing permissions and limitations\n* under the License.\n*/\n\n#include \"replica_follower.h\"\n#include \"replica/replica_stub.h\"\n#include \"dsn/utility/filesystem.h\"\n#include \"dsn/dist/replication/duplication_common.h\"\n\n#include <boost/algorithm/string.hpp>\n#include <dsn/tool-api/group_address.h>\n#include <dsn/dist/nfs_node.h>\n#include <dsn/utility/fail_point.h>\n\nnamespace dsn {\nnamespace replication {\n\nreplica_follower::replica_follower(replica *r) : replica_base(r), _replica(r)\n{\n    init_master_info();\n}\n\nreplica_follower::~replica_follower() { _tracker.wait_outstanding_tasks(); }\n\n// ThreadPool: THREAD_POOL_REPLICATION_LONG\nvoid replica_follower::init_master_info()\n{\n    const auto &envs = _replica->get_app_info()->envs;\n\n    if (envs.find(duplication_constants::kDuplicationEnvMasterClusterKey) == envs.end() ||\n        envs.find(duplication_constants::kDuplicationEnvMasterMetasKey) == envs.end()) {\n        return;\n    }\n\n    need_duplicate = true;\n\n    _master_cluster_name = envs.at(duplication_constants::kDuplicationEnvMasterClusterKey);\n    _master_app_name = _replica->get_app_info()->app_name;\n\n    const auto &meta_list_str = 
envs.at(duplication_constants::kDuplicationEnvMasterMetasKey);\n    std::vector<std::string> metas;\n    boost::split(metas, meta_list_str, boost::is_any_of(\",\"));\n    dassert_f(!metas.empty(), \"master cluster meta list is invalid!\");\n    for (const auto &meta : metas) {\n        dsn::rpc_address node;\n        dassert_f(node.from_string_ipv4(meta.c_str()), \"{} is invalid meta address\", meta);\n        _master_meta_list.emplace_back(std::move(node));\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION_LONG\nerror_code replica_follower::duplicate_checkpoint()\n{\n    zauto_lock l(_lock);\n    if (_duplicating_checkpoint) {\n        dwarn_replica(\"duplicate master[{}] checkpoint is running\", master_replica_name());\n        return ERR_BUSY;\n    }\n\n    ddebug_replica(\"start duplicate master[{}] checkpoint\", master_replica_name());\n    _duplicating_checkpoint = true;\n    tasking::enqueue(LPC_DUPLICATE_CHECKPOINT, &_tracker, [=]() mutable {\n        async_duplicate_checkpoint_from_master_replica();\n    });\n    _tracker.wait_outstanding_tasks();\n    _duplicating_checkpoint = false;\n    if (_tracker.all_tasks_success()) {\n        _tracker.clear_tasks_state();\n        return ERR_OK;\n    }\n    return ERR_TRY_AGAIN;\n}\n\n// ThreadPool: THREAD_POOL_DEFAULT\nvoid replica_follower::async_duplicate_checkpoint_from_master_replica()\n{\n    rpc_address meta_servers;\n    meta_servers.assign_group(_master_cluster_name.c_str());\n    meta_servers.group_address()->add_list(_master_meta_list);\n\n    configuration_query_by_index_request meta_config_request;\n    meta_config_request.app_name = _master_app_name;\n    // just fetch the same partition config\n    meta_config_request.partition_indices = {get_gpid().get_partition_index()};\n\n    ddebug_replica(\"query master[{}] replica configuration\", master_replica_name());\n    dsn::message_ex *msg = dsn::message_ex::create_request(\n        RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX, 0, 
get_gpid().thread_hash());\n    dsn::marshall(msg, meta_config_request);\n    rpc::call(meta_servers,\n              msg,\n              &_tracker,\n              [&](error_code err, configuration_query_by_index_response &&resp) mutable {\n                  FAIL_POINT_INJECT_F(\"duplicate_checkpoint_ok\", [&](string_view s) -> void {\n                      _tracker.set_tasks_success();\n                      return;\n                  });\n\n                  FAIL_POINT_INJECT_F(\"duplicate_checkpoint_failed\",\n                                      [&](string_view s) -> void { return; });\n                  if (update_master_replica_config(err, std::move(resp)) == ERR_OK) {\n                      copy_master_replica_checkpoint();\n                  }\n              });\n}\n\n// ThreadPool: THREAD_POOL_DEFAULT\nerror_code\nreplica_follower::update_master_replica_config(error_code err,\n                                               configuration_query_by_index_response &&resp)\n{\n    error_code err_code = err != ERR_OK ? 
err : resp.err;\n    if (dsn_unlikely(err_code != ERR_OK)) {\n        derror_replica(\n            \"query master[{}] config failed: {}\", master_replica_name(), err_code.to_string());\n        return err_code;\n    }\n\n    if (dsn_unlikely(resp.partition_count != _replica->get_app_info()->partition_count)) {\n        derror_replica(\"master[{}] partition count is inconsistent: local = {} vs master = {}\",\n                       master_replica_name(),\n                       _replica->get_app_info()->partition_count,\n                       resp.partition_count);\n        return ERR_INCONSISTENT_STATE;\n    }\n\n    if (dsn_unlikely(resp.partitions.size() != 1)) {\n        derror_replica(\"master[{}] config size must be single, but actually is {}\",\n                       master_replica_name(),\n                       resp.partitions.size());\n        return ERR_INVALID_DATA;\n    }\n\n    if (dsn_unlikely(resp.partitions[0].pid.get_partition_index() !=\n                     get_gpid().get_partition_index())) {\n        derror_replica(\"master[{}] partition index is inconsistent: local = {} vs master = {}\",\n                       master_replica_name(),\n                       get_gpid().get_partition_index(),\n                       resp.partitions[0].pid.get_partition_index());\n        return ERR_INCONSISTENT_STATE;\n    }\n\n    if (dsn_unlikely(resp.partitions[0].primary == rpc_address::s_invalid_address)) {\n        derror_replica(\"master[{}] partition address is invalid\", master_replica_name());\n        return ERR_INVALID_STATE;\n    }\n\n    // since the request just specify one partition, the result size is single\n    _master_replica_config = resp.partitions[0];\n    ddebug_replica(\n        \"query master[{}] config successfully and update local config: remote={}, gpid={}\",\n        master_replica_name(),\n        _master_replica_config.primary.to_string(),\n        _master_replica_config.pid.to_string());\n    return ERR_OK;\n}\n\n// ThreadPool: 
THREAD_POOL_DEFAULT\nvoid replica_follower::copy_master_replica_checkpoint()\n{\n    ddebug_replica(\"query master[{}] replica checkpoint info and start use nfs copy the data\",\n                   master_replica_name());\n    learn_request request;\n    request.pid = _master_replica_config.pid;\n    dsn::message_ex *msg = dsn::message_ex::create_request(\n        RPC_QUERY_LAST_CHECKPOINT_INFO, 0, _master_replica_config.pid.thread_hash());\n    dsn::marshall(msg, request);\n    rpc::call(_master_replica_config.primary,\n              msg,\n              &_tracker,\n              [&](error_code err, learn_response &&resp) mutable {\n                  nfs_copy_checkpoint(err, std::move(resp));\n              });\n}\n\n// ThreadPool: THREAD_POOL_DEFAULT\nerror_code replica_follower::nfs_copy_checkpoint(error_code err, learn_response &&resp)\n{\n    error_code err_code = err != ERR_OK ? err : resp.err;\n    if (dsn_unlikely(err_code != ERR_OK)) {\n        derror_replica(\"query master[{}] replica checkpoint info failed, err = {}\",\n                       master_replica_name(),\n                       err_code.to_string());\n        return err_code;\n    }\n\n    std::string dest = utils::filesystem::path_combine(\n        _replica->dir(), duplication_constants::kDuplicationCheckpointRootDir);\n    if (!utils::filesystem::remove_path(dest)) {\n        derror_replica(\n            \"clear master[{}] replica checkpoint dest dir {} failed\", master_replica_name(), dest);\n        return ERR_FILE_OPERATION_FAILED;\n    }\n\n    nfs_copy_remote_files(\n        resp.address, resp.replica_disk_tag, resp.base_local_dir, resp.state.files, dest);\n    return ERR_OK;\n}\n\n// ThreadPool: THREAD_POOL_DEFAULT\nvoid replica_follower::nfs_copy_remote_files(const rpc_address &remote_node,\n                                             const std::string &remote_disk,\n                                             const std::string &remote_dir,\n                                           
  std::vector<std::string> &file_list,\n                                             const std::string &dest_dir)\n{\n    ddebug_replica(\n        \"nfs start copy master[{}] replica checkpoint: {}\", master_replica_name(), remote_dir);\n    std::shared_ptr<remote_copy_request> request = std::make_shared<remote_copy_request>();\n    request->source = remote_node;\n    request->source_disk_tag = remote_disk;\n    request->source_dir = remote_dir;\n    request->files = file_list;\n    request->dest_disk_tag = _replica->get_replica_disk_tag();\n    request->dest_dir = dest_dir;\n    request->overwrite = true;\n    request->high_priority = false;\n\n    _replica->_stub->_nfs->copy_remote_files(\n        request,\n        LPC_DUPLICATE_CHECKPOINT_COMPLETED,\n        &_tracker,\n        [&, remote_dir](error_code err, size_t size) mutable {\n            FAIL_POINT_INJECT_NOT_RETURN_F(\"nfs_copy_ok\",\n                                           [&](string_view s) -> void { err = ERR_OK; });\n\n            if (dsn_unlikely(err != ERR_OK)) {\n                derror_replica(\"nfs copy master[{}] checkpoint failed: checkpoint = {}, err = {}\",\n                               master_replica_name(),\n                               remote_dir,\n                               err.to_string());\n                return;\n            }\n            ddebug_replica(\"nfs copy master[{}] checkpoint completed: checkpoint = {}, size = {}\",\n                           master_replica_name(),\n                           remote_dir,\n                           size);\n            _tracker.set_tasks_success();\n        });\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/replica_follower.h",
    "content": "/*\n* Licensed to the Apache Software Foundation (ASF) under one\n* or more contributor license agreements.  See the NOTICE file\n* distributed with this work for additional information\n* regarding copyright ownership.  The ASF licenses this file\n* to you under the Apache License, Version 2.0 (the\n* \"License\"); you may not use this file except in compliance\n* with the License.  You may obtain a copy of the License at\n*\n*   http://www.apache.org/licenses/LICENSE-2.0\n*\n* Unless required by applicable law or agreed to in writing,\n* software distributed under the License is distributed on an\n* \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n* KIND, either express or implied.  See the License for the\n* specific language governing permissions and limitations\n* under the License.\n*/\n\n#pragma once\n#include \"replica/replica.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_follower : replica_base\n{\npublic:\n    explicit replica_follower(replica *r);\n    ~replica_follower();\n    error_code duplicate_checkpoint();\n\n    const std::string &get_master_cluster_name() const { return _master_cluster_name; };\n\n    const std::string &get_master_app_name() const { return _master_app_name; };\n\n    const std::vector<rpc_address> &get_master_meta_list() const { return _master_meta_list; };\n\n    const bool is_need_duplicate() const { return need_duplicate; }\n\nprivate:\n    replica *_replica;\n    task_tracker _tracker;\n    bool _duplicating_checkpoint{false};\n    mutable zlock _lock;\n\n    std::string _master_cluster_name;\n    std::string _master_app_name;\n    std::vector<rpc_address> _master_meta_list;\n    partition_configuration _master_replica_config;\n\n    bool need_duplicate{false};\n\n    void init_master_info();\n    void async_duplicate_checkpoint_from_master_replica();\n    error_code update_master_replica_config(error_code err,\n                                            
configuration_query_by_index_response &&resp);\n    void copy_master_replica_checkpoint();\n    error_code nfs_copy_checkpoint(error_code err, learn_response &&resp);\n    void nfs_copy_remote_files(const rpc_address &remote_node,\n                               const std::string &remote_disk,\n                               const std::string &remote_dir,\n                               std::vector<std::string> &file_list,\n                               const std::string &dest_dir);\n\n    std::string master_replica_name()\n    {\n        std::string app_info = fmt::format(\"{}.{}\", _master_cluster_name, _master_app_name);\n        if (_master_replica_config.primary != rpc_address::s_invalid_address) {\n            return fmt::format(\"{}({}|{})\",\n                               app_info,\n                               _master_replica_config.primary.to_string(),\n                               _master_replica_config.pid.to_string());\n        }\n        return app_info;\n    }\n\n    friend class replica_follower_test;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/test/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_replica_dup_test)\n\nset(MY_PROJ_SRC \"\")\n\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS dsn_meta_server\n        dsn_replica_server\n        dsn.replication.zookeeper_provider\n        dsn_replication_common\n        dsn.failure_detector\n        dsn_utils\n        zookeeper\n        hashtable\n        gtest\n)\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\nset(MY_BINPLACES\n        config-test.ini\n        log.1.0.handle_real_private_log\n        log.1.0.all_loaded_are_write_empties\n        log.1.0.handle_real_private_log2\n        run.sh\n)\n\ndsn_add_test()\n"
  },
  {
    "path": "src/replica/duplication/test/config-test.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\n\n[apps.replica]\ntype = replica\npools = THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[core]\ntool = nativerun\n\n[tools.simple_logger]\nstderr_start_level = LOG_LEVEL_WARNING\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 2\n\n[threadpool.THREAD_POOL_DEFAULT]\nname = default\npartitioned = false\n\n[threadpool.THREAD_POOL_REPLICATION]\nname = replica\npartitioned = true\n\n[threadpool.THREAD_POOL_REPLICATION_LONG]\nname = replica_long\n\n[duplication-group]\nmaster-cluster = 1\nslave-cluster  = 2\n"
  },
  {
    "path": "src/replica/duplication/test/duplication_sync_timer_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"replica/duplication/duplication_sync_timer.h\"\n#include \"duplication_test_base.h\"\n\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/tool-api/rpc_message.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass duplication_sync_timer_test : public duplication_test_base\n{\npublic:\n    void SetUp() override { dup_sync = make_unique<duplication_sync_timer>(stub.get()); }\n\n    void TearDown() override { stub.reset(); }\n\n    void test_on_duplication_sync_reply()\n    {\n        // replica: {app_id:2, partition_id:1, duplications:{}}\n        stub->add_primary_replica(2, 1);\n        ASSERT_NE(stub->find_replica(2, 1), nullptr);\n\n        // appid:2 -> dupid:1\n        duplication_entry ent;\n        ent.dupid = 1;\n        ent.remote = \"slave-cluster\";\n        ent.status = duplication_status::DS_PAUSE;\n        ent.progress[1] = 1000; // partition 1 => confirmed 1000\n        duplication_sync_response resp;\n        resp.dup_map[2] = {{ent.dupid, ent}};\n\n        dup_sync->_rpc_task = new raw_task(LPC_TEST, []() {});\n        dup_sync->on_duplication_sync_reply(ERR_OK, resp);\n        replica_duplicator *dup =\n    
        stub->find_replica(2, 1)->get_replica_duplicator_manager()._duplications[1].get();\n\n        ASSERT_TRUE(dup);\n        ASSERT_EQ(dup->_status, duplication_status::DS_PAUSE);\n        ASSERT_EQ(dup->_progress.confirmed_decree, 1000);\n        ASSERT_EQ(dup_sync->_rpc_task, nullptr);\n    }\n\n    void test_duplication_sync()\n    {\n        int total_app_num = 4;\n        for (int appid = 1; appid <= total_app_num; appid++) {\n            auto r = stub->add_non_primary_replica(appid, 1);\n\n            // trigger duplication sync on partition 1\n            duplication_entry ent;\n            ent.dupid = 1;\n            ent.progress[r->get_gpid().get_partition_index()] = 1000;\n            ent.status = duplication_status::DS_PAUSE;\n            auto dup = dsn::make_unique<replica_duplicator>(ent, r);\n            add_dup(r, std::move(dup));\n        }\n\n        RPC_MOCKING(duplication_sync_rpc)\n        {\n            {\n                // replica server should not sync to meta when it's disconnected\n                dup_sync->run();\n                ASSERT_EQ(duplication_sync_rpc::mail_box().size(), 0);\n            }\n            {\n                // never collects confirm points from non-primaries\n                stub->set_state_connected();\n                dup_sync->run();\n                ASSERT_EQ(duplication_sync_rpc::mail_box().size(), 1);\n\n                auto &req = duplication_sync_rpc::mail_box().back().request();\n                ASSERT_EQ(req.confirm_list.size(), 0);\n            }\n        }\n\n        RPC_MOCKING(duplication_sync_rpc)\n        {\n            for (auto &e : stub->mock_replicas) {\n                e.second->as_primary();\n            }\n            dup_sync->run();\n            ASSERT_EQ(duplication_sync_rpc::mail_box().size(), 1);\n\n            auto &req = duplication_sync_rpc::mail_box().back().request();\n            ASSERT_EQ(req.node, stub->primary_address());\n\n            // ensure confirm list is empty when no 
progress\n            ASSERT_EQ(req.confirm_list.size(), 0);\n\n            // ensure this rpc has timeout set.\n            auto &rpc = duplication_sync_rpc::mail_box().back();\n            ASSERT_GT(rpc.dsn_request()->header->client.timeout_ms, 0);\n        }\n\n        RPC_MOCKING(duplication_sync_rpc)\n        {\n            for (int appid = 1; appid <= total_app_num; appid++) {\n                auto &dup = stub->mock_replicas[gpid(appid, 1)]\n                                ->get_replica_duplicator_manager()\n                                ._duplications[1];\n                dup->update_progress(duplication_progress().set_last_decree(1500));\n            }\n\n            dup_sync->run();\n            ASSERT_EQ(duplication_sync_rpc::mail_box().size(), 1);\n\n            auto &req = *duplication_sync_rpc::mail_box().back().mutable_request();\n            ASSERT_EQ(req.node, stub->primary_address());\n            ASSERT_EQ(req.confirm_list.size(), total_app_num);\n\n            for (int appid = 1; appid <= total_app_num; appid++) {\n                ASSERT_TRUE(req.confirm_list.find(gpid(appid, 1)) != req.confirm_list.end());\n\n                auto dup_list = req.confirm_list[gpid(appid, 1)];\n                ASSERT_EQ(dup_list.size(), 1);\n\n                auto dup = dup_list[0];\n                ASSERT_EQ(dup.dupid, 1);\n                ASSERT_EQ(dup.confirmed_decree, 1500);\n            }\n        }\n    }\n\n    void test_update_duplication_map()\n    {\n        std::map<int32_t, std::map<dupid_t, duplication_entry>> dup_map;\n        for (int32_t appid = 1; appid <= 10; appid++) {\n            for (int partition_id = 0; partition_id < 3; partition_id++) {\n                stub->add_primary_replica(appid, partition_id);\n            }\n        }\n\n        { // Ensure update_duplication_map adds new duplications if they are not existed.\n            duplication_entry ent;\n            ent.dupid = 2;\n            ent.status = duplication_status::DS_PAUSE;\n  
          for (int i = 0; i < 3; i++) {\n                ent.progress[i] = 0;\n            }\n\n            // add duplication 2 for app 1, 3, 5 (of course in real world cases duplication\n            // will not be the same for different tables)\n            dup_map[1][ent.dupid] = ent;\n            dup_map[3][ent.dupid] = ent;\n            dup_map[5][ent.dupid] = ent;\n\n            dup_sync->update_duplication_map(dup_map);\n\n            for (int32_t appid : {1, 3, 5}) {\n                for (int partition_id : {0, 1, 2}) {\n                    auto dup = find_dup(stub->find_replica(appid, partition_id), 2);\n                    ASSERT_TRUE(dup);\n                }\n            }\n\n            // update duplicated decree of 1, 3, 5 to 2\n            auto dup = find_dup(stub->find_replica(1, 1), 2);\n            dup->update_progress(dup->progress().set_last_decree(2));\n\n            dup = find_dup(stub->find_replica(3, 1), 2);\n            dup->update_progress(dup->progress().set_last_decree(2));\n\n            dup = find_dup(stub->find_replica(5, 1), 2);\n            dup->update_progress(dup->progress().set_last_decree(2));\n        }\n\n        RPC_MOCKING(duplication_sync_rpc)\n        {\n            stub->set_state_connected();\n            dup_sync->run();\n            ASSERT_EQ(duplication_sync_rpc::mail_box().size(), 1);\n\n            auto &req = duplication_sync_rpc::mail_box().back().request();\n            ASSERT_EQ(req.confirm_list.size(), 3);\n\n            ASSERT_TRUE(req.confirm_list.find(gpid(1, 1)) != req.confirm_list.end());\n            ASSERT_TRUE(req.confirm_list.find(gpid(3, 1)) != req.confirm_list.end());\n            ASSERT_TRUE(req.confirm_list.find(gpid(5, 1)) != req.confirm_list.end());\n        }\n\n        {\n            dup_map.erase(3);\n            dup_sync->update_duplication_map(dup_map);\n            ASSERT_TRUE(find_dup(stub->find_replica(1, 1), 2) != nullptr);\n            ASSERT_TRUE(find_dup(stub->find_replica(3, 1), 2) 
== nullptr);\n            ASSERT_TRUE(find_dup(stub->find_replica(5, 1), 2) != nullptr);\n        }\n\n        {\n            dup_map.clear();\n            dup_sync->update_duplication_map(dup_map);\n            ASSERT_TRUE(find_dup(stub->find_replica(1, 1), 2) == nullptr);\n            ASSERT_TRUE(find_dup(stub->find_replica(3, 1), 2) == nullptr);\n            ASSERT_TRUE(find_dup(stub->find_replica(5, 1), 2) == nullptr);\n        }\n    }\n\n    void test_update_on_non_primary()\n    {\n        stub->add_non_primary_replica(2, 1);\n\n        duplication_entry ent;\n        ent.dupid = 1;\n        ent.status = duplication_status::DS_PAUSE;\n\n        std::map<int32_t, std::map<dupid_t, duplication_entry>> dup_map;\n        dup_map[2][ent.dupid] = ent; // app 2 doesn't have a primary replica\n\n        dup_sync->update_duplication_map(dup_map);\n\n        ASSERT_TRUE(stub->mock_replicas[gpid(2, 1)]\n                        ->get_replica_duplicator_manager()\n                        ._duplications.empty());\n    }\n\n    void test_update_confirmed_points()\n    {\n        for (int32_t appid = 1; appid <= 10; appid++) {\n            stub->add_primary_replica(appid, 1);\n        }\n\n        for (int appid = 1; appid <= 3; appid++) {\n            auto r = stub->find_replica(appid, 1);\n\n            duplication_entry ent;\n            ent.dupid = 1;\n            ent.status = duplication_status::DS_PAUSE;\n            ent.progress[r->get_gpid().get_partition_index()] = 0;\n            auto dup = make_unique<replica_duplicator>(ent, r);\n            dup->update_progress(dup->progress().set_last_decree(3).set_confirmed_decree(1));\n            add_dup(r, std::move(dup));\n        }\n\n        duplication_entry ent;\n        ent.dupid = 1;\n        ent.progress[1] = 3; // app=[1,2,3], partition=1, confirmed=3\n        duplication_sync_response resp;\n        resp.dup_map[1][ent.dupid] = ent;\n        resp.dup_map[2][ent.dupid] = ent;\n        resp.dup_map[3][ent.dupid] = 
ent;\n\n        dup_sync->on_duplication_sync_reply(ERR_OK, resp);\n\n        for (int appid = 1; appid <= 3; appid++) {\n            auto r = stub->find_replica(appid, 1);\n            auto dup = find_dup(r, 1);\n\n            ASSERT_EQ(dup->progress().confirmed_decree, 3);\n        }\n    }\n\n    // ensure dup-sync behaves correctly regardless\n    // replica status transition (PRIMARY->SECONDARY/SECONDARY->PRIMARY)\n    void test_replica_status_transition()\n    {\n        // 10 primaries\n        int appid = 1;\n        for (int partition_id = 0; partition_id < 10; partition_id++) {\n            stub->add_primary_replica(appid, partition_id);\n        }\n\n        duplication_entry ent;\n        ent.dupid = 2;\n        ent.status = duplication_status::DS_PAUSE;\n        for (int i = 0; i < 10; i++) {\n            ent.progress[i] = 0;\n        }\n        std::map<int32_t, std::map<dupid_t, duplication_entry>> dup_map;\n        dup_map[appid][ent.dupid] = ent;\n\n        dup_sync->update_duplication_map(dup_map);\n        for (int partition_id = 0; partition_id < 10; partition_id++) {\n            ASSERT_NE(find_dup(stub->find_replica(1, partition_id), 2), nullptr) << partition_id;\n            ASSERT_EQ(find_dup(stub->find_replica(1, partition_id), 2)->id(), 2);\n        }\n\n        // primary -> secondary\n        for (int partition_id = 0; partition_id < 10; partition_id++) {\n            stub->find_replica(1, partition_id)->as_secondary();\n        }\n        dup_sync->update_duplication_map(dup_map);\n        for (int partition_id = 0; partition_id < 10; partition_id++) {\n            ASSERT_TRUE(stub->find_replica(1, partition_id)\n                            ->get_duplication_manager()\n                            ->_duplications.empty());\n        }\n\n        // secondary back to primary\n        for (int partition_id = 0; partition_id < 10; partition_id++) {\n            stub->find_replica(1, partition_id)->as_primary();\n        }\n        
dup_sync->update_duplication_map(dup_map);\n        for (int partition_id = 0; partition_id < 10; partition_id++) {\n            ASSERT_EQ(find_dup(stub->find_replica(1, partition_id), 2)->id(), 2);\n        }\n\n        // on meta's perspective, only 3 partitions are hosted on this server\n        ent.progress.clear();\n        for (int i = 0; i < 3; i++) {\n            ent.progress[i] = 0;\n        }\n        dup_map[appid][ent.dupid] = ent;\n        dup_sync->update_duplication_map(dup_map);\n        for (int partition_id = 0; partition_id < 3; partition_id++) {\n            ASSERT_EQ(find_dup(stub->find_replica(1, partition_id), 2)->id(), 2);\n        }\n        for (int partition_id = 3; partition_id < 10; partition_id++) {\n            ASSERT_TRUE(stub->find_replica(1, partition_id)\n                            ->get_duplication_manager()\n                            ->_duplications.empty());\n        }\n    }\n\n    // meta server doesn't suppose to sync duplication that's INIT or REMOVED\n    // there must be some internal problems.\n    void test_receive_illegal_duplication_status()\n    {\n        stub->add_primary_replica(1, 0);\n\n        duplication_entry ent;\n        ent.dupid = 2;\n        ent.status = duplication_status::DS_PAUSE;\n        for (int i = 0; i < 16; i++) {\n            ent.progress[i] = 0;\n        }\n        std::map<int32_t, std::map<dupid_t, duplication_entry>> dup_map;\n        dup_map[1][ent.dupid] = ent;\n        dup_sync->update_duplication_map(dup_map);\n        ASSERT_EQ(find_dup(stub->find_replica(1, 0), 2)->_status, duplication_status::DS_PAUSE);\n\n        ent.status = duplication_status::DS_INIT;\n        dup_map[1][ent.dupid] = ent;\n        dup_sync->update_duplication_map(dup_map);\n        ASSERT_EQ(find_dup(stub->find_replica(1, 0), 2)->_status, duplication_status::DS_PAUSE);\n\n        ent.status = duplication_status::DS_REMOVED;\n        dup_map[1][ent.dupid] = ent;\n        
dup_sync->update_duplication_map(dup_map);\n        ASSERT_EQ(find_dup(stub->find_replica(1, 0), 2)->_status, duplication_status::DS_PAUSE);\n    }\n\nprotected:\n    std::unique_ptr<duplication_sync_timer> dup_sync;\n};\n\nTEST_F(duplication_sync_timer_test, duplication_sync) { test_duplication_sync(); }\n\nTEST_F(duplication_sync_timer_test, update_duplication_map) { test_update_duplication_map(); }\n\nTEST_F(duplication_sync_timer_test, update_on_non_primary) { test_update_on_non_primary(); }\n\nTEST_F(duplication_sync_timer_test, update_confirmed_points) { test_update_confirmed_points(); }\n\nTEST_F(duplication_sync_timer_test, on_duplication_sync_reply) { test_on_duplication_sync_reply(); }\n\nTEST_F(duplication_sync_timer_test, replica_status_transition) { test_replica_status_transition(); }\n\nTEST_F(duplication_sync_timer_test, receive_illegal_duplication_status)\n{\n    test_receive_illegal_duplication_status();\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/test/duplication_test_base.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"replica/mutation_log_utils.h\"\n#include \"replica/test/replica_test_base.h\"\n#include \"replica/duplication/replica_duplicator.h\"\n#include \"replica/duplication/replica_duplicator_manager.h\"\n#include \"replica/duplication/duplication_sync_timer.h\"\n\nnamespace dsn {\nnamespace replication {\n\nDEFINE_STORAGE_WRITE_RPC_CODE(RPC_DUPLICATION_IDEMPOTENT_WRITE, NOT_ALLOW_BATCH, IS_IDEMPOTENT)\nDEFINE_STORAGE_WRITE_RPC_CODE(RPC_DUPLICATION_NON_IDEMPOTENT_WRITE, NOT_ALLOW_BATCH, NOT_IDEMPOTENT)\n\nclass duplication_test_base : public replica_test_base\n{\npublic:\n    duplication_test_base()\n    {\n        mutation_duplicator::creator = [](replica_base *r, dsn::string_view, dsn::string_view) {\n            return make_unique<mock_mutation_duplicator>(r);\n        };\n        stub->_duplication_sync_timer = make_unique<duplication_sync_timer>(stub.get());\n    }\n\n    void add_dup(mock_replica *r, replica_duplicator_u_ptr dup)\n    {\n        r->get_replica_duplicator_manager()._duplications[dup->id()] = std::move(dup);\n    }\n\n    replica_duplicator *find_dup(mock_replica *r, dupid_t dupid)\n    {\n        auto 
&dup_entities = r->get_replica_duplicator_manager()._duplications;\n        if (dup_entities.find(dupid) == dup_entities.end()) {\n            return nullptr;\n        }\n        return dup_entities[dupid].get();\n    }\n\n    std::unique_ptr<replica_duplicator> create_test_duplicator(decree confirmed = invalid_decree,\n                                                               decree start = invalid_decree)\n    {\n        duplication_entry dup_ent;\n        dup_ent.dupid = 1;\n        dup_ent.remote = \"remote_address\";\n        dup_ent.status = duplication_status::DS_PAUSE;\n        dup_ent.progress[_replica->get_gpid().get_partition_index()] = confirmed;\n\n        auto duplicator = make_unique<replica_duplicator>(dup_ent, _replica.get());\n        duplicator->_start_point_decree = start;\n        return duplicator;\n    }\n\n    std::map<int, log_file_ptr> open_log_file_map(const std::string &log_dir)\n    {\n        std::map<int, log_file_ptr> log_file_map;\n        error_s err = log_utils::open_log_file_map(log_dir, log_file_map);\n        EXPECT_EQ(err, error_s::ok());\n        return log_file_map;\n    }\n\n    mutation_ptr create_test_mutation(int64_t decree, const std::string &data) override\n    {\n        auto mut = replica_test_base::create_test_mutation(decree, data);\n        mut->data.updates[0].code = RPC_DUPLICATION_IDEMPOTENT_WRITE; // must be idempotent write\n        return mut;\n    }\n\n    void wait_all(const std::unique_ptr<replica_duplicator> &dup)\n    {\n        dup->tracker()->wait_outstanding_tasks();\n        dup->_replica->tracker()->wait_outstanding_tasks();\n    }\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/test/load_from_private_log_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/defer.h>\n#include <dsn/utility/fail_point.h>\n\n#define BOOST_NO_CXX11_SCOPED_ENUMS\n#include <boost/filesystem/operations.hpp>\n#undef BOOST_NO_CXX11_SCOPED_ENUMS\n\n#include \"replica/mutation_log_utils.h\"\n#include \"replica/duplication/load_from_private_log.h\"\n#include \"duplication_test_base.h\"\n\nnamespace dsn {\nnamespace replication {\n\nDEFINE_STORAGE_WRITE_RPC_CODE(RPC_RRDB_RRDB_PUT, ALLOW_BATCH, IS_IDEMPOTENT)\n\nclass load_from_private_log_test : public duplication_test_base\n{\npublic:\n    load_from_private_log_test()\n    {\n        _replica->init_private_log(_log_dir);\n        duplicator = create_test_duplicator();\n    }\n\n    // return number of entries written\n    int generate_multiple_log_files(uint files_num = 3)\n    {\n        // decree ranges from [1, files_num*10)\n        for (int f = 0; f < files_num; f++) {\n            // each round mlog will replay the former logs, and create new file\n            mutation_log_ptr mlog = create_private_log();\n            for (int i = 1; i <= 10; i++) {\n                std::string msg = \"hello!\";\n                
mutation_ptr mu = create_test_mutation(10 * f + i, msg);\n                mlog->append(mu, LPC_AIO_IMMEDIATE_CALLBACK, nullptr, nullptr, 0);\n            }\n            mlog->tracker()->wait_outstanding_tasks();\n            mlog->close();\n        }\n        return static_cast<int>(files_num * 10);\n    }\n\n    void test_find_log_file_to_start()\n    {\n        load_from_private_log load(_replica.get(), duplicator.get());\n\n        std::vector<std::string> mutations;\n        int max_log_file_mb = 1;\n\n        mutation_log_ptr mlog = new mutation_log_private(\n            _replica->dir(), max_log_file_mb, _replica->get_gpid(), _replica.get());\n        EXPECT_EQ(mlog->open(nullptr, nullptr), ERR_OK);\n\n        load.find_log_file_to_start({});\n        ASSERT_FALSE(load._current);\n\n        int num_entries = generate_multiple_log_files(3);\n\n        auto files = open_log_file_map(_log_dir);\n\n        load.set_start_decree(1);\n        load.find_log_file_to_start(files);\n        ASSERT_TRUE(load._current);\n        ASSERT_EQ(load._current->index(), 1);\n\n        load._current = nullptr;\n        load.set_start_decree(5);\n        load.find_log_file_to_start(files);\n        ASSERT_TRUE(load._current);\n        ASSERT_EQ(load._current->index(), 1);\n\n        int last_idx = files.rbegin()->first;\n        load._current = nullptr;\n        load.set_start_decree(num_entries + 200);\n        load.find_log_file_to_start(files);\n        ASSERT_TRUE(load._current);\n        ASSERT_EQ(load._current->index(), last_idx);\n    }\n\n    void test_start_duplication(int num_entries, int private_log_size_mb)\n    {\n        mutation_log_ptr mlog = create_private_log(private_log_size_mb, _replica->get_gpid());\n\n        int last_commit_decree_start = 5;\n        int decree_start = 10;\n        {\n            DSN_DECLARE_bool(plog_force_flush);\n            auto reserved_plog_force_flush = FLAGS_plog_force_flush;\n            FLAGS_plog_force_flush = true;\n            
for (int i = decree_start; i <= num_entries + decree_start; i++) {\n                std::string msg = \"hello!\";\n                //  decree - last_commit_decree  = 1 by default\n                mutation_ptr mu = create_test_mutation(i, msg);\n                // mock the last_commit_decree of first mu equal with `last_commit_decree_start`\n                if (i == decree_start) {\n                    mu->data.header.last_committed_decree = last_commit_decree_start;\n                }\n                mlog->append(mu, LPC_AIO_IMMEDIATE_CALLBACK, nullptr, nullptr, 0);\n            }\n\n            // commit the last entry\n            mutation_ptr mu = create_test_mutation(decree_start + num_entries + 1, \"hello!\");\n            mlog->append(mu, LPC_AIO_IMMEDIATE_CALLBACK, nullptr, nullptr, 0);\n            FLAGS_plog_force_flush = reserved_plog_force_flush;\n\n            mlog->close();\n        }\n\n        load_and_wait_all_entries_loaded(num_entries, num_entries, decree_start);\n    }\n\n    mutation_tuple_set\n    load_and_wait_all_entries_loaded(int total, int last_decree, decree start_decree)\n    {\n        return load_and_wait_all_entries_loaded(\n            total, last_decree, _replica->get_gpid(), start_decree, -1);\n    }\n\n    mutation_tuple_set load_and_wait_all_entries_loaded(int total, int last_decree)\n    {\n        return load_and_wait_all_entries_loaded(total, last_decree, _replica->get_gpid(), 0, -1);\n    }\n\n    mutation_tuple_set load_and_wait_all_entries_loaded(\n        int total, int last_decree, gpid id, decree start_decree, decree confirmed_decree)\n    {\n        mutation_log_ptr mlog = create_private_log(id);\n        for (const auto &pr : mlog->get_log_file_map()) {\n            EXPECT_TRUE(pr.second->file_handle() == nullptr);\n        }\n        _replica->init_private_log(mlog);\n        duplicator = create_test_duplicator(confirmed_decree);\n\n        load_from_private_log load(_replica.get(), duplicator.get());\n        
const_cast<std::chrono::milliseconds &>(load._repeat_delay) = 1_s;\n        load.set_start_decree(start_decree);\n\n        mutation_tuple_set loaded_mutations;\n        pipeline::do_when<decree, mutation_tuple_set> end_stage(\n            [&loaded_mutations, &load, total, last_decree](decree &&d,\n                                                           mutation_tuple_set &&mutations) {\n                // we create one mutation_update per mutation\n                // the mutations are started from 1\n                for (mutation_tuple mut : mutations) {\n                    loaded_mutations.emplace(mut);\n                }\n\n                if (loaded_mutations.size() < total || d < last_decree) {\n                    load.run();\n                }\n            });\n\n        duplicator->from(load).link(end_stage);\n\n        // inject some faults\n        fail::setup();\n        fail::cfg(\"open_read\", \"25%1*return()\");\n        fail::cfg(\"mutation_log_read_log_block\", \"25%1*return()\");\n        fail::cfg(\"duplication_sync_complete\", \"void()\");\n        duplicator->run_pipeline();\n        duplicator->wait_all();\n        fail::teardown();\n\n        return loaded_mutations;\n    }\n\n    void test_restart_duplication()\n    {\n        load_from_private_log load(_replica.get(), duplicator.get());\n\n        generate_multiple_log_files(2);\n\n        std::vector<std::string> files;\n        ASSERT_EQ(log_utils::list_all_files(_log_dir, files), error_s::ok());\n        ASSERT_EQ(files.size(), 2);\n        boost::filesystem::remove(_log_dir + \"/log.1.0\");\n\n        mutation_log_ptr mlog = create_private_log();\n        decree max_gced_dercee = mlog->max_gced_decree_no_lock(_replica->get_gpid());\n\n        // new duplication, start_decree = max_gced_decree + 1\n        // ensure we can find the first file.\n        load.set_start_decree(max_gced_dercee + 1);\n        load.find_log_file_to_start(mlog->get_log_file_map());\n        
ASSERT_TRUE(load._current);\n        ASSERT_EQ(load._current->index(), 2);\n    }\n\n    mutation_log_ptr create_private_log(gpid id) { return create_private_log(1, id); }\n\n    mutation_log_ptr create_private_log(int private_log_size_mb = 1, gpid id = gpid(1, 1))\n    {\n        std::map<gpid, decree> replay_condition;\n        replay_condition[id] = 0; // duplicating\n        mutation_log::replay_callback cb = [](int, mutation_ptr &) { return true; };\n        mutation_log_ptr mlog;\n\n        int try_cnt = 0;\n        while (try_cnt < 5) {\n            try_cnt++;\n            mlog =\n                new mutation_log_private(_replica->dir(), private_log_size_mb, id, _replica.get());\n            error_code err = mlog->open(cb, nullptr, replay_condition);\n            if (err == ERR_OK) {\n                break;\n            }\n            derror_f(\"mlog open failed, encountered error: {}\", err);\n        }\n        return mlog;\n    }\n\n    std::unique_ptr<replica_duplicator> duplicator;\n};\n\nTEST_F(load_from_private_log_test, find_log_file_to_start) { test_find_log_file_to_start(); }\n\nTEST_F(load_from_private_log_test, start_duplication_10000_4MB)\n{\n    test_start_duplication(10000, 4);\n}\n\nTEST_F(load_from_private_log_test, start_duplication_50000_4MB)\n{\n    test_start_duplication(50000, 4);\n}\n\nTEST_F(load_from_private_log_test, start_duplication_10000_1MB)\n{\n    test_start_duplication(10000, 1);\n}\n\nTEST_F(load_from_private_log_test, start_duplication_50000_1MB)\n{\n    test_start_duplication(50000, 1);\n}\n\nTEST_F(load_from_private_log_test, start_duplication_100000_4MB)\n{\n    test_start_duplication(100000, 4);\n}\n\n// Ensure replica_duplicator can correctly handle real-world log file\nTEST_F(load_from_private_log_test, handle_real_private_log)\n{\n    struct test_data\n    {\n        std::string fname;\n        int puts;\n        int total;\n        gpid id;\n    } tests[] = {\n        // PUT, PUT, PUT, EMPTY, PUT, EMPTY, EMPTY\n     
   {\"log.1.0.handle_real_private_log\", 4, 6, gpid(1, 4)},\n\n        // EMPTY, PUT, EMPTY\n        {\"log.1.0.handle_real_private_log2\", 1, 2, gpid(1, 4)},\n\n        // EMPTY, EMPTY, EMPTY\n        {\"log.1.0.all_loaded_are_write_empties\", 0, 2, gpid(1, 5)},\n    };\n\n    for (auto tt : tests) {\n        boost::filesystem::path file(tt.fname);\n        boost::filesystem::copy_file(\n            file, _log_dir + \"/log.1.0\", boost::filesystem::copy_option::overwrite_if_exists);\n\n        // reset replica to specified gpid\n        duplicator.reset(nullptr);\n        _replica = create_mock_replica(\n            stub.get(), tt.id.get_app_id(), tt.id.get_partition_index(), _log_dir.c_str());\n\n        load_and_wait_all_entries_loaded(tt.puts, tt.total, tt.id, 1, 0);\n    }\n}\n\nTEST_F(load_from_private_log_test, restart_duplication) { test_restart_duplication(); }\n\nTEST_F(load_from_private_log_test, ignore_useless)\n{\n    utils::filesystem::remove_path(_log_dir);\n\n    mutation_log_ptr mlog = create_private_log();\n\n    int num_entries = 100;\n    for (int i = 1; i <= num_entries; i++) {\n        std::string msg = \"hello!\";\n        mutation_ptr mu = create_test_mutation(i, msg);\n        mlog->append(mu, LPC_AIO_IMMEDIATE_CALLBACK, nullptr, nullptr, 0);\n    }\n\n    // commit the last entry\n    mutation_ptr mu = create_test_mutation(1 + num_entries, \"hello!\");\n    mlog->append(mu, LPC_AIO_IMMEDIATE_CALLBACK, nullptr, nullptr, 0);\n    mlog->close();\n\n    // starts from 51\n    mutation_tuple_set result =\n        load_and_wait_all_entries_loaded(50, 100, _replica->get_gpid(), 51, 0);\n    ASSERT_EQ(result.size(), 50);\n\n    // starts from 100\n    result = load_and_wait_all_entries_loaded(1, 100, _replica->get_gpid(), 100, 0);\n    ASSERT_EQ(result.size(), 1);\n\n    // a new duplication's confirmed_decree is invalid_decree,\n    // so start_decree is 0.\n    // In this case duplication will starts from last_commit(100) + 1,\n    // no 
mutation will be loaded.\n    result = load_and_wait_all_entries_loaded(0, 100, _replica->get_gpid(), 101, -1);\n    ASSERT_EQ(result.size(), 0);\n}\n\nclass load_fail_mode_test : public load_from_private_log_test\n{\npublic:\n    void SetUp() override\n    {\n        const int num_entries = generate_multiple_log_files();\n\n        // prepare loading pipeline\n        mlog = create_private_log();\n        _replica->init_private_log(mlog);\n        duplicator = create_test_duplicator(1);\n        load = make_unique<load_from_private_log>(_replica.get(), duplicator.get());\n        load->TEST_set_repeat_delay(0_ms); // no delay\n        load->set_start_decree(duplicator->progress().last_decree + 1);\n        end_stage = make_unique<end_stage_t>(\n            [this, num_entries](decree &&d, mutation_tuple_set &&mutations) {\n                load->set_start_decree(d + 1);\n                if (d < num_entries - 1) {\n                    load->run();\n                }\n            });\n        duplicator->from(*load).link(*end_stage);\n    }\n\n    mutation_log_ptr mlog;\n    std::unique_ptr<load_from_private_log> load;\n\n    using end_stage_t = pipeline::do_when<decree, mutation_tuple_set>;\n    std::unique_ptr<end_stage_t> end_stage;\n};\n\nTEST_F(load_fail_mode_test, fail_skip)\n{\n    duplicator->update_fail_mode(duplication_fail_mode::FAIL_SKIP);\n    ASSERT_EQ(load->_counter_dup_load_skipped_bytes_count->get_integer_value(), 0);\n\n    // will trigger fail-skip and read the subsequent file, some mutations will be lost.\n    auto repeats = load->MAX_ALLOWED_BLOCK_REPEATS * load->MAX_ALLOWED_FILE_REPEATS;\n    fail::setup();\n    fail::cfg(\"mutation_log_replay_block\", fmt::format(\"100%{}*return()\", repeats));\n    duplicator->run_pipeline();\n    duplicator->wait_all();\n    fail::teardown();\n\n    ASSERT_EQ(load->_counter_dup_load_file_failed_count->get_integer_value(),\n              load_from_private_log::MAX_ALLOWED_FILE_REPEATS);\n    
ASSERT_GT(load->_counter_dup_load_skipped_bytes_count->get_integer_value(), 0);\n}\n\nTEST_F(load_fail_mode_test, fail_slow)\n{\n    duplicator->update_fail_mode(duplication_fail_mode::FAIL_SLOW);\n    ASSERT_EQ(load->_counter_dup_load_skipped_bytes_count->get_integer_value(), 0);\n    ASSERT_EQ(load->_counter_dup_load_file_failed_count->get_integer_value(), 0);\n\n    // will trigger fail-slow and retry infinitely\n    auto repeats = load->MAX_ALLOWED_BLOCK_REPEATS * load->MAX_ALLOWED_FILE_REPEATS;\n    fail::setup();\n    fail::cfg(\"mutation_log_replay_block\", fmt::format(\"100%{}*return()\", repeats));\n    duplicator->run_pipeline();\n    duplicator->wait_all();\n    fail::teardown();\n\n    ASSERT_EQ(load->_counter_dup_load_file_failed_count->get_integer_value(),\n              load_from_private_log::MAX_ALLOWED_FILE_REPEATS);\n    ASSERT_EQ(load->_counter_dup_load_skipped_bytes_count->get_integer_value(), 0);\n}\n\nTEST_F(load_fail_mode_test, fail_skip_real_corrupted_file)\n{\n    { // inject some bad data in the middle of the first file\n        std::string log_path = _log_dir + \"/log.1.0\";\n        auto file_size = boost::filesystem::file_size(log_path);\n        int fd = open(log_path.c_str(), O_WRONLY);\n        const char buf[] = \"xxxxxx\";\n        auto written_size = pwrite(fd, buf, sizeof(buf), file_size / 2);\n        ASSERT_EQ(written_size, sizeof(buf));\n        close(fd);\n    }\n\n    duplicator->update_fail_mode(duplication_fail_mode::FAIL_SKIP);\n    duplicator->run_pipeline();\n    duplicator->wait_all();\n\n    // ensure the bad file will be skipped\n    ASSERT_EQ(load->_counter_dup_load_file_failed_count->get_integer_value(),\n              load_from_private_log::MAX_ALLOWED_FILE_REPEATS);\n    ASSERT_GT(load->_counter_dup_load_skipped_bytes_count->get_integer_value(), 0);\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/test/main.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n\n#include <dsn/service_api_cpp.h>\n\nint g_test_count = 0;\nint g_test_ret = 0;\n\nclass gtest_app : public dsn::service_app\n{\npublic:\n    gtest_app(const dsn::service_app_info *info) : ::dsn::service_app(info) {}\n\n    dsn::error_code start(const std::vector<std::string> &args) override\n    {\n        g_test_ret = RUN_ALL_TESTS();\n        g_test_count = 1;\n        return dsn::ERR_OK;\n    }\n\n    dsn::error_code stop(bool) override { return dsn::ERR_OK; }\n};\n\nGTEST_API_ int main(int argc, char **argv)\n{\n    testing::InitGoogleTest(&argc, argv);\n\n    dsn::service_app::register_factory<gtest_app>(\"replica\");\n\n    dsn_run_config(\"config-test.ini\", false);\n    while (g_test_count == 0) {\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n    }\n\n    dsn_exit(g_test_ret);\n}\n"
  },
  {
    "path": "src/replica/duplication/test/mutation_batch_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"duplication_test_base.h\"\n#include \"replica/duplication/mutation_batch.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass mutation_batch_test : public duplication_test_base\n{\npublic:\n    void\n    reset_buffer(const mutation_batch &batcher, const decree last_commit, decree start, decree end)\n    {\n        batcher._mutation_buffer->reset(last_commit);\n        batcher._mutation_buffer->_start_decree = start;\n        batcher._mutation_buffer->_end_decree = end;\n    }\n\n    void commit_buffer(const mutation_batch &batcher, const decree current_decree)\n    {\n        batcher._mutation_buffer->commit(current_decree, COMMIT_TO_DECREE_HARD);\n    }\n};\n\nTEST_F(mutation_batch_test, add_mutation_if_valid)\n{\n    auto duplicator = create_test_duplicator(0);\n    mutation_batch batcher(duplicator.get());\n\n    mutation_tuple_set result;\n\n    std::string s = \"hello\";\n    mutation_ptr mu1 = create_test_mutation(1, s);\n    batcher.add_mutation_if_valid(mu1, 0);\n    result = batcher.move_all_mutations();\n    mutation_tuple mt1 = *result.begin();\n\n    s = \"world\";\n    mutation_ptr mu2 = create_test_mutation(2, 
s);\n    batcher.add_mutation_if_valid(mu2, 0);\n    result = batcher.move_all_mutations();\n    mutation_tuple mt2 = *result.begin();\n\n    ASSERT_EQ(std::get<2>(mt1).to_string(), \"hello\");\n    ASSERT_EQ(std::get<2>(mt2).to_string(), \"world\");\n\n    // decree 1 should be ignored\n    mutation_ptr mu3 = create_test_mutation(1, s);\n    batcher.add_mutation_if_valid(mu2, 2);\n    batcher.add_mutation_if_valid(mu3, 1);\n    result = batcher.move_all_mutations();\n    ASSERT_EQ(result.size(), 2);\n}\n\nTEST_F(mutation_batch_test, ignore_non_idempotent_write)\n{\n    auto duplicator = create_test_duplicator(0);\n    mutation_batch batcher(duplicator.get());\n\n    std::string s = \"hello\";\n    mutation_ptr mu = create_test_mutation(1, s);\n    mu->data.updates[0].code = RPC_DUPLICATION_NON_IDEMPOTENT_WRITE;\n    batcher.add_mutation_if_valid(mu, 0);\n    mutation_tuple_set result = batcher.move_all_mutations();\n    ASSERT_EQ(result.size(), 0);\n}\n\nTEST_F(mutation_batch_test, mutation_buffer_commit)\n{\n    auto duplicator = create_test_duplicator(0);\n    mutation_batch batcher(duplicator.get());\n    // mock mutation_buffer[last=10, start=15, end=20], last + 1(next commit decree) is out of\n    // [start~end]\n    reset_buffer(batcher, 10, 15, 20);\n    commit_buffer(batcher, 15);\n    ASSERT_EQ(batcher.last_decree(), 14);\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/test/replica_duplicator_manager_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"duplication_test_base.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_duplicator_manager_test : public duplication_test_base\n{\npublic:\n    void SetUp() override { stub = make_unique<mock_replica_stub>(); }\n\n    void TearDown() override { stub.reset(); }\n\n    void test_remove_non_existed_duplications()\n    {\n        auto r = stub->add_primary_replica(2, 1);\n        auto &d = r->get_replica_duplicator_manager();\n\n        duplication_entry ent;\n        ent.dupid = 1;\n        ent.status = duplication_status::DS_PAUSE;\n        ent.remote = \"dsn://slave-cluster\";\n        ent.progress[r->get_gpid().get_partition_index()] = 0;\n        d.sync_duplication(ent);\n        ASSERT_EQ(d._duplications.size(), 1);\n\n        // remove all dup\n        d.remove_non_existed_duplications({});\n        ASSERT_EQ(d._duplications.size(), 0);\n\n        ent.dupid = 2;\n        d.sync_duplication(ent);\n        ASSERT_EQ(d._duplications.size(), 1);\n    }\n\n    void test_set_confirmed_decree_non_primary()\n    {\n        auto r = stub->add_primary_replica(2, 1);\n        auto &d = 
r->get_replica_duplicator_manager();\n\n        duplication_entry ent;\n        ent.dupid = 1;\n        ent.status = duplication_status::DS_PAUSE;\n        ent.remote = \"dsn://slave-cluster\";\n        ent.progress[r->get_gpid().get_partition_index()] = 100;\n        d.sync_duplication(ent);\n        ASSERT_EQ(d._duplications.size(), 1);\n        ASSERT_EQ(d._primary_confirmed_decree, invalid_decree);\n\n        // replica failover\n        r->as_secondary();\n\n        d.update_confirmed_decree_if_secondary(99);\n        ASSERT_EQ(d._duplications.size(), 0);\n        ASSERT_EQ(d._primary_confirmed_decree, 99);\n\n        // receives group check\n        d.update_confirmed_decree_if_secondary(101);\n        ASSERT_EQ(d._duplications.size(), 0);\n        ASSERT_EQ(d._primary_confirmed_decree, 101);\n\n        // confirmed decree never decreases\n        d.update_confirmed_decree_if_secondary(0);\n        ASSERT_EQ(d._primary_confirmed_decree, 101);\n        d.update_confirmed_decree_if_secondary(1);\n        ASSERT_EQ(d._primary_confirmed_decree, 101);\n\n        // duplication removed and the confimed_decree = -1\n        d.update_confirmed_decree_if_secondary(-1);\n        ASSERT_EQ(d._primary_confirmed_decree, -1);\n    }\n\n    void test_get_duplication_confirms()\n    {\n        auto r = stub->add_primary_replica(2, 1);\n\n        int total_dup_num = 10;\n        int update_dup_num = 4; // the number of dups that will be updated\n\n        for (dupid_t id = 1; id <= update_dup_num; id++) {\n            duplication_entry ent;\n            ent.dupid = id;\n            ent.status = duplication_status::DS_PAUSE;\n            ent.progress[r->get_gpid().get_partition_index()] = 0;\n\n            auto dup = make_unique<replica_duplicator>(ent, r);\n            dup->update_progress(dup->progress().set_last_decree(2).set_confirmed_decree(1));\n            add_dup(r, std::move(dup));\n        }\n\n        for (dupid_t id = update_dup_num + 1; id <= total_dup_num; id++) 
{\n            duplication_entry ent;\n            ent.dupid = id;\n            ent.status = duplication_status::DS_PAUSE;\n            ent.progress[r->get_gpid().get_partition_index()] = 0;\n\n            auto dup = make_unique<replica_duplicator>(ent, r);\n            dup->update_progress(dup->progress().set_last_decree(1).set_confirmed_decree(1));\n            add_dup(r, std::move(dup));\n        }\n\n        auto result = r->get_replica_duplicator_manager().get_duplication_confirms_to_update();\n        ASSERT_EQ(result.size(), update_dup_num);\n    }\n\n    void test_min_confirmed_decree()\n    {\n        struct test_case\n        {\n            std::vector<int64_t> confirmed_decree;\n            int64_t min_confirmed_decree;\n        };\n\n        auto r = stub->add_non_primary_replica(2, 1);\n        auto assert_test = [r, this](test_case tt) {\n            for (int id = 1; id <= tt.confirmed_decree.size(); id++) {\n                duplication_entry ent;\n                ent.dupid = id;\n                ent.status = duplication_status::DS_PAUSE;\n                ent.progress[r->get_gpid().get_partition_index()] = 0;\n\n                auto dup = make_unique<replica_duplicator>(ent, r);\n                dup->update_progress(dup->progress()\n                                         .set_last_decree(tt.confirmed_decree[id - 1])\n                                         .set_confirmed_decree(tt.confirmed_decree[id - 1]));\n                add_dup(r, std::move(dup));\n            }\n\n            ASSERT_EQ(r->get_replica_duplicator_manager().min_confirmed_decree(),\n                      tt.min_confirmed_decree);\n            r->get_replica_duplicator_manager()._duplications.clear();\n        };\n\n        {\n            // non-primary\n            test_case tt{{1, 2, 3}, invalid_decree};\n            assert_test(tt);\n        }\n\n        { // primary\n            r->as_primary();\n            test_case tt{{1, 2, 3}, 1};\n            assert_test(tt);\n\n         
   tt = {{1000}, 1000};\n            assert_test(tt);\n\n            tt = {{}, invalid_decree};\n            assert_test(tt);\n        }\n    }\n};\n\nTEST_F(replica_duplicator_manager_test, get_duplication_confirms)\n{\n    test_get_duplication_confirms();\n}\n\nTEST_F(replica_duplicator_manager_test, set_confirmed_decree_non_primary)\n{\n    test_set_confirmed_decree_non_primary();\n}\n\nTEST_F(replica_duplicator_manager_test, remove_non_existed_duplications)\n{\n    test_remove_non_existed_duplications();\n}\n\nTEST_F(replica_duplicator_manager_test, min_confirmed_decree) { test_min_confirmed_decree(); }\n\nTEST_F(replica_duplicator_manager_test, update_checkpoint_prepared)\n{\n    auto r = stub->add_primary_replica(2, 1);\n    duplication_entry ent;\n    ent.dupid = 1;\n    ent.status = duplication_status::DS_PAUSE;\n    ent.progress[r->get_gpid().get_partition_index()] = 0;\n\n    auto dup = make_unique<replica_duplicator>(ent, r);\n    r->update_last_durable_decree(100);\n    dup->update_progress(dup->progress().set_last_decree(2).set_confirmed_decree(1));\n    add_dup(r, std::move(dup));\n    auto updates = r->get_replica_duplicator_manager().get_duplication_confirms_to_update();\n    for (const auto &update : updates) {\n        ASSERT_TRUE(update.checkpoint_prepared);\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/test/replica_duplicator_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/filesystem.h>\n\n#include \"replica/mutation_log_utils.h\"\n#include \"replica/duplication/load_from_private_log.h\"\n#include \"replica/duplication/duplication_pipeline.h\"\n#include \"duplication_test_base.h\"\n\nnamespace dsn {\nnamespace apps {\n\n// for loading PUT mutations from log file.\nDEFINE_TASK_CODE_RPC(RPC_RRDB_RRDB_PUT, TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT);\n\n} // namespace apps\n} // namespace dsn\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_duplicator_test : public duplication_test_base\n{\npublic:\n    replica_duplicator_test()\n    {\n        _replica->set_partition_status(partition_status::PS_PRIMARY);\n        _replica->init_private_log(_log_dir);\n    }\n\n    mock_replica *replica() { return _replica.get(); }\n\n    decree last_durable_decree() const { return _replica->last_durable_decree(); }\n\n    decree log_dup_start_decree(const std::unique_ptr<replica_duplicator> &dup) const\n    {\n        return dup->_start_point_decree;\n    }\n\n    void test_new_duplicator()\n    {\n        dupid_t dupid = 1;\n        std::string remote = \"remote_address\";\n        
duplication_status::type status = duplication_status::DS_PAUSE;\n        int64_t confirmed_decree = 100;\n\n        duplication_entry dup_ent;\n        dup_ent.dupid = dupid;\n        dup_ent.remote = remote;\n        dup_ent.status = status;\n        dup_ent.progress[_replica->get_gpid().get_partition_index()] = confirmed_decree;\n\n        auto duplicator = make_unique<replica_duplicator>(dup_ent, _replica.get());\n        ASSERT_EQ(duplicator->id(), dupid);\n        ASSERT_EQ(duplicator->remote_cluster_name(), remote);\n        ASSERT_EQ(duplicator->_status, status);\n        ASSERT_EQ(duplicator->progress().confirmed_decree, confirmed_decree);\n        ASSERT_EQ(duplicator->progress().last_decree, confirmed_decree);\n\n        auto &expected_env = *duplicator;\n        ASSERT_EQ(duplicator->tracker(), expected_env.__conf.tracker);\n        ASSERT_EQ(duplicator->get_gpid().thread_hash(), expected_env.__conf.thread_hash);\n    }\n\n    void test_pause_start_duplication()\n    {\n        mutation_log_ptr mlog =\n            new mutation_log_private(_replica->dir(), 4, _replica->get_gpid(), _replica.get());\n        EXPECT_EQ(mlog->open(nullptr, nullptr), ERR_OK);\n\n        {\n            _replica->init_private_log(mlog);\n            auto duplicator = create_test_duplicator();\n\n            duplicator->update_status_if_needed(duplication_status::DS_LOG);\n            ASSERT_EQ(duplicator->_status, duplication_status::DS_LOG);\n            auto expected_env = duplicator->_ship->_mutation_duplicator->_env;\n            ASSERT_EQ(duplicator->tracker(), expected_env.__conf.tracker);\n            ASSERT_EQ(duplicator->get_gpid().thread_hash(), expected_env.__conf.thread_hash);\n\n            // corner cases: next_status is INIT\n            duplicator->update_status_if_needed(duplication_status::DS_INIT);\n            ASSERT_EQ(duplicator->_status, duplication_status::DS_LOG);\n            duplicator->update_status_if_needed(duplication_status::DS_LOG);\n            
ASSERT_EQ(duplicator->_status, duplication_status::DS_LOG);\n\n            duplicator->update_status_if_needed(duplication_status::DS_PAUSE);\n            ASSERT_TRUE(duplicator->paused());\n            ASSERT_EQ(duplicator->_status, duplication_status::DS_PAUSE);\n            ASSERT_EQ(duplicator->_load_private.get(), nullptr);\n            ASSERT_EQ(duplicator->_load.get(), nullptr);\n            ASSERT_EQ(duplicator->_ship.get(), nullptr);\n\n            // corner cases: next_status is INIT\n            duplicator->update_status_if_needed(duplication_status::DS_INIT);\n            ASSERT_EQ(duplicator->_status, duplication_status::DS_PAUSE);\n\n            // corner cases: next_status is INIT\n            duplicator->update_status_if_needed(duplication_status::DS_INIT);\n            ASSERT_EQ(duplicator->_status, duplication_status::DS_PAUSE);\n\n            duplicator->wait_all();\n        }\n    }\n};\n\nTEST_F(replica_duplicator_test, new_duplicator) { test_new_duplicator(); }\n\nTEST_F(replica_duplicator_test, pause_start_duplication) { test_pause_start_duplication(); }\n\nTEST_F(replica_duplicator_test, duplication_progress)\n{\n    auto duplicator = create_test_duplicator();\n    ASSERT_EQ(duplicator->progress().last_decree, 0); // start duplication from empty plog\n    ASSERT_EQ(duplicator->progress().confirmed_decree, invalid_decree);\n\n    duplicator->update_progress(duplicator->progress().set_last_decree(10));\n    ASSERT_EQ(duplicator->progress().last_decree, 10);\n    ASSERT_EQ(duplicator->progress().confirmed_decree, invalid_decree);\n\n    duplicator->update_progress(duplicator->progress().set_confirmed_decree(10));\n    ASSERT_EQ(duplicator->progress().confirmed_decree, 10);\n    ASSERT_EQ(duplicator->progress().last_decree, 10);\n\n    ASSERT_EQ(duplicator->update_progress(duplicator->progress().set_confirmed_decree(1)),\n              error_s::make(ERR_INVALID_STATE, \"never decrease confirmed_decree: new(1) old(10)\"));\n\n    
ASSERT_EQ(duplicator->update_progress(duplicator->progress().set_confirmed_decree(12)),\n              error_s::make(ERR_INVALID_STATE,\n                            \"last_decree(10) should always larger than confirmed_decree(12)\"));\n\n    auto duplicator_for_checkpoint = create_test_duplicator(invalid_decree, 100);\n    ASSERT_FALSE(duplicator_for_checkpoint->progress().checkpoint_has_prepared);\n\n    replica()->update_last_durable_decree(101);\n    duplicator_for_checkpoint->update_progress(duplicator->progress());\n    ASSERT_TRUE(duplicator_for_checkpoint->progress().checkpoint_has_prepared);\n}\n\nTEST_F(replica_duplicator_test, prapre_dup)\n{\n    auto duplicator = create_test_duplicator(invalid_decree, 100);\n    replica()->update_expect_last_durable_decree(100);\n    duplicator->prepare_dup();\n    wait_all(duplicator);\n    ASSERT_EQ(last_durable_decree(), log_dup_start_decree(duplicator));\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/test/replica_follower_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/filesystem.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/fail_point.h>\n\n#include \"replica/duplication/replica_follower.h\"\n#include \"duplication_test_base.h\"\n\nnamespace dsn {\nnamespace apps {\n\n} // namespace apps\n} // namespace dsn\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_follower_test : public duplication_test_base\n{\npublic:\n    replica_follower_test()\n    {\n        _app_info.app_id = 2;\n        _app_info.app_name = \"follower\";\n        _app_info.app_type = \"replica\";\n        _app_info.is_stateful = true;\n        _app_info.max_replica_count = 3;\n        _app_info.partition_count = 8;\n    }\n\n    void update_mock_replica(const dsn::app_info &app)\n    {\n        bool is_duplication_follower =\n            (app.envs.find(duplication_constants::kDuplicationEnvMasterClusterKey) !=\n             app.envs.end()) &&\n            (app.envs.find(duplication_constants::kDuplicationEnvMasterMetasKey) != app.envs.end());\n        _mock_replica = stub->generate_replica_ptr(\n            app, gpid(2, 1), partition_status::PS_PRIMARY, 1, false, is_duplication_follower);\n   
 }\n\n    void set_duplicating(bool duplicating, replica_follower *follower)\n    {\n        follower->_duplicating_checkpoint = duplicating;\n    }\n\n    bool get_duplicating(replica_follower *follower) { return follower->_duplicating_checkpoint; }\n\n    void async_duplicate_checkpoint_from_master_replica(replica_follower *follower)\n    {\n        follower->async_duplicate_checkpoint_from_master_replica();\n    }\n\n    bool wait_follower_task_completed(replica_follower *follower)\n    {\n        follower->_tracker.wait_outstanding_tasks();\n        return follower->_tracker.all_tasks_success();\n    }\n\n    void mark_tracker_tasks_success(replica_follower *follower)\n    {\n        follower->_tracker.set_tasks_success();\n    }\n\n    error_code update_master_replica_config(replica_follower *follower,\n                                            configuration_query_by_index_response &resp)\n    {\n        return follower->update_master_replica_config(ERR_OK, std::move(resp));\n    }\n\n    const partition_configuration &master_replica_config(replica_follower *follower) const\n    {\n        return follower->_master_replica_config;\n    }\n\n    error_code nfs_copy_checkpoint(replica_follower *follower, error_code err, learn_response resp)\n    {\n        return follower->nfs_copy_checkpoint(err, std::move(resp));\n    }\n\n    void init_nfs()\n    {\n        stub->_nfs = nfs_node::create();\n        stub->_nfs->start();\n    }\n\npublic:\n    dsn::app_info _app_info;\n    mock_replica_ptr _mock_replica;\n};\n\nTEST_F(replica_follower_test, test_init_master_info)\n{\n    _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterClusterKey, \"master\");\n    _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey,\n                           \"127.0.0.1:34801,127.0.0.2:34801,127.0.0.3:34802\");\n    update_mock_replica(_app_info);\n\n    auto follower = _mock_replica->get_replica_follower();\n    
ASSERT_EQ(follower->get_master_app_name(), \"follower\");\n    ASSERT_EQ(follower->get_master_cluster_name(), \"master\");\n    ASSERT_TRUE(follower->is_need_duplicate());\n    ASSERT_TRUE(_mock_replica->is_duplication_follower());\n    std::vector<std::string> test_ip{\"127.0.0.1:34801\", \"127.0.0.2:34801\", \"127.0.0.3:34802\"};\n    for (int i = 0; i < follower->get_master_meta_list().size(); i++) {\n        ASSERT_EQ(std::string(follower->get_master_meta_list()[i].to_string()), test_ip[i]);\n    }\n\n    _app_info.envs.clear();\n    update_mock_replica(_app_info);\n    follower = _mock_replica->get_replica_follower();\n    ASSERT_FALSE(follower->is_need_duplicate());\n    ASSERT_FALSE(_mock_replica->is_duplication_follower());\n}\n\nTEST_F(replica_follower_test, test_duplicate_checkpoint)\n{\n    _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterClusterKey, \"master\");\n    _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey,\n                           \"127.0.0.1:34801,127.0.0.2:34801,127.0.0.3:34802\");\n    update_mock_replica(_app_info);\n\n    auto follower = _mock_replica->get_replica_follower();\n\n    ASSERT_EQ(follower->duplicate_checkpoint(), ERR_TRY_AGAIN);\n    ASSERT_FALSE(get_duplicating(follower));\n\n    mark_tracker_tasks_success(follower);\n    ASSERT_EQ(follower->duplicate_checkpoint(), ERR_OK);\n    ASSERT_FALSE(get_duplicating(follower));\n\n    set_duplicating(true, follower);\n    ASSERT_EQ(follower->duplicate_checkpoint(), ERR_BUSY);\n}\n\nTEST_F(replica_follower_test, test_async_duplicate_checkpoint_from_master_replica)\n{\n    _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterClusterKey, \"master\");\n    _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey,\n                           \"127.0.0.1:34801,127.0.0.2:34801,127.0.0.3:34802\");\n    update_mock_replica(_app_info);\n\n    auto follower = _mock_replica->get_replica_follower();\n\n    
fail::setup();\n    fail::cfg(\"duplicate_checkpoint_failed\", \"void()\");\n    async_duplicate_checkpoint_from_master_replica(follower);\n    ASSERT_FALSE(wait_follower_task_completed(follower));\n    fail::teardown();\n\n    fail::setup();\n    fail::cfg(\"duplicate_checkpoint_ok\", \"void()\");\n    async_duplicate_checkpoint_from_master_replica(follower);\n    ASSERT_TRUE(wait_follower_task_completed(follower));\n    fail::teardown();\n}\n\nTEST_F(replica_follower_test, test_update_master_replica_config)\n{\n    _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterClusterKey, \"master\");\n    _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey,\n                           \"127.0.0.1:34801,127.0.0.2:34801,127.0.0.3:34802\");\n    update_mock_replica(_app_info);\n    auto follower = _mock_replica->get_replica_follower();\n\n    configuration_query_by_index_response resp;\n    ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INCONSISTENT_STATE);\n    ASSERT_EQ(master_replica_config(follower).primary, rpc_address::s_invalid_address);\n\n    resp.partition_count = 100;\n    ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INCONSISTENT_STATE);\n    ASSERT_EQ(master_replica_config(follower).primary, rpc_address::s_invalid_address);\n\n    resp.partition_count = _app_info.partition_count;\n    partition_configuration p;\n    resp.partitions.emplace_back(p);\n    resp.partitions.emplace_back(p);\n    ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INVALID_DATA);\n    ASSERT_EQ(master_replica_config(follower).primary, rpc_address::s_invalid_address);\n\n    resp.partitions.clear();\n    p.pid = gpid(2, 100);\n    resp.partitions.emplace_back(p);\n    ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INCONSISTENT_STATE);\n    ASSERT_EQ(master_replica_config(follower).primary, rpc_address::s_invalid_address);\n\n    resp.partitions.clear();\n    p.primary = 
rpc_address::s_invalid_address;\n    p.pid = gpid(2, 1);\n    resp.partitions.emplace_back(p);\n    ASSERT_EQ(update_master_replica_config(follower, resp), ERR_INVALID_STATE);\n    ASSERT_EQ(master_replica_config(follower).primary, rpc_address::s_invalid_address);\n\n    resp.partitions.clear();\n    p.pid = gpid(2, 1);\n    p.primary = rpc_address(\"127.0.0.1\", 34801);\n    p.secondaries.emplace_back(rpc_address(\"127.0.0.2\", 34801));\n    p.secondaries.emplace_back(rpc_address(\"127.0.0.3\", 34801));\n    resp.partitions.emplace_back(p);\n    ASSERT_EQ(update_master_replica_config(follower, resp), ERR_OK);\n    ASSERT_EQ(master_replica_config(follower).primary, p.primary);\n    ASSERT_EQ(master_replica_config(follower).pid, p.pid);\n}\n\nTEST_F(replica_follower_test, test_nfs_copy_checkpoint)\n{\n    _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterClusterKey, \"master\");\n    _app_info.envs.emplace(duplication_constants::kDuplicationEnvMasterMetasKey,\n                           \"127.0.0.1:34801,127.0.0.2:34801,127.0.0.3:34802\");\n    update_mock_replica(_app_info);\n    init_nfs();\n    auto follower = _mock_replica->get_replica_follower();\n\n    ASSERT_EQ(nfs_copy_checkpoint(follower, ERR_CORRUPTION, learn_response()), ERR_CORRUPTION);\n\n    auto resp = learn_response();\n    resp.address = rpc_address(\"127.0.0.1\", 34801);\n\n    std::string dest = utils::filesystem::path_combine(\n        _mock_replica->dir(), duplication_constants::kDuplicationCheckpointRootDir);\n    dsn::utils::filesystem::create_directory(dest);\n    ASSERT_TRUE(dsn::utils::filesystem::path_exists(dest));\n    ASSERT_EQ(nfs_copy_checkpoint(follower, ERR_OK, resp), ERR_OK);\n    ASSERT_FALSE(dsn::utils::filesystem::path_exists(dest));\n    ASSERT_FALSE(wait_follower_task_completed(follower));\n\n    fail::setup();\n    fail::cfg(\"nfs_copy_ok\", \"void()\");\n    ASSERT_EQ(nfs_copy_checkpoint(follower, ERR_OK, resp), ERR_OK);\n    
ASSERT_TRUE(wait_follower_task_completed(follower));\n    fail::teardown();\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/test/replica_http_service_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"replica/replica_http_service.h\"\n#include \"duplication_test_base.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_http_service_test : public duplication_test_base\n{\n};\n\nTEST_F(replica_http_service_test, query_duplication_handler)\n{\n    auto pri = stub->add_primary_replica(1, 1);\n\n    // primary confirmed_decree\n    duplication_entry ent;\n    ent.dupid = 1583306653;\n    ent.progress[pri->get_gpid().get_partition_index()] = 0;\n    ent.status = duplication_status::DS_PAUSE;\n    add_dup(pri, make_unique<replica_duplicator>(ent, pri));\n\n    replica_http_service http_svc(stub.get());\n\n    http_request req;\n    http_response resp;\n    http_svc.query_duplication_handler(req, resp);\n    ASSERT_EQ(resp.status_code, http_status_code::bad_request); // no appid\n\n    req.query_args[\"appid\"] = \"2\";\n    http_svc.query_duplication_handler(req, resp);\n    ASSERT_EQ(resp.status_code, http_status_code::not_found);\n\n    req.query_args[\"appid\"] = \"2xx\";\n    http_svc.query_duplication_handler(req, resp);\n    ASSERT_EQ(resp.status_code, http_status_code::bad_request);\n\n    auto dup = find_dup(pri, 
ent.dupid);\n    dup->update_progress(duplication_progress().set_last_decree(1050).set_confirmed_decree(1000));\n    pri->set_last_committed_decree(1100);\n    req.query_args[\"appid\"] = \"1\";\n    http_svc.query_duplication_handler(req, resp);\n    ASSERT_EQ(resp.status_code, http_status_code::ok);\n    ASSERT_EQ(\n        resp.body,\n        R\"({)\"\n        R\"(\"1583306653\":)\"\n        R\"({\"1.1\":{\"duplicating\":false,\"fail_mode\":\"FAIL_SLOW\",\"not_confirmed_mutations_num\":100,\"not_duplicated_mutations_num\":50}})\"\n        R\"(})\");\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/duplication/test/run.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\n./dsn_replica_dup_test\n\nif [ $? -ne 0 ]; then\n    tail -n 100 data/log/log.1.txt\n    if [ -f core ]; then\n        gdb ./dsn_replica_dup_test core -ex \"bt\"\n    fi\n    exit 1\nfi\n"
  },
  {
    "path": "src/replica/duplication/test/ship_mutation_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"replica/duplication/mutation_batch.h\"\n#include \"replica/duplication/duplication_pipeline.h\"\n#include \"duplication_test_base.h\"\n\nnamespace dsn {\nnamespace replication {\n\n/*static*/ mock_mutation_duplicator::duplicate_function mock_mutation_duplicator::_func;\n\nstruct mock_stage : pipeline::when<>\n{\n    void run() override {}\n};\n\nclass ship_mutation_test : public duplication_test_base\n{\npublic:\n    ship_mutation_test()\n    {\n        _replica->init_private_log(_log_dir);\n        duplicator = create_test_duplicator();\n    }\n\n    // ensure ship_mutation retries after error.\n    // ensure it clears up all pending mutations after stage ends.\n    // ensure it update duplicator->last_decree after stage ends.\n    void test_ship_mutation_tuple_set()\n    {\n        ship_mutation shipper(duplicator.get());\n        mock_stage end;\n\n        pipeline::base base;\n        base.thread_pool(LPC_REPLICATION_LONG_LOW).task_tracker(_replica->tracker());\n        base.from(shipper).link(end);\n\n        mutation_batch batch(duplicator.get());\n        batch.add(create_test_mutation(1, \"hello\"));\n        
batch.add(create_test_mutation(2, \"hello\"));\n        mutation_tuple_set in = batch.move_all_mutations();\n        _replica->set_last_committed_decree(2);\n\n        std::vector<mutation_tuple> expected;\n        for (auto mut : in) {\n            expected.push_back(std::move(mut));\n        }\n\n        mock_mutation_duplicator::mock(\n            [&expected](mutation_tuple_set muts, mutation_duplicator::callback cb) {\n                int i = 0;\n                for (auto mut : muts) {\n                    ASSERT_EQ(std::get<0>(expected[i]), std::get<0>(mut));\n                    ASSERT_EQ(std::get<1>(expected[i]), std::get<1>(mut));\n                    ASSERT_EQ(std::get<2>(expected[i]).to_string(), std::get<2>(mut).to_string());\n                    ASSERT_EQ(std::get<2>(expected[i]).to_string(), \"hello\");\n                    i++;\n                }\n                cb(0);\n            });\n\n        shipper.run(2, std::move(in));\n\n        base.wait_all();\n        ASSERT_EQ(duplicator->progress().last_decree, 2);\n    }\n\n    ship_mutation *mock_ship_mutation()\n    {\n        duplicator->_ship = make_unique<ship_mutation>(duplicator.get());\n        return duplicator->_ship.get();\n    }\n\n    std::unique_ptr<replica_duplicator> duplicator;\n};\n\nTEST_F(ship_mutation_test, ship_mutation_tuple_set) { test_ship_mutation_tuple_set(); }\n\nvoid retry(pipeline::base *base)\n{\n    base->schedule([base]() { retry(base); }, 10_s);\n}\n\nTEST_F(ship_mutation_test, pause)\n{\n    auto shipper = mock_ship_mutation();\n\n    mutation_batch batch(duplicator.get());\n    batch.add(create_test_mutation(1, \"hello\"));\n    batch.add(create_test_mutation(2, \"hello\"));\n    mutation_tuple_set in = batch.move_all_mutations();\n    ASSERT_EQ(in.size(), 1);\n    _replica->set_last_committed_decree(2);\n\n    mock_mutation_duplicator::mock([this](mutation_tuple_set, mutation_duplicator::callback) {\n        // mock RPC retry infinitely.\n        
retry(duplicator.get());\n    });\n    shipper->run(2, std::move(in));\n\n    // the ongoing RPC will be abandoned when pause_dup called.\n    duplicator->pause_dup_log();\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/log_block.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"log_block.h\"\n\nnamespace dsn {\nnamespace replication {\n\nlog_block::log_block(int64_t start_offset) : _start_offset(start_offset) { init(); }\n\nlog_block::log_block() { init(); }\n\nvoid log_block::init()\n{\n    log_block_header hdr;\n\n    binary_writer temp_writer;\n    temp_writer.write_pod(hdr);\n    add(temp_writer.get_buffer());\n}\n\nvoid log_appender::append_mutation(const mutation_ptr &mu, const aio_task_ptr &cb)\n{\n    _mutations.push_back(mu);\n    if (cb) {\n        _callbacks.push_back(cb);\n    }\n    log_block *blk = &_blocks.back();\n    if (blk->size() > DEFAULT_MAX_BLOCK_BYTES) {\n        _full_blocks_size += blk->size();\n        _full_blocks_blob_cnt += blk->data().size();\n        int64_t new_block_start_offset = blk->start_offset() + blk->size();\n        _blocks.emplace_back(new_block_start_offset);\n        blk = &_blocks.back();\n    }\n    mu->data.header.log_offset = blk->start_offset() + blk->size();\n    mu->write_to([blk](const blob &bb) { blk->add(bb); });\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/log_block.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"mutation.h\"\n\nnamespace dsn {\nnamespace replication {\n\n// each block in log file has a log_block_header\nstruct log_block_header\n{\n    int32_t magic{static_cast<int32_t>(0xdeadbeef)}; // 0xdeadbeef\n    int32_t length{0};   // block data length (not including log_block_header)\n    int32_t body_crc{0}; // block data crc (not including log_block_header)\n\n    // start offset of the block (including log_block_header) in this log file\n    // TODO(wutao1): this field is unusable. 
the value is always set, but not read.\n    uint32_t local_offset{0};\n};\n\n// a memory structure holding data which belongs to one block.\nclass log_block\n{\n    std::vector<blob> _data; // the first blob is log_block_header\n    size_t _size{0};         // total data size of all blobs\n    int64_t _start_offset{0};\n\npublic:\n    log_block();\n\n    explicit log_block(int64_t start_offset);\n\n    // get all blobs in the block\n    const std::vector<blob> &data() const { return _data; }\n\n    // get the first blob (which contains the log_block_header) from the block\n    //\n    // TODO(wutao1): refactor `front()` to `get_log_block_header()`\n    // ```\n    //   log_block_header *get_log_block_header()\n    //   {\n    //       return reinterpret_cast<log_block_header *>(const_cast<char *>(_data.front().data()));\n    //   }\n    // ```\n    blob &front()\n    {\n        dassert(!_data.empty(), \"trying to get first blob out of an empty log block\");\n        return _data.front();\n    }\n\n    // add a blob into the block\n    void add(const blob &bb)\n    {\n        _size += bb.length();\n        _data.push_back(bb);\n    }\n\n    // return total data size in the block\n    size_t size() const { return _size; }\n\n    // global offset to start writting this block\n    int64_t start_offset() const { return _start_offset; }\n\nprivate:\n    friend class log_appender;\n    void init();\n};\n\n// Append writes into a buffer which consists of one or more fixed-size log blocks,\n// which will be continuously flushed into one log file.\n// Not thread-safe. 
Requires lock protection.\nclass log_appender\n{\npublic:\n    explicit log_appender(int64_t start_offset) { _blocks.emplace_back(start_offset); }\n\n    log_appender(int64_t start_offset, log_block &block)\n    {\n        block._start_offset = start_offset;\n        _blocks.emplace_back(std::move(block));\n    }\n\n    void append_mutation(const mutation_ptr &mu, const aio_task_ptr &cb);\n\n    size_t size() const { return _full_blocks_size + _blocks.crbegin()->size(); }\n    size_t blob_count() const { return _full_blocks_blob_cnt + _blocks.crbegin()->data().size(); }\n\n    std::vector<mutation_ptr> mutations() const { return _mutations; }\n\n    // The callback registered for each write.\n    const std::vector<aio_task_ptr> &callbacks() const { return _callbacks; }\n\n    // Returns the heading block's start_offset.\n    int64_t start_offset() const { return _blocks.cbegin()->start_offset(); }\n\n    std::vector<log_block> &all_blocks() { return _blocks; }\n\nprotected:\n    static constexpr size_t DEFAULT_MAX_BLOCK_BYTES = 1 * 1024 * 1024; // 1MB\n\n    // |---------------------- _blocks ----------------------|\n    // | full block 0 | full block 1 | .... | unfilled block |\n\n    // New block is appended to tail.\n    // The tailing block is the only block that may be unfilled.\n    std::vector<log_block> _blocks;\n    size_t _full_blocks_size{0};\n    size_t _full_blocks_blob_cnt{0};\n    std::vector<aio_task_ptr> _callbacks;\n    std::vector<mutation_ptr> _mutations;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/log_file.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"log_file.h\"\n\n#include <fcntl.h>\n\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/crc.h>\n#include <dsn/dist/fmt_logging.h>\n\n#include \"log_file_stream.h\"\n\nnamespace dsn {\nnamespace replication {\n\nlog_file::~log_file() { close(); }\n/*static */ log_file_ptr log_file::open_read(const char *path, /*out*/ error_code &err)\n{\n    char splitters[] = {'\\\\', '/', 0};\n    std::string name = utils::get_last_component(std::string(path), splitters);\n\n    // log.index.start_offset\n    if (name.length() < strlen(\"log.\") || name.substr(0, strlen(\"log.\")) != std::string(\"log.\")) {\n        err = ERR_INVALID_PARAMETERS;\n        dwarn(\"invalid log path %s\", path);\n        return nullptr;\n    }\n\n   
 auto pos = name.find_first_of('.');\n    dassert(pos != std::string::npos, \"invalid log_file, name = %s\", name.c_str());\n    auto pos2 = name.find_first_of('.', pos + 1);\n    if (pos2 == std::string::npos) {\n        err = ERR_INVALID_PARAMETERS;\n        dwarn(\"invalid log path %s\", path);\n        return nullptr;\n    }\n\n    /* so the log file format is log.index_str.start_offset_str */\n    std::string index_str = name.substr(pos + 1, pos2 - pos - 1);\n    std::string start_offset_str = name.substr(pos2 + 1);\n    if (index_str.empty() || start_offset_str.empty()) {\n        err = ERR_INVALID_PARAMETERS;\n        dwarn(\"invalid log path %s\", path);\n        return nullptr;\n    }\n\n    char *p = nullptr;\n    int index = static_cast<int>(strtol(index_str.c_str(), &p, 10));\n    if (*p != 0) {\n        err = ERR_INVALID_PARAMETERS;\n        dwarn(\"invalid log path %s\", path);\n        return nullptr;\n    }\n    int64_t start_offset = static_cast<int64_t>(strtoll(start_offset_str.c_str(), &p, 10));\n    if (*p != 0) {\n        err = ERR_INVALID_PARAMETERS;\n        dwarn(\"invalid log path %s\", path);\n        return nullptr;\n    }\n\n    disk_file *hfile = file::open(path, O_RDONLY | O_BINARY, 0);\n    if (!hfile) {\n        err = ERR_FILE_OPERATION_FAILED;\n        dwarn(\"open log file %s failed\", path);\n        return nullptr;\n    }\n\n    auto lf = new log_file(path, hfile, index, start_offset, true);\n    lf->reset_stream();\n    blob hdr_blob;\n    err = lf->read_next_log_block(hdr_blob);\n    if (err == ERR_INVALID_DATA || err == ERR_INCOMPLETE_DATA || err == ERR_HANDLE_EOF ||\n        err == ERR_FILE_OPERATION_FAILED) {\n        std::string removed = std::string(path) + \".removed\";\n        derror(\"read first log entry of file %s failed, err = %s. 
Rename the file to %s\",\n               path,\n               err.to_string(),\n               removed.c_str());\n        delete lf;\n        lf = nullptr;\n\n        // rename file on failure\n        dsn::utils::filesystem::rename_path(path, removed);\n\n        return nullptr;\n    }\n\n    binary_reader reader(std::move(hdr_blob));\n    lf->read_file_header(reader);\n    if (!lf->is_right_header()) {\n        std::string removed = std::string(path) + \".removed\";\n        derror(\"invalid log file header of file %s. Rename the file to %s\", path, removed.c_str());\n        delete lf;\n        lf = nullptr;\n\n        // rename file on failure\n        dsn::utils::filesystem::rename_path(path, removed);\n\n        err = ERR_INVALID_DATA;\n        return nullptr;\n    }\n\n    err = ERR_OK;\n    return lf;\n}\n\n/*static*/ log_file_ptr log_file::create_write(const char *dir, int index, int64_t start_offset)\n{\n    char path[512];\n    sprintf(path, \"%s/log.%d.%\" PRId64, dir, index, start_offset);\n\n    if (dsn::utils::filesystem::path_exists(std::string(path))) {\n        dwarn(\"log file %s already exist\", path);\n        return nullptr;\n    }\n\n    disk_file *hfile = file::open(path, O_RDWR | O_CREAT | O_BINARY, 0666);\n    if (!hfile) {\n        dwarn(\"create log %s failed\", path);\n        return nullptr;\n    }\n\n    return new log_file(path, hfile, index, start_offset, false);\n}\n\nlog_file::log_file(\n    const char *path, disk_file *handle, int index, int64_t start_offset, bool is_read)\n    : _is_read(is_read)\n{\n    _start_offset = start_offset;\n    _end_offset = start_offset;\n    _handle = handle;\n    _path = path;\n    _index = index;\n    _crc32 = 0;\n    _last_write_time = 0;\n    memset(&_header, 0, sizeof(_header));\n\n    if (is_read) {\n        int64_t sz;\n        if (!dsn::utils::filesystem::file_size(_path, sz)) {\n            dassert(false, \"fail to get file size of %s.\", _path.c_str());\n        }\n        _end_offset += 
sz;\n    }\n}\n\nvoid log_file::close()\n{\n    zauto_lock lock(_write_lock);\n\n    //_stream implicitly refer to _handle so it needs to be cleaned up first.\n    // TODO: We need better abstraction to avoid those manual stuffs..\n    _stream.reset(nullptr);\n    if (_handle) {\n        error_code err = file::close(_handle);\n        dassert(err == ERR_OK, \"file::close failed, err = %s\", err.to_string());\n\n        _handle = nullptr;\n    }\n}\n\nvoid log_file::flush() const\n{\n    dassert(!_is_read, \"log file must be of write mode\");\n    zauto_lock lock(_write_lock);\n\n    if (_handle) {\n        error_code err = file::flush(_handle);\n        dassert(err == ERR_OK, \"file::flush failed, err = %s\", err.to_string());\n    }\n}\n\nerror_code log_file::read_next_log_block(/*out*/ ::dsn::blob &bb)\n{\n    dassert(_is_read, \"log file must be of read mode\");\n    auto err = _stream->read_next(sizeof(log_block_header), bb);\n    if (err != ERR_OK || bb.length() != sizeof(log_block_header)) {\n        if (err == ERR_OK || err == ERR_HANDLE_EOF) {\n            // if read_count is 0, then we meet the end of file\n            err = (bb.length() == 0 ? 
ERR_HANDLE_EOF : ERR_INCOMPLETE_DATA);\n        } else {\n            derror(\"read data block header failed, size = %d vs %d, err = %s\",\n                   bb.length(),\n                   (int)sizeof(log_block_header),\n                   err.to_string());\n        }\n\n        return err;\n    }\n    log_block_header hdr = *reinterpret_cast<const log_block_header *>(bb.data());\n\n    if (hdr.magic != 0xdeadbeef) {\n        derror(\"invalid data header magic: 0x%x\", hdr.magic);\n        return ERR_INVALID_DATA;\n    }\n\n    err = _stream->read_next(hdr.length, bb);\n    if (err != ERR_OK || hdr.length != bb.length()) {\n        derror(\"read data block body failed, size = %d vs %d, err = %s\",\n               bb.length(),\n               (int)hdr.length,\n               err.to_string());\n\n        if (err == ERR_OK || err == ERR_HANDLE_EOF) {\n            // because already read log_block_header above, so here must be imcomplete data\n            err = ERR_INCOMPLETE_DATA;\n        }\n\n        return err;\n    }\n\n    auto crc = dsn::utils::crc32_calc(\n        static_cast<const void *>(bb.data()), static_cast<size_t>(hdr.length), _crc32);\n    if (crc != hdr.body_crc) {\n        derror(\"crc checking failed\");\n        return ERR_INVALID_DATA;\n    }\n    _crc32 = crc;\n\n    return ERR_OK;\n}\n\naio_task_ptr log_file::commit_log_block(log_block &block,\n                                        int64_t offset,\n                                        dsn::task_code evt,\n                                        dsn::task_tracker *tracker,\n                                        aio_handler &&callback,\n                                        int hash)\n{\n    log_appender pending(offset, block);\n    return commit_log_blocks(pending, evt, tracker, std::move(callback), hash);\n}\naio_task_ptr log_file::commit_log_blocks(log_appender &pending,\n                                         dsn::task_code evt,\n                                         
dsn::task_tracker *tracker,\n                                         aio_handler &&callback,\n                                         int hash)\n{\n    dassert(!_is_read, \"log file must be of write mode\");\n    dcheck_gt(pending.size(), 0);\n\n    zauto_lock lock(_write_lock);\n    if (!_handle) {\n        return nullptr;\n    }\n\n    auto size = (long long)pending.size();\n    size_t vec_size = pending.blob_count();\n    std::vector<dsn_file_buffer_t> buffer_vector(vec_size);\n    int buffer_idx = 0;\n    for (log_block &block : pending.all_blocks()) {\n        int64_t local_offset = block.start_offset() - start_offset();\n        auto hdr = reinterpret_cast<log_block_header *>(const_cast<char *>(block.front().data()));\n\n        dassert(hdr->magic == 0xdeadbeef, \"\");\n        hdr->local_offset = local_offset;\n        hdr->length = static_cast<int32_t>(block.size() - sizeof(log_block_header));\n        hdr->body_crc = _crc32;\n\n        for (int i = 0; i < block.data().size(); i++) {\n            auto &blk = block.data()[i];\n            buffer_vector[buffer_idx].buffer = static_cast<void *>(const_cast<char *>(blk.data()));\n            buffer_vector[buffer_idx].size = blk.length();\n\n            // skip block header\n            if (i > 0) {\n                hdr->body_crc = dsn::utils::crc32_calc(static_cast<const void *>(blk.data()),\n                                                       static_cast<size_t>(blk.length()),\n                                                       hdr->body_crc);\n            }\n            buffer_idx++;\n        }\n        _crc32 = hdr->body_crc;\n    }\n\n    aio_task_ptr tsk;\n    int64_t local_offset = pending.start_offset() - start_offset();\n    if (callback) {\n        tsk = file::write_vector(_handle,\n                                 buffer_vector.data(),\n                                 vec_size,\n                                 static_cast<uint64_t>(local_offset),\n                                 evt,\n      
                           tracker,\n                                 std::forward<aio_handler>(callback),\n                                 hash);\n    } else {\n        tsk = file::write_vector(_handle,\n                                 buffer_vector.data(),\n                                 vec_size,\n                                 static_cast<uint64_t>(local_offset),\n                                 evt,\n                                 tracker,\n                                 nullptr,\n                                 hash);\n    }\n\n    if (utils::FLAGS_enable_latency_tracer) {\n        tsk->_tracer->set_parent_point_name(\"commit_pending_mutations\");\n        tsk->_tracer->set_description(\"log\");\n        for (const auto &mutation : pending.mutations()) {\n            mutation->_tracer->add_sub_tracer(tsk->_tracer);\n        }\n    }\n\n    _end_offset.fetch_add(size);\n    return tsk;\n}\n\nvoid log_file::reset_stream(size_t offset /*default = 0*/)\n{\n    if (_stream == nullptr) {\n        _stream.reset(new file_streamer(_handle, offset));\n    } else {\n        _stream->reset(offset);\n    }\n    if (offset == 0) {\n        _crc32 = 0;\n    }\n}\n\ndecree log_file::previous_log_max_decree(const dsn::gpid &pid)\n{\n    auto it = _previous_log_max_decrees.find(pid);\n    return it == _previous_log_max_decrees.end() ? 
0 : it->second.max_decree;\n}\n\nint log_file::read_file_header(binary_reader &reader)\n{\n    /*\n     * the log file header structure:\n     *   log_file_header +\n     *   count + count * (gpid + replica_log_info)\n     */\n    reader.read_pod(_header);\n\n    int count;\n    reader.read(count);\n    for (int i = 0; i < count; i++) {\n        gpid gpid;\n        replica_log_info info;\n\n        reader.read_pod(gpid);\n        reader.read_pod(info);\n\n        _previous_log_max_decrees[gpid] = info;\n    }\n\n    return get_file_header_size();\n}\n\nint log_file::get_file_header_size() const\n{\n    int count = static_cast<int>(_previous_log_max_decrees.size());\n    return static_cast<int>(sizeof(log_file_header) + sizeof(count) +\n                            (sizeof(gpid) + sizeof(replica_log_info)) * count);\n}\n\nbool log_file::is_right_header() const\n{\n    return _header.magic == 0xdeadbeef && _header.start_global_offset == _start_offset;\n}\n\nint log_file::write_file_header(binary_writer &writer, const replica_log_info_map &init_max_decrees)\n{\n    /*\n     * the log file header structure:\n     *   log_file_header +\n     *   count + count * (gpid + replica_log_info)\n     */\n    _previous_log_max_decrees = init_max_decrees;\n\n    _header.magic = 0xdeadbeef;\n    _header.version = 0x1;\n    _header.start_global_offset = start_offset();\n\n    writer.write_pod(_header);\n\n    int count = static_cast<int>(_previous_log_max_decrees.size());\n    writer.write(count);\n    for (auto &kv : _previous_log_max_decrees) {\n        writer.write_pod(kv.first);\n        writer.write_pod(kv.second);\n    }\n\n    return get_file_header_size();\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/log_file.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include \"log_block.h\"\n\n#include <dsn/tool-api/zlocks.h>\n\nnamespace dsn {\nnamespace replication {\n\n// each log file has a log_file_header stored at the beginning of the first block's data content\nstruct log_file_header\n{\n    int32_t magic;   // 0xdeadbeef\n    int32_t version; // current 0x1\n    int64_t\n        start_global_offset; // start offset in the global space, equals to the file name's postfix\n};\n\n// a structure to record replica's log info\nstruct replica_log_info\n{\n    int64_t max_decree;\n    int64_t valid_start_offset; // valid start offset in global space\n    replica_log_info(int64_t d, int64_t o)\n    {\n        max_decree = d;\n        valid_start_offset = o;\n    }\n    
replica_log_info()\n    {\n        max_decree = 0;\n        valid_start_offset = 0;\n    }\n    bool operator==(const replica_log_info &o) const\n    {\n        return max_decree == o.max_decree && valid_start_offset == o.valid_start_offset;\n    }\n};\n\ntypedef std::unordered_map<gpid, replica_log_info> replica_log_info_map;\n\nclass log_file;\ntypedef dsn::ref_ptr<log_file> log_file_ptr;\n\n//\n// the log file is structured with sequences of log_blocks,\n// each block consists of the log_block_header + log_content,\n// and the first block contains the log_file_header at the beginning\n//\n// the class is not thread safe\n//\nclass log_file : public ref_counter\n{\npublic:\n    ~log_file();\n\n    //\n    // file operations\n    //\n\n    // open the log file for read\n    // 'path' should be in format of log.{index}.{start_offset}, where:\n    //   - index: the index of the log file, start from 1\n    //   - start_offset: start offset in the global space\n    // returns:\n    //   - non-null if open succeed\n    //   - null if open failed\n    static log_file_ptr open_read(const char *path, /*out*/ error_code &err);\n\n    // open the log file for write\n    // the file path is '{dir}/log.{index}.{start_offset}'\n    // returns:\n    //   - non-null if open succeed\n    //   - null if open failed\n    static log_file_ptr create_write(const char *dir, int index, int64_t start_offset);\n\n    // close the log file\n    void close();\n\n    // flush the log file\n    void flush() const;\n\n    //\n    // read routines\n    //\n\n    // sync read the next log entry from the file\n    // the entry data is start from the 'local_offset' of the file\n    // the result is passed out by 'bb', not including the log_block_header\n    // return error codes:\n    //  - ERR_OK\n    //  - ERR_HANDLE_EOF\n    //  - ERR_INCOMPLETE_DATA\n    //  - ERR_INVALID_DATA\n    //  - other io errors caused by file read operator\n    error_code read_next_log_block(/*out*/ ::dsn::blob 
&bb);\n\n    //\n    // write routines\n    //\n\n    // async write log entry into the file\n    // 'block' is the date to be written\n    // 'offset' is start offset of the entry in the global space\n    // 'evt' is to indicate which thread pool to execute the callback\n    // 'callback_host' is used to get tracer\n    // 'callback' is to indicate the callback handler\n    // 'hash' helps to choose which thread in the thread pool to execute the callback\n    // returns:\n    //   - non-null if io task is in pending\n    //   - null if error\n    dsn::aio_task_ptr commit_log_block(log_block &block,\n                                       int64_t offset,\n                                       dsn::task_code evt,\n                                       dsn::task_tracker *tracker,\n                                       aio_handler &&callback,\n                                       int hash);\n    dsn::aio_task_ptr commit_log_blocks(log_appender &pending,\n                                        dsn::task_code evt,\n                                        dsn::task_tracker *tracker,\n                                        aio_handler &&callback,\n                                        int hash);\n\n    //\n    // others\n    //\n\n    // Reset file_streamer to point to `offset`.\n    // offset=0 means the start of this log file.\n    void reset_stream(size_t offset = 0);\n    // end offset in the global space: end_offset = start_offset + file_size\n    int64_t end_offset() const { return _end_offset.load(); }\n    // start offset in the global space\n    int64_t start_offset() const { return _start_offset; }\n    // file index\n    int index() const { return _index; }\n    // file path\n    const std::string &path() const { return _path; }\n    // previous decrees\n    const replica_log_info_map &previous_log_max_decrees() { return _previous_log_max_decrees; }\n    // previous decree for speicified gpid\n    decree previous_log_max_decree(const gpid &pid);\n    
// file header\n    log_file_header &header() { return _header; }\n\n    // read file header from reader, return byte count consumed\n    int read_file_header(binary_reader &reader);\n    // write file header to writer, return byte count written\n    int write_file_header(binary_writer &writer, const replica_log_info_map &init_max_decrees);\n    // get serialized size of current file header\n    int get_file_header_size() const;\n    // if the file header is valid\n    bool is_right_header() const;\n\n    // set & get last write time, used for gc\n    void set_last_write_time(uint64_t last_write_time) { _last_write_time = last_write_time; }\n    uint64_t last_write_time() const { return _last_write_time; }\n\n    const disk_file *file_handle() const { return _handle; }\n\nprivate:\n    // make private, user should create log_file through open_read() or open_write()\n    log_file(const char *path, disk_file *handle, int index, int64_t start_offset, bool is_read);\n\nprivate:\n    friend class mock_log_file;\n\n    uint32_t _crc32;\n    int64_t _start_offset; // start offset in the global space\n    std::atomic<int64_t>\n        _end_offset; // end offset in the global space: end_offset = start_offset + file_size\n    class file_streamer;\n    std::unique_ptr<file_streamer> _stream;\n    disk_file *_handle;        // file handle\n    const bool _is_read;       // if opened for read or write\n    std::string _path;         // file path\n    int _index;                // file index\n    log_file_header _header;   // file header\n    uint64_t _last_write_time; // seconds from epoch time\n\n    mutable zlock _write_lock;\n\n    // this data is used for garbage collection, and is part of file header.\n    // for read, the value is read from file header.\n    // for write, the value is set by write_file_header().\n    replica_log_info_map _previous_log_max_decrees;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/log_file_stream.h",
    "content": "\n\n/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include \"log_file.h\"\n\nnamespace dsn {\nnamespace replication {\n\n// log_file::file_streamer\nclass log_file::file_streamer\n{\npublic:\n    explicit file_streamer(disk_file *fd, size_t file_offset)\n        : _file_dispatched_bytes(file_offset), _file_handle(fd)\n    {\n        _current_buffer = _buffers + 0;\n        _next_buffer = _buffers + 1;\n        fill_buffers();\n    }\n    ~file_streamer()\n    {\n        _current_buffer->wait_ongoing_task();\n        _next_buffer->wait_ongoing_task();\n    }\n    // try to reset file_offset\n    void reset(size_t file_offset)\n    {\n        _current_buffer->wait_ongoing_task();\n        _next_buffer->wait_ongoing_task();\n        // fast path if we can just move 
the cursor\n        if (_current_buffer->_file_offset_of_buffer <= file_offset &&\n            _current_buffer->_file_offset_of_buffer + _current_buffer->_end > file_offset) {\n            _current_buffer->_begin = file_offset - _current_buffer->_file_offset_of_buffer;\n        } else {\n            _current_buffer->_begin = _current_buffer->_end = _next_buffer->_begin =\n                _next_buffer->_end = 0;\n            _file_dispatched_bytes = file_offset;\n        }\n        fill_buffers();\n    }\n\n    // TODO(wutao1): use string_view instead of using blob.\n    // WARNING: the resulted blob is not guaranteed to be reference counted.\n    // possible error_code:\n    //  ERR_OK                      result would always size as expected\n    //  ERR_HANDLE_EOF              if there are not enough data in file. result would still be\n    //                              filled with possible data\n    //  ERR_FILE_OPERATION_FAILED   filesystem failure\n    error_code read_next(size_t size, /*out*/ blob &result)\n    {\n        binary_writer writer(size);\n\n#define TRY(x)                                                                                     \\\n    do {                                                                                           \\\n        auto _x = (x);                                                                             \\\n        if (_x != ERR_OK) {                                                                        \\\n            result = writer.get_current_buffer();                                                  \\\n            return _x;                                                                             \\\n        }                                                                                          \\\n    } while (0)\n\n        TRY(_current_buffer->wait_ongoing_task());\n        if (size < _current_buffer->length()) {\n            result.assign(_current_buffer->_buffer.get(), 
_current_buffer->_begin, size);\n            _current_buffer->_begin += size;\n        } else {\n            _current_buffer->drain(writer);\n            // we can now assign result since writer must have allocated a buffer.\n            dassert(writer.total_size() != 0, \"writer.total_size = %d\", writer.total_size());\n            if (size > writer.total_size()) {\n                TRY(_next_buffer->wait_ongoing_task());\n                _next_buffer->consume(writer,\n                                      std::min(size - writer.total_size(), _next_buffer->length()));\n                // We hope that this never happens, it would deteriorate performance\n                if (size > writer.total_size()) {\n                    auto task =\n                        file::read(_file_handle,\n                                   writer.get_current_buffer().buffer().get() + writer.total_size(),\n                                   size - writer.total_size(),\n                                   _file_dispatched_bytes,\n                                   LPC_AIO_IMMEDIATE_CALLBACK,\n                                   nullptr,\n                                   nullptr);\n                    task->wait();\n                    writer.write_empty(task->get_transferred_size());\n                    _file_dispatched_bytes += task->get_transferred_size();\n                    TRY(task->error());\n                }\n            }\n            result = writer.get_current_buffer();\n        }\n        fill_buffers();\n        return ERR_OK;\n#undef TRY\n    }\n\nprivate:\n    void fill_buffers()\n    {\n        while (!_current_buffer->_have_ongoing_task && _current_buffer->empty()) {\n            _current_buffer->_begin = _current_buffer->_end = 0;\n            _current_buffer->_file_offset_of_buffer = _file_dispatched_bytes;\n            _current_buffer->_have_ongoing_task = true;\n            _current_buffer->_task = file::read(_file_handle,\n                                          
      _current_buffer->_buffer.get(),\n                                                block_size_bytes,\n                                                _file_dispatched_bytes,\n                                                LPC_AIO_IMMEDIATE_CALLBACK,\n                                                nullptr,\n                                                nullptr);\n            _file_dispatched_bytes += block_size_bytes;\n            std::swap(_current_buffer, _next_buffer);\n        }\n    }\n\n    // buffer size, in bytes\n    // TODO(wutao1): call it BLOCK_BYTES_SIZE\n    static constexpr size_t block_size_bytes = 1024 * 1024; // 1MB\n    struct buffer_t\n    {\n        std::unique_ptr<char[]> _buffer; // with block_size\n        size_t _begin, _end;             // [buffer[begin]..buffer[end]) contains unconsumed_data\n        size_t _file_offset_of_buffer;   // file offset projected to buffer[0]\n        bool _have_ongoing_task;\n        aio_task_ptr _task;\n\n        buffer_t()\n            : _buffer(new char[block_size_bytes]),\n              _begin(0),\n              _end(0),\n              _file_offset_of_buffer(0),\n              _have_ongoing_task(false)\n        {\n        }\n        size_t length() const { return _end - _begin; }\n        bool empty() const { return length() == 0; }\n        void consume(binary_writer &dest, size_t len)\n        {\n            dest.write(_buffer.get() + _begin, len);\n            _begin += len;\n        }\n        size_t drain(binary_writer &dest)\n        {\n            auto len = length();\n            consume(dest, len);\n            return len;\n        }\n        error_code wait_ongoing_task()\n        {\n            if (_have_ongoing_task) {\n                _task->wait();\n                _have_ongoing_task = false;\n                _end += _task->get_transferred_size();\n                dassert(_end <= block_size_bytes, \"invalid io_size.\");\n                return _task->error();\n            } else {\n    
            return ERR_OK;\n            }\n        }\n    } _buffers[2];\n    buffer_t *_current_buffer, *_next_buffer;\n\n    // number of bytes we have issued read operations\n    size_t _file_dispatched_bytes;\n    disk_file *_file_handle;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/mutation.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"mutation.h\"\n#include \"mutation_log.h\"\n#include \"replica.h\"\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/flags.h>\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DEFINE_uint64(\"replication\",\n                  abnormal_write_trace_latency_threshold,\n                  1000 * 1000 * 1000, // 1s\n                  \"latency trace will be logged when exceed the write latency threshold\");\nDSN_TAG_VARIABLE(abnormal_write_trace_latency_threshold, FT_MUTABLE);\n\nstd::atomic<uint64_t> mutation::s_tid(0);\n\nmutation::mutation()\n{\n 
   next = nullptr;\n    _private0 = 0;\n    _not_logged = 1;\n    _prepare_ts_ms = 0;\n    strcpy(_name, \"0.0.0.0\");\n    _appro_data_bytes = sizeof(mutation_header);\n    _create_ts_ns = dsn_now_ns();\n    _tid = ++s_tid;\n    _is_sync_to_child = false;\n    _tracer = std::make_shared<dsn::utils::latency_tracer>(\n        false, \"mutation\", FLAGS_abnormal_write_trace_latency_threshold);\n}\n\nmutation_ptr mutation::copy_no_reply(const mutation_ptr &old_mu)\n{\n    mutation_ptr mu(new mutation());\n    mu->_private0 = old_mu->_private0;\n    strcpy(mu->_name, old_mu->_name);\n    mu->_appro_data_bytes = old_mu->_appro_data_bytes;\n    mu->data = old_mu->data;\n    mu->_is_sync_to_child = old_mu->is_sync_to_child();\n    // create a new message without client information, it will not rely\n    for (auto req : old_mu->client_requests) {\n        if (req != nullptr) {\n            dsn::message_ex *new_req = message_ex::copy_message_no_reply(*req);\n            mu->client_requests.emplace_back(new_req);\n        } else {\n            mu->client_requests.emplace_back(req);\n        }\n    }\n    return mu;\n}\n\nmutation::~mutation()\n{\n    for (auto &r : client_requests) {\n        if (r != nullptr) {\n            r->release_ref();\n        }\n    }\n\n    for (auto &request : _prepare_requests) {\n        request->release_ref();\n    }\n}\n\nvoid mutation::set_id(ballot b, decree c)\n{\n    data.header.ballot = b;\n    data.header.decree = c;\n\n    snprintf_p(_name,\n               sizeof(_name),\n               \"%\" PRId32 \".%\" PRId32 \".%\" PRId64 \".%\" PRId64,\n               data.header.pid.get_app_id(),\n               data.header.pid.get_partition_index(),\n               data.header.ballot,\n               data.header.decree);\n}\n\nvoid mutation::copy_from(mutation_ptr &old)\n{\n    data.updates = old->data.updates;\n    client_requests = old->client_requests;\n    _appro_data_bytes = old->_appro_data_bytes;\n    _create_ts_ns = 
old->_create_ts_ns;\n\n    for (auto &r : client_requests) {\n        if (r != nullptr) {\n            // release in dctor\n            r->add_ref();\n        }\n    }\n\n    // let's always re-append the mutation to\n    // replication logs as the ballot number\n    // is changed, to ensure the invariance:\n    // if decree(A) >= decree(B)\n    // then ballot(A) >= ballot(B)\n    /*if (old->is_logged())\n    {\n        set_logged();\n        data.header.log_offset = old->data.header.log_offset;\n    }\n    */\n\n    _prepare_requests = old->prepare_requests();\n    for (auto &request : _prepare_requests) {\n        request->add_ref();\n    }\n}\n\nvoid mutation::add_client_request(task_code code, dsn::message_ex *request)\n{\n    data.updates.push_back(mutation_update());\n    mutation_update &update = data.updates.back();\n    _appro_data_bytes += 32; // approximate code size\n\n    if (request != nullptr) {\n        update.code = code;\n        update.serialization_type =\n            (dsn_msg_serialize_format)request->header->context.u.serialize_format;\n        update.__set_start_time_ns(dsn_now_ns());\n        request->add_ref(); // released on dctor\n\n        void *ptr;\n        size_t size;\n        bool r = request->read_next(&ptr, &size);\n        dassert(r, \"payload is not present\");\n        request->read_commit(0); // so we can re-read the request buffer in replicated app\n        update.data.assign((char *)ptr, 0, (int)size);\n\n        _appro_data_bytes += sizeof(int) + (int)size; // data size\n    } else {\n        update.code = RPC_REPLICATION_WRITE_EMPTY;\n        _appro_data_bytes += sizeof(int); // empty data size\n    }\n\n    client_requests.push_back(request);\n\n    dassert(client_requests.size() == data.updates.size(), \"size must be equal\");\n}\n\nvoid mutation::write_to(const std::function<void(const blob &)> &inserter) const\n{\n    binary_writer writer(1024);\n    write_mutation_header(writer, data.header);\n    
writer.write_pod(static_cast<int>(data.updates.size()));\n    for (const mutation_update &update : data.updates) {\n        // write task_code as string to make it cross-process compatible.\n        // avoid memory copy, equal to writer.write(std::string)\n        const char *cstr = update.code.to_string();\n        int len = static_cast<int>(strlen(cstr));\n        writer.write_pod(len);\n        if (len > 0)\n            writer.write(cstr, len);\n\n        writer.write_pod(static_cast<int>(update.serialization_type));\n\n        writer.write_pod(static_cast<int>(update.data.length()));\n    }\n    inserter(writer.get_buffer());\n    for (const mutation_update &update : data.updates) {\n        inserter(update.data);\n    }\n}\n\nvoid mutation::write_to(binary_writer &writer, dsn::message_ex * /*to*/) const\n{\n    write_mutation_header(writer, data.header);\n    writer.write_pod(static_cast<int>(data.updates.size()));\n    for (const mutation_update &update : data.updates) {\n        // write task_code as string to make it cross-process compatible.\n        // avoid memory copy, equal to writer.write(std::string)\n        const char *cstr = update.code.to_string();\n        int len = static_cast<int>(strlen(cstr));\n        writer.write_pod(len);\n        if (len > 0)\n            writer.write(cstr, len);\n\n        writer.write_pod(static_cast<int>(update.serialization_type));\n\n        writer.write_pod(static_cast<int>(update.data.length()));\n    }\n    // TODO(qinzuoyan): directly append buffer to message to avoid memory copy\n    for (const mutation_update &update : data.updates) {\n        writer.write(update.data.data(), update.data.length());\n    }\n}\n\n/*static*/ mutation_ptr mutation::read_from(binary_reader &reader, dsn::message_ex *from)\n{\n    mutation_ptr mu(new mutation());\n    read_mutation_header(reader, mu->data.header);\n\n    int size;\n    reader.read_pod(size);\n    mu->data.updates.resize(size);\n    std::vector<int> lengths(size, 
0);\n    for (int i = 0; i < size; ++i) {\n        std::string name;\n        reader.read(name);\n        ::dsn::task_code code = dsn::task_code::try_get(name, TASK_CODE_INVALID);\n        dassert(code != TASK_CODE_INVALID, \"invalid mutation task code: %s\", name.c_str());\n        mu->data.updates[i].code = code;\n\n        int type;\n        reader.read_pod(type);\n        mu->data.updates[i].serialization_type = type;\n\n        reader.read_pod(lengths[i]);\n    }\n    for (int i = 0; i < size; ++i) {\n        reader.read(mu->data.updates[i].data, lengths[i]);\n    }\n\n    mu->client_requests.resize(mu->data.updates.size());\n    mu->add_prepare_request(from);\n\n    snprintf_p(mu->_name,\n               sizeof(mu->_name),\n               \"%\" PRId32 \".%\" PRId32 \".%\" PRId64 \".%\" PRId64,\n               mu->data.header.pid.get_app_id(),\n               mu->data.header.pid.get_partition_index(),\n               mu->data.header.ballot,\n               mu->data.header.decree);\n\n    return mu;\n}\n\n/*static*/ void mutation::write_mutation_header(binary_writer &writer,\n                                                const mutation_header &header)\n{\n    writer.write_pod((int64_t)0);\n    writer.write_pod(header.pid.value());\n    writer.write_pod(header.ballot);\n    writer.write_pod(header.decree);\n    writer.write_pod(header.log_offset);\n    writer.write_pod(header.last_committed_decree);\n    writer.write_pod(header.timestamp);\n}\n\n/*static*/ void mutation::read_mutation_header(binary_reader &reader, mutation_header &header)\n{\n    // original code:\n    //   reader.read_pod(mu->data.header);\n    // this will read 7*8=56 bytes of:\n    //   - vptr (which must > 64)\n    //   - gpid\n    //   - ballot\n    //   - decree\n    //   - log_offset\n    //   - last_committed_decree\n    //   - __isset\n    //\n    // new code (also 7*8=56 bytes):\n    //   - version\n    //   - gpid\n    //   - decree\n    //   - ballot\n    //   - log_offset\n    //   
- last_committed_decree\n    //   - timestamp\n    int64_t version;\n    reader.read_pod(version);\n    uint64_t pid_value;\n    reader.read_pod(pid_value);\n    header.pid.set_value(pid_value);\n    reader.read_pod(header.ballot);\n    reader.read_pod(header.decree);\n    reader.read_pod(header.log_offset);\n    reader.read_pod(header.last_committed_decree);\n    if (version == 0) {\n        reader.read_pod(header.timestamp);\n    } else if (version > 64) {\n        // version is vptr, we need read '__isset', and ignore it\n        int64_t isset;\n        reader.read_pod(isset);\n        header.timestamp = 0;\n    } else {\n        dassert(false, \"invalid mutation log version: 0x%\" PRIx64, version);\n    }\n}\n\nint mutation::clear_prepare_or_commit_tasks()\n{\n    int c = 0;\n    for (auto it = _prepare_or_commit_tasks.begin(); it != _prepare_or_commit_tasks.end(); ++it) {\n        if (it->second->cancel(true)) {\n            c++;\n        }\n    }\n\n    _prepare_or_commit_tasks.clear();\n    return c;\n}\n\nvoid mutation::wait_log_task() const\n{\n    if (_log_task != nullptr) {\n        _log_task->wait();\n    }\n}\n\nmutation_queue::mutation_queue(gpid gpid,\n                               int max_concurrent_op /*= 2*/,\n                               bool batch_write_disabled /*= false*/)\n    : _max_concurrent_op(max_concurrent_op), _batch_write_disabled(batch_write_disabled)\n{\n    _current_op_count = 0;\n    _pending_mutation = nullptr;\n    dassert(gpid.get_app_id() != 0, \"invalid gpid\");\n    _pcount = dsn_task_queue_virtual_length_ptr(RPC_PREPARE, gpid.thread_hash());\n}\n\nmutation_ptr mutation_queue::add_work(task_code code, dsn::message_ex *request, replica *r)\n{\n    task_spec *spec = task_spec::get(code);\n\n    // if not allow write batch, switch work queue\n    if (_pending_mutation && !spec->rpc_request_is_write_allow_batch) {\n        _pending_mutation->add_ref(); // released when unlink\n        _hdr.add(_pending_mutation);\n        
_pending_mutation = nullptr;\n        ++(*_pcount);\n    }\n\n    // add to work queue\n    if (!_pending_mutation) {\n        _pending_mutation = r->new_mutation(invalid_decree);\n    }\n\n    dinfo(\"add request with trace_id = %016\" PRIx64 \" into mutation with mutation_tid = %\" PRIu64,\n          request->header->trace_id,\n          _pending_mutation->tid());\n\n    _pending_mutation->add_client_request(code, request);\n\n    // short-cut\n    if (_current_op_count < _max_concurrent_op && _hdr.is_empty()) {\n        auto ret = _pending_mutation;\n        _pending_mutation = nullptr;\n        _current_op_count++;\n        return ret;\n    }\n\n    // check if need to switch work queue\n    if (_batch_write_disabled || !spec->rpc_request_is_write_allow_batch ||\n        _pending_mutation->is_full()) {\n        _pending_mutation->add_ref(); // released when unlink\n        _hdr.add(_pending_mutation);\n        _pending_mutation = nullptr;\n        ++(*_pcount);\n    }\n\n    // get next work item\n    if (_current_op_count >= _max_concurrent_op)\n        return nullptr;\n    else if (_hdr.is_empty()) {\n        dassert(_pending_mutation != nullptr, \"pending mutation cannot be null\");\n\n        auto ret = _pending_mutation;\n        _pending_mutation = nullptr;\n        _current_op_count++;\n        return ret;\n    } else {\n        _current_op_count++;\n        return unlink_next_workload();\n    }\n}\n\nmutation_ptr mutation_queue::check_possible_work(int current_running_count)\n{\n    _current_op_count = current_running_count;\n\n    if (_current_op_count >= _max_concurrent_op)\n        return nullptr;\n\n    // no further workload\n    if (_hdr.is_empty()) {\n        if (_pending_mutation != nullptr) {\n            auto ret = _pending_mutation;\n            _pending_mutation = nullptr;\n            _current_op_count++;\n            return ret;\n        } else {\n            return nullptr;\n        }\n    }\n\n    // run further workload\n    else {\n    
    _current_op_count++;\n        return unlink_next_workload();\n    }\n}\n\nvoid mutation_queue::clear()\n{\n    if (_pending_mutation != nullptr) {\n        _pending_mutation = nullptr;\n    }\n\n    mutation_ptr r;\n    while ((r = unlink_next_workload()) != nullptr) {\n    }\n}\n\nvoid mutation_queue::clear(std::vector<mutation_ptr> &queued_mutations)\n{\n    mutation_ptr r;\n    queued_mutations.clear();\n    while ((r = unlink_next_workload()) != nullptr) {\n        queued_mutations.emplace_back(r);\n    }\n\n    if (_pending_mutation != nullptr) {\n        queued_mutations.emplace_back(std::move(_pending_mutation));\n        _pending_mutation = nullptr;\n    }\n\n    // we don't reset the current_op_count, coz this is handled by\n    // check_possible_work. In which, the variable current_running_count\n    // is handled by prepare_list\n    // _current_op_count = 0;\n}\n}\n} // namespace end\n"
  },
  {
    "path": "src/replica/mutation.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include \"common/replication_common.h\"\n#include <list>\n#include <atomic>\n#include <dsn/utility/link.h>\n#include <dsn/utils/latency_tracer.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass mutation;\ntypedef dsn::ref_ptr<mutation> mutation_ptr;\n\n// mutation is the 2pc unit of PacificA, which wraps one or more client requests and add\n// header informations related to PacificA algorithm for them.\n// both header and client request content are put into \"data\" member.\nclass mutation : public ref_counter\n{\npublic:\n    mutation();\n 
   virtual ~mutation();\n\n    // copy mutation from an existing mutation, typically used in partition split\n    // mutation should not reply to client, because parent has already replied\n    static mutation_ptr copy_no_reply(const mutation_ptr &old_mu);\n\n    // state inquery\n    const char *name() const { return _name; }\n    const uint64_t tid() const { return _tid; }\n    bool is_logged() const { return _not_logged == 0; }\n    bool is_ready_for_commit() const { return _private0 == 0; }\n    const std::vector<dsn::message_ex *> &prepare_requests() const { return _prepare_requests; }\n    void add_prepare_request(dsn::message_ex *request)\n    {\n        if (nullptr != request) {\n            _prepare_requests.push_back(request);\n            request->add_ref(); // released on dctor\n        }\n    }\n    unsigned int left_secondary_ack_count() const { return _left_secondary_ack_count; }\n    unsigned int left_potential_secondary_ack_count() const\n    {\n        return _left_potential_secondary_ack_count;\n    }\n    bool is_child_acked() const { return !_wait_child; }\n    bool is_error_acked() const { return _is_error_acked; }\n    ::dsn::task_ptr &log_task() { return _log_task; }\n    node_tasks &remote_tasks() { return _prepare_or_commit_tasks; }\n    bool is_prepare_close_to_timeout(int gap_ms, int timeout_ms)\n    {\n        return dsn_now_ms() + gap_ms >= _prepare_ts_ms + timeout_ms;\n    }\n    uint64_t create_ts_ns() const { return _create_ts_ns; }\n    ballot get_ballot() const { return data.header.ballot; }\n    decree get_decree() const { return data.header.decree; }\n\n    // state change\n    void set_id(ballot b, decree c);\n    void set_timestamp(int64_t timestamp) { data.header.timestamp = timestamp; }\n    void add_client_request(task_code code, dsn::message_ex *request);\n    void copy_from(mutation_ptr &old);\n    void set_logged()\n    {\n        dassert(!is_logged(), \"\");\n        _not_logged = 0;\n    }\n    unsigned int 
decrease_left_secondary_ack_count() { return --_left_secondary_ack_count; }\n    unsigned int decrease_left_potential_secondary_ack_count()\n    {\n        return --_left_potential_secondary_ack_count;\n    }\n    void set_left_secondary_ack_count(unsigned int count) { _left_secondary_ack_count = count; }\n    void set_left_potential_secondary_ack_count(unsigned int count)\n    {\n        _left_potential_secondary_ack_count = count;\n    }\n    void wait_child() { _wait_child = true; }\n    void child_acked() { _wait_child = false; }\n    void set_error_acked() { _is_error_acked = true; }\n    int clear_prepare_or_commit_tasks();\n    void wait_log_task() const;\n    uint64_t prepare_ts_ms() const { return _prepare_ts_ms; }\n    void set_prepare_ts() { _prepare_ts_ms = dsn_now_ms(); }\n\n    // >= 1 MB\n    bool is_full() const { return _appro_data_bytes >= 1024 * 1024; }\n    int appro_data_bytes() const { return _appro_data_bytes; }\n\n    // read & write mutation data\n    //\n    // \"mutation_update.code\" should be marshalled as string for cross-process compatiblity,\n    // because:\n    //   - the private log may be transfered to other node with different program\n    //   - the private/shared log may be replayed by different program when server restart\n    void write_to(const std::function<void(const blob &)> &inserter) const;\n    void write_to(binary_writer &writer, dsn::message_ex *to) const;\n    static mutation_ptr read_from(binary_reader &reader, dsn::message_ex *from);\n\n    static void write_mutation_header(binary_writer &writer, const mutation_header &header);\n    static void read_mutation_header(binary_reader &reader, mutation_header &header);\n\n    // data\n    mutation_data data;\n\n    // user requests\n    std::vector<dsn::message_ex *> client_requests;\n\n    // used by pending mutation queue only\n    mutation *next;\n\n    std::shared_ptr<dsn::utils::latency_tracer> _tracer;\n\n    void set_is_sync_to_child(bool sync_to_child) { 
_is_sync_to_child = sync_to_child; }\n    bool is_sync_to_child() { return _is_sync_to_child; }\n\nprivate:\n    union\n    {\n        struct\n        {\n            unsigned int _not_logged : 1;\n            unsigned int _left_secondary_ack_count : 15;\n            unsigned int _left_potential_secondary_ack_count : 14;\n            // Used for partition split\n            // _wait_child = true : child prepare mutation synchronously, its parent should wait for\n            // child ack\n            bool _wait_child : 1;\n            // Used for partition split\n            // when prepare failed when child prepare mutation synchronously, secondary may try to\n            // ack to primary twice, we use _is_error_acked to restrict only ack once\n            bool _is_error_acked : 1;\n        };\n        uint32_t _private0;\n    };\n\n    uint64_t _prepare_ts_ms;\n    ::dsn::task_ptr _log_task;\n    node_tasks _prepare_or_commit_tasks;\n    std::vector<dsn::message_ex *> _prepare_requests; // may combine duplicate requests\n    char _name[60];                                   // app_id.partition_index.ballot.decree\n    int _appro_data_bytes;\n    uint64_t _create_ts_ns; // for profiling\n    uint64_t _tid;          // trace id, unique in process\n    static std::atomic<uint64_t> s_tid;\n    bool _is_sync_to_child; // for partition split\n};\n\nclass replica;\n// mutation queue are queues for mutations waiting to send.\n// more precisely: for client requests waiting to send.\n// mutations are queued as \"_hdr + _pending_mutation\". that is to say, _hdr.first is the first\n// element in the queue, and pending_mutations is the last.\n//\n// we keep 2 structure \"hdr\" and \"pending_mutation\" coz:\n// 1. as a container of client requests, capacity of a mutation is limited, so incoming client\n//    requets should be packed into different mutations\n// 2. 
number of preparing mutations is also limited, so we should queue new created mutations and\n//    try to send them as soon as the concurrent condition satisfies.\nclass mutation_queue\n{\npublic:\n    mutation_queue(gpid gpid, int max_concurrent_op = 2, bool batch_write_disabled = false);\n\n    ~mutation_queue()\n    {\n        clear();\n        dassert(_hdr.is_empty(),\n                \"work queue is deleted when there are still %d running ops or pending work items \"\n                \"in queue\",\n                _current_op_count);\n    }\n\n    mutation_ptr add_work(task_code code, dsn::message_ex *request, replica *r);\n\n    void clear();\n    // called when you want to clear the mutation_queue and want to get the remaining messages\n    void clear(std::vector<mutation_ptr> &queued_mutations);\n\n    // called when the curren operation is completed or replica configuration is change,\n    // which triggers further round of operations as returned\n    mutation_ptr check_possible_work(int current_running_count);\n\nprivate:\n    mutation_ptr unlink_next_workload()\n    {\n        mutation_ptr r = _hdr.pop_one();\n        if (r.get() != nullptr) {\n            r->release_ref(); // added in add_work\n            --(*_pcount);\n        }\n        return r;\n    }\n\n    void reset_max_concurrent_ops(int max_c) { _max_concurrent_op = max_c; }\n\nprivate:\n    int _current_op_count;\n    int _max_concurrent_op;\n    bool _batch_write_disabled;\n\n    volatile int *_pcount;\n    mutation_ptr _pending_mutation;\n    slist<mutation> _hdr;\n};\n}\n} // namespace\n"
  },
  {
    "path": "src/replica/mutation_cache.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"mutation_cache.h\"\n#include \"mutation.h\"\n\nnamespace dsn {\nnamespace replication {\n\nmutation_cache::mutation_cache(decree init_decree, int max_count)\n{\n    _max_count = max_count;\n    _array.resize(max_count, nullptr);\n\n    reset(init_decree, false);\n}\n\nmutation_cache::mutation_cache(const mutation_cache &cache)\n{\n    _array.clear();\n    _array.reserve(cache._array.size());\n    for (const mutation_ptr &old_mu : cache._array) {\n        _array.emplace_back(old_mu == nullptr ? 
nullptr : mutation::copy_no_reply(old_mu));\n    }\n\n    _max_count = cache._max_count;\n    _interval = cache._interval;\n    _start_idx = cache._start_idx;\n    _end_idx = cache._end_idx;\n    _start_decree = cache._start_decree;\n    _end_decree.store(cache._end_decree.load());\n}\n\nmutation_cache::~mutation_cache() { _array.clear(); }\n\nerror_code mutation_cache::put(mutation_ptr &mu)\n{\n    decree decree = mu->data.header.decree;\n    int delta = 0, tag = 0;\n    if (_interval == 0) {\n        delta = 1;\n        tag = 0;\n    } else if (decree > _end_decree) {\n        delta = static_cast<int>(decree - _end_decree);\n        tag = 1;\n    } else if (decree < _start_decree) {\n        delta = static_cast<int>(_start_decree - decree);\n        tag = -1;\n    }\n\n    if (delta + _interval > _max_count) {\n        return ERR_CAPACITY_EXCEEDED;\n    }\n\n    int idx = ((decree - _end_decree) + _end_idx + _max_count) % _max_count;\n    mutation_ptr &old = _array[idx];\n    if (old != nullptr) {\n        dassert(old->data.header.ballot <= mu->data.header.ballot,\n                \"%\" PRId64 \" VS %\" PRId64 \"\",\n                old->data.header.ballot,\n                mu->data.header.ballot);\n    }\n\n    _array[idx] = mu;\n\n    // update tracking data\n    _interval += delta;\n\n    if (tag > 0) {\n        _end_idx = idx;\n        _end_decree = decree;\n    } else if (tag < 0) {\n        _start_idx = idx;\n        _start_decree = decree;\n    } else if (_interval == 1) {\n        _start_idx = _end_idx = idx;\n        _start_decree = _end_decree = decree;\n    }\n    return ERR_OK;\n}\n\nmutation_ptr mutation_cache::pop_min()\n{\n    if (_interval > 0) {\n        mutation_ptr mu = _array[_start_idx];\n        _array[_start_idx] = nullptr;\n\n        _interval--;\n        _start_idx = (_start_idx + 1) % _max_count;\n\n        if (_interval == 0) {\n            // TODO: FIXE ME LATER\n            // dassert (_total_size_bytes == 0, \"\");\n\n            
_end_decree = _start_decree;\n            _end_idx = _start_idx;\n        } else {\n            _start_decree++;\n        }\n        return mu;\n    } else {\n        return nullptr;\n    }\n}\n\nvoid mutation_cache::reset(decree init_decree, bool clear_mutations)\n{\n    _start_decree = _end_decree = init_decree;\n    _start_idx = _end_idx = 0;\n    _interval = 0;\n\n    if (clear_mutations) {\n        for (int i = 0; i < _max_count; i++)\n            _array[i] = nullptr;\n    }\n}\n\nmutation_ptr mutation_cache::get_mutation_by_decree(decree decree)\n{\n    if (decree < _start_decree || decree > _end_decree)\n        return nullptr;\n    else\n        return _array[(_start_idx + (decree - _start_decree) + _max_count) % _max_count];\n}\n}\n} // namespace end\n"
  },
  {
    "path": "src/replica/mutation_cache.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include \"common/replication_common.h\"\n#include \"mutation.h\"\n#include <vector>\n#include <atomic>\n\nnamespace dsn {\nnamespace replication {\n\n// mutation_cache is an in-memory array that stores a limited number\n// (SEE replication_options::max_mutation_count_in_prepare_list) of mutation log entries.\n//\n// Inherited by: prepare_list\nclass mutation_cache\n{\npublic:\n    mutation_cache(decree init_decree, int max_count);\n    // only used when copy mutations whose client_request will not reply\n    mutation_cache(const mutation_cache &cache);\n    ~mutation_cache();\n\n    error_code put(mutation_ptr &mu);\n    mutation_ptr pop_min();\n    mutation_ptr get_mutation_by_decree(decree decree);\n    void 
reset(decree init_decree, bool clear_mutations);\n\n    decree min_decree() const { return _start_decree; }\n    decree max_decree() const { return _end_decree; }\n    int count() const { return _interval; }\n    int capacity() const { return _max_count; }\n\nprivate:\n    friend class mutation_batch_test;\n\n    std::vector<mutation_ptr> _array;\n    int _max_count;\n\n    int _interval;\n\n    int _start_idx;\n    int _end_idx;\n    decree _start_decree;\n    std::atomic<decree> _end_decree;\n};\n}\n} // namespace\n"
  },
  {
    "path": "src/replica/mutation_log.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"mutation_log.h\"\n#include \"replica.h\"\n#include \"mutation_log_utils.h\"\n\n#include <dsn/utils/latency_tracer.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/crc.h>\n#include <dsn/utility/defer.h>\n#include <dsn/utility/fail_point.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/tool-api/async_calls.h>\n\nnamespace dsn {\nnamespace replication {\nDSN_DEFINE_bool(\"replication\",\n                plog_force_flush,\n                false,\n                \"when write private log, whether to flush file after write done\");\n\n::dsn::task_ptr mutation_log_shared::append(mutation_ptr &mu,\n                                            dsn::task_code callback_code,\n                                          
  dsn::task_tracker *tracker,\n                                            aio_handler &&callback,\n                                            int hash,\n                                            int64_t *pending_size)\n{\n    auto d = mu->data.header.decree;\n    ::dsn::aio_task_ptr cb =\n        callback ? file::create_aio_task(\n                       callback_code, tracker, std::forward<aio_handler>(callback), hash)\n                 : nullptr;\n\n    _slock.lock();\n\n    ADD_POINT(mu->_tracer);\n    // init pending buffer\n    if (nullptr == _pending_write) {\n        _pending_write = std::make_shared<log_appender>(mark_new_offset(0, true).second);\n    }\n    _pending_write->append_mutation(mu, cb);\n\n    // update meta\n    update_max_decree(mu->data.header.pid, d);\n\n    // start to write if possible\n    if (!_is_writing.load(std::memory_order_acquire)) {\n        write_pending_mutations(true);\n        if (pending_size) {\n            *pending_size = 0;\n        }\n    } else {\n        if (pending_size) {\n            *pending_size = _pending_write->size();\n        }\n        _slock.unlock();\n    }\n    return cb;\n}\n\nvoid mutation_log_shared::flush() { flush_internal(-1); }\n\nvoid mutation_log_shared::flush_once() { flush_internal(1); }\n\nvoid mutation_log_shared::flush_internal(int max_count)\n{\n    int count = 0;\n    while (max_count <= 0 || count < max_count) {\n        if (_is_writing.load(std::memory_order_acquire)) {\n            _tracker.wait_outstanding_tasks();\n        } else {\n            _slock.lock();\n            if (_is_writing.load(std::memory_order_acquire)) {\n                _slock.unlock();\n                continue;\n            }\n            if (!_pending_write) {\n                // !_is_writing && !_pending_write, means flush done\n                _slock.unlock();\n                break;\n            }\n            // !_is_writing && _pending_write, start next write\n            write_pending_mutations(true);\n    
        count++;\n        }\n    }\n}\n\nvoid mutation_log_shared::write_pending_mutations(bool release_lock_required)\n{\n    dassert(release_lock_required, \"lock must be hold at this point\");\n    dassert(!_is_writing.load(std::memory_order_relaxed), \"\");\n    dassert(_pending_write != nullptr, \"\");\n    dassert(_pending_write->size() > 0, \"pending write size = %d\", (int)_pending_write->size());\n    auto pr = mark_new_offset(_pending_write->size(), false);\n    dcheck_eq(pr.second, _pending_write->start_offset());\n\n    _is_writing.store(true, std::memory_order_release);\n\n    // move or reset pending variables\n    auto pending = std::move(_pending_write);\n\n    // seperate commit_log_block from within the lock\n    _slock.unlock();\n    commit_pending_mutations(pr.first, pending);\n}\n\nvoid mutation_log_shared::commit_pending_mutations(log_file_ptr &lf,\n                                                   std::shared_ptr<log_appender> &pending)\n{\n    if (utils::FLAGS_enable_latency_tracer) {\n        for (auto &mu : pending->mutations()) {\n            ADD_POINT(mu->_tracer);\n        }\n    }\n    lf->commit_log_blocks( // forces a new line for params\n        *pending,\n        LPC_WRITE_REPLICATION_LOG_SHARED,\n        &_tracker,\n        [this, lf, pending](error_code err, size_t sz) mutable {\n            dassert(_is_writing.load(std::memory_order_relaxed), \"\");\n\n            if (utils::FLAGS_enable_latency_tracer) {\n                for (auto &mu : pending->mutations()) {\n                    ADD_CUSTOM_POINT(mu->_tracer, \"commit_pending_completed\");\n                }\n            }\n\n            for (auto &block : pending->all_blocks()) {\n                auto hdr = (log_block_header *)block.front().data();\n                dassert(hdr->magic == 0xdeadbeef, \"header magic is changed: 0x%x\", hdr->magic);\n            }\n\n            if (err == ERR_OK) {\n                dcheck_eq(sz, pending->size());\n\n                if 
(_force_flush) {\n                    // flush to ensure that shared log data synced to disk\n                    //\n                    // FIXME : the file could have been closed\n                    lf->flush();\n                }\n\n                if (_write_size_counter) {\n                    (*_write_size_counter)->add(sz);\n                }\n            } else {\n                derror(\"write shared log failed, err = %s\", err.to_string());\n            }\n\n            // here we use _is_writing instead of _issued_write.expired() to check writing done,\n            // because the following callbacks may run before \"block\" released, which may cause\n            // the next init_prepare() not starting the write.\n            _is_writing.store(false, std::memory_order_relaxed);\n\n            // notify the callbacks\n            // ATTENTION: callback may be called before this code block executed done.\n            for (auto &c : pending->callbacks()) {\n                c->enqueue(err, sz);\n            }\n\n            // start to write next if possible\n            if (err == ERR_OK) {\n                _slock.lock();\n\n                if (!_is_writing.load(std::memory_order_acquire) && _pending_write) {\n                    write_pending_mutations(true);\n                } else {\n                    _slock.unlock();\n                }\n            }\n        },\n        0);\n}\n\n////////////////////////////////////////////////////\n\nmutation_log_private::mutation_log_private(const std::string &dir,\n                                           int32_t max_log_file_mb,\n                                           gpid gpid,\n                                           replica *r)\n    : mutation_log(dir, max_log_file_mb, gpid, r), replica_base(r)\n{\n    mutation_log_private::init_states();\n}\n\n::dsn::task_ptr mutation_log_private::append(mutation_ptr &mu,\n                                             dsn::task_code callback_code,\n                    
                         dsn::task_tracker *tracker,\n                                             aio_handler &&callback,\n                                             int hash,\n                                             int64_t *pending_size)\n{\n    dsn::aio_task_ptr cb =\n        callback ? file::create_aio_task(\n                       callback_code, tracker, std::forward<aio_handler>(callback), hash)\n                 : nullptr;\n\n    _plock.lock();\n\n    ADD_POINT(mu->_tracer);\n\n    // init pending buffer\n    if (nullptr == _pending_write) {\n        _pending_write = make_unique<log_appender>(mark_new_offset(0, true).second);\n    }\n    _pending_write->append_mutation(mu, cb);\n\n    // update meta\n    _pending_write_max_commit =\n        std::max(_pending_write_max_commit, mu->data.header.last_committed_decree);\n    _pending_write_max_decree = std::max(_pending_write_max_decree, mu->data.header.decree);\n\n    // start to write if possible\n    if (!_is_writing.load(std::memory_order_acquire)) {\n        write_pending_mutations(true);\n        if (pending_size) {\n            *pending_size = 0;\n        }\n    } else {\n        if (pending_size) {\n            *pending_size = _pending_write->size();\n        }\n        _plock.unlock();\n    }\n    return cb;\n}\n\nbool mutation_log_private::get_learn_state_in_memory(decree start_decree,\n                                                     binary_writer &writer) const\n{\n    std::shared_ptr<log_appender> issued_write;\n    mutations pending_mutations;\n    {\n        zauto_lock l(_plock);\n\n        issued_write = _issued_write.lock();\n\n        if (_pending_write) {\n            pending_mutations = _pending_write->mutations();\n        }\n    }\n\n    int learned_count = 0;\n\n    if (issued_write) {\n        for (auto &mu : issued_write->mutations()) {\n            if (mu->get_decree() >= start_decree) {\n                mu->write_to(writer, nullptr);\n                learned_count++;\n       
     }\n        }\n    }\n\n    for (auto &mu : pending_mutations) {\n        if (mu->get_decree() >= start_decree) {\n            mu->write_to(writer, nullptr);\n            learned_count++;\n        }\n    }\n\n    return learned_count > 0;\n}\n\nvoid mutation_log_private::get_in_memory_mutations(decree start_decree,\n                                                   ballot start_ballot,\n                                                   std::vector<mutation_ptr> &mutation_list) const\n{\n    std::shared_ptr<log_appender> issued_write;\n    mutations pending_mutations;\n    {\n        zauto_lock l(_plock);\n        issued_write = _issued_write.lock();\n        if (_pending_write) {\n            pending_mutations = _pending_write->mutations();\n        }\n    }\n\n    if (issued_write) {\n        for (auto &mu : issued_write->mutations()) {\n            // if start_ballot is invalid or equal to mu.ballot, check decree\n            // otherwise check ballot\n            ballot current_ballot =\n                (start_ballot == invalid_ballot) ? invalid_ballot : mu->get_ballot();\n            if ((mu->get_decree() >= start_decree && start_ballot == current_ballot) ||\n                current_ballot > start_ballot) {\n                mutation_list.push_back(mutation::copy_no_reply(mu));\n            }\n        }\n    }\n\n    for (auto &mu : pending_mutations) {\n        // if start_ballot is invalid or equal to mu.ballot, check decree\n        // otherwise check ballot\n        ballot current_ballot =\n            (start_ballot == invalid_ballot) ? 
invalid_ballot : mu->get_ballot();\n        if ((mu->get_decree() >= start_decree && start_ballot == current_ballot) ||\n            current_ballot > start_ballot) {\n            mutation_list.push_back(mutation::copy_no_reply(mu));\n        }\n    }\n}\n\nvoid mutation_log_private::flush() { flush_internal(-1); }\n\nvoid mutation_log_private::flush_once() { flush_internal(1); }\n\nvoid mutation_log_private::flush_internal(int max_count)\n{\n    int count = 0;\n    while (max_count <= 0 || count < max_count) {\n        if (_is_writing.load(std::memory_order_acquire)) {\n            _tracker.wait_outstanding_tasks();\n        } else {\n            _plock.lock();\n            if (_is_writing.load(std::memory_order_acquire)) {\n                _plock.unlock();\n                continue;\n            }\n            if (!_pending_write) {\n                // !_is_writing && !_pending_write, means flush done\n                _plock.unlock();\n                break;\n            }\n            // !_is_writing && _pending_write, start next write\n            write_pending_mutations(true);\n            count++;\n        }\n    }\n}\n\nvoid mutation_log_private::init_states()\n{\n    mutation_log::init_states();\n\n    _is_writing.store(false, std::memory_order_release);\n    _issued_write.reset();\n    _pending_write = nullptr;\n    _pending_write_max_commit = 0;\n    _pending_write_max_decree = 0;\n}\n\nvoid mutation_log_private::write_pending_mutations(bool release_lock_required)\n{\n    dassert(release_lock_required, \"lock must be hold at this point\");\n    dassert(!_is_writing.load(std::memory_order_relaxed), \"\");\n    dassert(_pending_write != nullptr, \"\");\n    dassert(_pending_write->size() > 0, \"pending write size = %d\", (int)_pending_write->size());\n    auto pr = mark_new_offset(_pending_write->size(), false);\n    dcheck_eq_replica(pr.second, _pending_write->start_offset());\n\n    _is_writing.store(true, std::memory_order_release);\n\n    
update_max_decree(_private_gpid, _pending_write_max_decree);\n\n    // move or reset pending variables\n    std::shared_ptr<log_appender> pending = std::move(_pending_write);\n    _issued_write = pending;\n    decree max_commit = _pending_write_max_commit;\n    _pending_write_max_commit = 0;\n    _pending_write_max_decree = 0;\n\n    // Free plog from lock during committing log block, in the meantime\n    // new mutations can still be appended.\n    _plock.unlock();\n    commit_pending_mutations(pr.first, pending, max_commit);\n}\n\nvoid mutation_log_private::commit_pending_mutations(log_file_ptr &lf,\n                                                    std::shared_ptr<log_appender> &pending,\n                                                    decree max_commit)\n{\n    if (dsn_unlikely(utils::FLAGS_enable_latency_tracer)) {\n        for (const auto &mu : pending->mutations()) {\n            ADD_POINT(mu->_tracer);\n        }\n    }\n\n    lf->commit_log_blocks(\n        *pending,\n        LPC_WRITE_REPLICATION_LOG_PRIVATE,\n        &_tracker,\n        [this, lf, pending, max_commit](error_code err, size_t sz) mutable {\n            dassert(_is_writing.load(std::memory_order_relaxed), \"\");\n\n            for (auto &block : pending->all_blocks()) {\n                auto hdr = (log_block_header *)block.front().data();\n                dassert(hdr->magic == 0xdeadbeef, \"header magic is changed: 0x%x\", hdr->magic);\n            }\n\n            if (dsn_unlikely(utils::FLAGS_enable_latency_tracer)) {\n                for (const auto &mu : pending->mutations()) {\n                    ADD_CUSTOM_POINT(mu->_tracer, \"commit_pending_completed\");\n                }\n            }\n\n            // notify the callbacks\n            // ATTENTION: callback may be called before this code block executed done.\n            for (auto &c : pending->callbacks()) {\n                c->enqueue(err, sz);\n            }\n\n            if (err != ERR_OK) {\n                
derror(\"write private log failed, err = %s\", err.to_string());\n                _is_writing.store(false, std::memory_order_relaxed);\n                if (_io_error_callback) {\n                    _io_error_callback(err);\n                }\n                return;\n            }\n            dcheck_eq(sz, pending->size());\n\n            // flush to ensure that there is no gap between private log and in-memory buffer\n            // so that we can get all mutations in learning process.\n            //\n            // FIXME : the file could have been closed\n            if (FLAGS_plog_force_flush) {\n                lf->flush();\n            }\n\n            // update _private_max_commit_on_disk after written into log file done\n            update_max_commit_on_disk(max_commit);\n\n            _is_writing.store(false, std::memory_order_relaxed);\n\n            // start to write if possible\n            _plock.lock();\n\n            if (!_is_writing.load(std::memory_order_acquire) && _pending_write) {\n                write_pending_mutations(true);\n            } else {\n                _plock.unlock();\n            }\n        },\n        get_gpid().thread_hash());\n}\n\n///////////////////////////////////////////////////////////////\n\nmutation_log::mutation_log(const std::string &dir, int32_t max_log_file_mb, gpid gpid, replica *r)\n{\n    _dir = dir;\n    _is_private = (gpid.value() != 0);\n    _max_log_file_size_in_bytes = static_cast<int64_t>(max_log_file_mb) * 1024L * 1024L;\n    _min_log_file_size_in_bytes = _max_log_file_size_in_bytes / 10;\n    _owner_replica = r;\n    _private_gpid = gpid;\n\n    if (r) {\n        dassert(_private_gpid == r->get_gpid(),\n                \"(%d.%d) VS (%d.%d)\",\n                _private_gpid.get_app_id(),\n                _private_gpid.get_partition_index(),\n                r->get_gpid().get_app_id(),\n                r->get_gpid().get_partition_index());\n    }\n    mutation_log::init_states();\n}\n\nvoid 
mutation_log::init_states()\n{\n    _is_opened = false;\n    _switch_file_hint = false;\n    _switch_file_demand = false;\n\n    // logs\n    _last_file_index = 0;\n    _log_files.clear();\n    _current_log_file = nullptr;\n    _global_start_offset = 0;\n    _global_end_offset = 0;\n\n    // replica states\n    _shared_log_info_map.clear();\n    _private_log_info = {0, 0};\n    _private_max_commit_on_disk = 0;\n}\n\nerror_code mutation_log::open(replay_callback read_callback,\n                              io_failure_callback write_error_callback)\n{\n    std::map<gpid, decree> replay_condition;\n    return open(read_callback, write_error_callback, replay_condition);\n}\n\nerror_code mutation_log::open(replay_callback read_callback,\n                              io_failure_callback write_error_callback,\n                              const std::map<gpid, decree> &replay_condition)\n{\n    dassert(!_is_opened, \"cannot open a opened mutation_log\");\n    dassert(nullptr == _current_log_file, \"the current log file must be null at this point\");\n\n    // create dir if necessary\n    if (!dsn::utils::filesystem::path_exists(_dir)) {\n        if (!dsn::utils::filesystem::create_directory(_dir)) {\n            derror(\"open mutation_log: create log path failed\");\n            return ERR_FILE_OPERATION_FAILED;\n        }\n    }\n\n    // load the existing logs\n    _log_files.clear();\n    _io_error_callback = write_error_callback;\n\n    std::vector<std::string> file_list;\n    if (!dsn::utils::filesystem::get_subfiles(_dir, file_list, false)) {\n        derror(\"open mutation_log: get subfiles failed.\");\n        return ERR_FILE_OPERATION_FAILED;\n    }\n\n    if (nullptr == read_callback) {\n        dassert(file_list.size() == 0, \"log must be empty if callback is not present\");\n    }\n\n    std::sort(file_list.begin(), file_list.end());\n\n    error_code err = ERR_OK;\n    for (auto &fpath : file_list) {\n        log_file_ptr log = 
log_file::open_read(fpath.c_str(), err);\n        if (log == nullptr) {\n            if (err == ERR_HANDLE_EOF || err == ERR_INCOMPLETE_DATA ||\n                err == ERR_INVALID_PARAMETERS) {\n                dwarn(\"skip file %s during log init, err = %s\", fpath.c_str(), err.to_string());\n                continue;\n            } else {\n                return err;\n            }\n        }\n\n        if (_is_private) {\n            ddebug(\"open private log %s succeed, start_offset = %\" PRId64 \", end_offset = %\" PRId64\n                   \", size = %\" PRId64 \", previous_max_decree = %\" PRId64,\n                   fpath.c_str(),\n                   log->start_offset(),\n                   log->end_offset(),\n                   log->end_offset() - log->start_offset(),\n                   log->previous_log_max_decree(_private_gpid));\n        } else {\n            ddebug(\"open shared log %s succeed, start_offset = %\" PRId64 \", end_offset = %\" PRId64\n                   \", size = %\" PRId64 \"\",\n                   fpath.c_str(),\n                   log->start_offset(),\n                   log->end_offset(),\n                   log->end_offset() - log->start_offset());\n        }\n\n        dassert(_log_files.find(log->index()) == _log_files.end(),\n                \"invalid log_index, index = %d\",\n                log->index());\n        _log_files[log->index()] = log;\n    }\n\n    file_list.clear();\n\n    // filter useless log\n    std::map<int, log_file_ptr>::iterator replay_begin = _log_files.begin();\n    std::map<int, log_file_ptr>::iterator replay_end = _log_files.end();\n    if (!replay_condition.empty()) {\n        if (_is_private) {\n            auto find = replay_condition.find(_private_gpid);\n            dassert(find != replay_condition.end(),\n                    \"invalid gpid(%d.%d)\",\n                    _private_gpid.get_app_id(),\n                    _private_gpid.get_partition_index());\n            for (auto it = 
_log_files.begin(); it != _log_files.end(); ++it) {\n                if (it->second->previous_log_max_decree(_private_gpid) <= find->second) {\n                    // previous logs can be ignored\n                    replay_begin = it;\n                } else {\n                    break;\n                }\n            }\n        } else {\n            // find the largest file which can be ignored.\n            // after iterate, the 'mark_it' will point to the largest file which can be ignored.\n            std::map<int, log_file_ptr>::reverse_iterator mark_it;\n            std::set<gpid> kickout_replicas;\n            replica_log_info_map max_decrees; // max_decrees for log file at mark_it.\n            for (mark_it = _log_files.rbegin(); mark_it != _log_files.rend(); ++mark_it) {\n                bool ignore_this = true;\n\n                if (mark_it == _log_files.rbegin()) {\n                    // the last file should not be ignored\n                    ignore_this = false;\n                }\n\n                if (ignore_this) {\n                    for (auto &kv : replay_condition) {\n                        if (kickout_replicas.find(kv.first) != kickout_replicas.end()) {\n                            // no need to consider this replica\n                            continue;\n                        }\n\n                        auto find = max_decrees.find(kv.first);\n                        if (find == max_decrees.end() || find->second.max_decree <= kv.second) {\n                            // can ignore for this replica\n                            kickout_replicas.insert(kv.first);\n                        } else {\n                            ignore_this = false;\n                            break;\n                        }\n                    }\n                }\n\n                if (ignore_this) {\n                    // found the largest file which can be ignored\n                    break;\n                }\n\n                // update 
max_decrees for the next log file\n                max_decrees = mark_it->second->previous_log_max_decrees();\n            }\n\n            if (mark_it != _log_files.rend()) {\n                // set replay_begin to the next position of mark_it.\n                replay_begin = _log_files.find(mark_it->first);\n                dassert(replay_begin != _log_files.end(),\n                        \"invalid log_index, index = %d\",\n                        mark_it->first);\n                replay_begin++;\n                dassert(replay_begin != _log_files.end(),\n                        \"invalid log_index, index = %d\",\n                        mark_it->first);\n            }\n        }\n\n        for (auto it = _log_files.begin(); it != replay_begin; it++) {\n            ddebug(\"ignore log %s\", it->second->path().c_str());\n        }\n    }\n\n    // replay with the found files\n    std::map<int, log_file_ptr> replay_logs(replay_begin, replay_end);\n    int64_t end_offset = 0;\n    err = replay(\n        replay_logs,\n        [this, read_callback](int log_length, mutation_ptr &mu) {\n            bool ret = true;\n\n            if (read_callback) {\n                ret = read_callback(log_length,\n                                    mu); // actually replica::replay_mutation(mu, true|false);\n            }\n\n            if (ret) {\n                this->update_max_decree_no_lock(mu->data.header.pid, mu->data.header.decree);\n                if (this->_is_private) {\n                    this->update_max_commit_on_disk_no_lock(mu->data.header.last_committed_decree);\n                }\n            }\n\n            return ret;\n        },\n        end_offset);\n\n    if (ERR_OK == err) {\n        _global_start_offset =\n            _log_files.size() > 0 ? _log_files.begin()->second->start_offset() : 0;\n        _global_end_offset = end_offset;\n        _last_file_index = _log_files.size() > 0 ? 
_log_files.rbegin()->first : 0;\n        _is_opened = true;\n    } else {\n        // clear\n        for (auto &kv : _log_files) {\n            kv.second->close();\n        }\n        _log_files.clear();\n        init_states();\n    }\n\n    return err;\n}\n\nvoid mutation_log::close()\n{\n    {\n        zauto_lock l(_lock);\n        if (!_is_opened) {\n            return;\n        }\n        _is_opened = false;\n    }\n\n    dinfo(\"close mutation log %s\", dir().c_str());\n\n    // make all data is on disk\n    flush();\n\n    {\n        zauto_lock l(_lock);\n\n        // close current log file\n        if (nullptr != _current_log_file) {\n            _current_log_file->close();\n            _current_log_file = nullptr;\n        }\n    }\n\n    // reset all states\n    init_states();\n}\n\nerror_code mutation_log::create_new_log_file()\n{\n    // create file\n    uint64_t start = dsn_now_ns();\n    log_file_ptr logf =\n        log_file::create_write(_dir.c_str(), _last_file_index + 1, _global_end_offset);\n    if (logf == nullptr) {\n        derror(\"cannot create log file with index %d\", _last_file_index + 1);\n        return ERR_FILE_OPERATION_FAILED;\n    }\n    dassert(logf->end_offset() == logf->start_offset(),\n            \"%\" PRId64 \" VS %\" PRId64 \"\",\n            logf->end_offset(),\n            logf->start_offset());\n    dassert(_global_end_offset == logf->end_offset(),\n            \"%\" PRId64 \" VS %\" PRId64 \"\",\n            _global_end_offset,\n            logf->start_offset());\n    ddebug(\"create new log file %s succeed, time_used = %\" PRIu64 \" ns\",\n           logf->path().c_str(),\n           dsn_now_ns() - start);\n\n    // update states\n    _last_file_index++;\n    dassert(_log_files.find(_last_file_index) == _log_files.end(),\n            \"invalid log_offset, offset = %d\",\n            _last_file_index);\n    _log_files[_last_file_index] = logf;\n\n    // switch the current log file\n    // the old log file may be hold by 
_log_files or aio_task\n    _current_log_file = logf;\n\n    // create new pending buffer because we need write file header\n    // write file header into pending buffer\n    size_t header_len = 0;\n    binary_writer temp_writer;\n    if (_is_private) {\n        replica_log_info_map ds;\n        ds[_private_gpid] =\n            replica_log_info(_private_log_info.max_decree, _private_log_info.valid_start_offset);\n        header_len = logf->write_file_header(temp_writer, ds);\n    } else {\n        header_len = logf->write_file_header(temp_writer, _shared_log_info_map);\n    }\n\n    log_block *blk = new log_block();\n    blk->add(temp_writer.get_buffer());\n    _global_end_offset += blk->size();\n\n    logf->commit_log_block(*blk,\n                           _current_log_file->start_offset(),\n                           LPC_WRITE_REPLICATION_LOG_COMMON,\n                           &_tracker,\n                           [this, blk, logf](::dsn::error_code err, size_t sz) {\n                               delete blk;\n                               if (ERR_OK != err) {\n                                   derror(\n                                       \"write mutation log file header failed, file = %s, err = %s\",\n                                       logf->path().c_str(),\n                                       err.to_string());\n                                   if (_io_error_callback) {\n                                       _io_error_callback(err);\n                                   } else {\n                                       dassert(false, \"unhandled error\");\n                                   }\n                               }\n                           },\n                           0);\n\n    dassert(_global_end_offset ==\n                _current_log_file->start_offset() + sizeof(log_block_header) + header_len,\n            \"%\" PRId64 \" VS %\" PRId64 \"(%\" PRId64 \" + %d + %d)\",\n            _global_end_offset,\n            
_current_log_file->start_offset() + sizeof(log_block_header) + header_len,\n            _current_log_file->start_offset(),\n            (int)sizeof(log_block_header),\n            (int)header_len);\n    return ERR_OK;\n}\n\nstd::pair<log_file_ptr, int64_t> mutation_log::mark_new_offset(size_t size,\n                                                               bool create_new_log_if_needed)\n{\n    zauto_lock l(_lock);\n\n    if (create_new_log_if_needed) {\n        bool create_file = false;\n        if (_current_log_file == nullptr) {\n            create_file = true;\n        } else {\n            int64_t file_size = _global_end_offset - _current_log_file->start_offset();\n            const char *reason = nullptr;\n\n            if (_switch_file_demand) {\n                create_file = true;\n                reason = \"demand\";\n            } else if (file_size >= _max_log_file_size_in_bytes) {\n                create_file = true;\n                reason = \"limit\";\n            } else if (_switch_file_hint && file_size >= _min_log_file_size_in_bytes) {\n                create_file = true;\n                reason = \"hint\";\n            }\n\n            if (create_file) {\n                ddebug(\"switch log file by %s, old_file = %s, size = %\" PRId64,\n                       reason,\n                       _current_log_file->path().c_str(),\n                       file_size);\n            }\n        }\n\n        if (create_file) {\n            auto ec = create_new_log_file();\n            dassert_f(ec == ERR_OK,\n                      \"{} create new log file failed: {}\",\n                      _is_private ? 
_private_gpid.to_string() : \"\",\n                      ec);\n            _switch_file_hint = false;\n            _switch_file_demand = false;\n        }\n    } else {\n        dassert(_current_log_file != nullptr, \"\");\n    }\n\n    int64_t write_start_offset = _global_end_offset;\n    _global_end_offset += size;\n\n    return std::make_pair(_current_log_file, write_start_offset);\n}\n\ndecree mutation_log::max_decree(gpid gpid) const\n{\n    zauto_lock l(_lock);\n    if (_is_private) {\n        dassert(gpid == _private_gpid, \"replica gpid does not match\");\n        return _private_log_info.max_decree;\n    } else {\n        auto it = _shared_log_info_map.find(gpid);\n        if (it != _shared_log_info_map.end())\n            return it->second.max_decree;\n        else\n            return 0;\n    }\n}\n\ndecree mutation_log::max_commit_on_disk() const\n{\n    zauto_lock l(_lock);\n    dassert(_is_private, \"this method is only valid for private logs\");\n    return _private_max_commit_on_disk;\n}\n\ndecree mutation_log::max_gced_decree(gpid gpid) const\n{\n    zauto_lock l(_lock);\n    return max_gced_decree_no_lock(gpid);\n}\n\ndecree mutation_log::max_gced_decree_no_lock(gpid gpid) const\n{\n    dassert(_is_private, \"\");\n\n    decree result = invalid_decree;\n    for (auto &log : _log_files) {\n        auto it = log.second->previous_log_max_decrees().find(gpid);\n        if (it != log.second->previous_log_max_decrees().end()) {\n            if (result == invalid_decree) {\n                result = it->second.max_decree;\n            } else {\n                result = std::min(result, it->second.max_decree);\n            }\n        }\n    }\n    return result;\n}\n\nvoid mutation_log::check_valid_start_offset(gpid gpid, int64_t valid_start_offset) const\n{\n    zauto_lock l(_lock);\n    if (_is_private) {\n        dassert(valid_start_offset == _private_log_info.valid_start_offset,\n                \"valid start offset mismatch: %\" PRId64 \" vs %\" 
PRId64,\n                valid_start_offset,\n                _private_log_info.valid_start_offset);\n    } else {\n        auto it = _shared_log_info_map.find(gpid);\n        if (it != _shared_log_info_map.end()) {\n            dassert(valid_start_offset == it->second.valid_start_offset,\n                    \"valid start offset mismatch: %\" PRId64 \" vs %\" PRId64,\n                    valid_start_offset,\n                    it->second.valid_start_offset);\n        }\n    }\n}\n\nint64_t mutation_log::total_size() const\n{\n    zauto_lock l(_lock);\n    return total_size_no_lock();\n}\n\nint64_t mutation_log::total_size_no_lock() const\n{\n    return _log_files.size() > 0 ? _global_end_offset - _global_start_offset : 0;\n}\n\nerror_code mutation_log::reset_from(const std::string &dir,\n                                    replay_callback replay_error_callback,\n                                    io_failure_callback write_error_callback)\n{\n    error_code err = ERR_FILE_OPERATION_FAILED;\n\n    // close for flushing current log and be ready to open new log files after reset\n    close();\n\n    // make sure logs in `dir` (such as /learn) are valid.\n    error_s es = log_utils::check_log_files_continuity(dir);\n    if (!es.is_ok()) {\n        derror_f(\"the log of source dir {} is invalid:{}, will remove it.\", dir, es);\n        if (!utils::filesystem::remove_path(dir)) {\n            derror_f(\"remove {} failed\", dir);\n            return err;\n        }\n        return es.code();\n    }\n\n    std::string temp_dir = _dir + '.' 
+ std::to_string(dsn_now_ns());\n    if (!utils::filesystem::rename_path(_dir, temp_dir)) {\n        derror_f(\"rename {} to {} failed\", _dir, temp_dir);\n        return err;\n    }\n    ddebug_f(\"moved current log dir {}  to tmp_dir {}\", _dir, temp_dir);\n    // define `defer` for rollback temp_dir when failed or remove temp_dir when success\n    auto temp_dir_resolve = dsn::defer([this, err, temp_dir]() {\n        if (err != ERR_OK) {\n            if (!utils::filesystem::rename_path(temp_dir, _dir)) {\n                // rollback failed means old log files are not be recovered, it may be lost if only\n                // derror,  dassert for manual resolve it\n                dassert_f(\"rollback {} to {} failed\", temp_dir, _dir);\n            }\n        } else {\n            if (!dsn::utils::filesystem::remove_path(temp_dir)) {\n                // temp dir allow delete failed, it's only garbage\n                derror_f(\"remove temp dir {} failed\", temp_dir);\n            }\n        }\n    });\n\n    // move source dir to target dir\n    if (!utils::filesystem::rename_path(dir, _dir)) {\n        derror_f(\"rename {} to {} failed\", dir, _dir);\n        return err;\n    }\n    ddebug_f(\"move {} to {} as our new log directory\", dir, _dir);\n\n    // - make sure logs in moved dir(such as /plog) are valid and can be opened successfully.\n    // - re-open new log files  for loading the new log file and register the files into replica,\n    // please make sure the old log files has been closed\n    err = open(replay_error_callback, write_error_callback);\n    if (err != ERR_OK) {\n        derror_f(\"the logs of moved dir {} are invalid and open failed:{}\", _dir, err);\n    }\n    return err;\n}\n\nvoid mutation_log::set_valid_start_offset_on_open(gpid gpid, int64_t valid_start_offset)\n{\n    zauto_lock l(_lock);\n    if (_is_private) {\n        dassert(gpid == _private_gpid, \"replica gpid does not match\");\n        _private_log_info.valid_start_offset = 
valid_start_offset;\n    } else {\n        _shared_log_info_map[gpid] = replica_log_info(0, valid_start_offset);\n    }\n}\n\nint64_t mutation_log::on_partition_reset(gpid gpid, decree max_decree)\n{\n    zauto_lock l(_lock);\n    if (_is_private) {\n        dassert(_private_gpid == gpid, \"replica gpid does not match\");\n        replica_log_info old_info = _private_log_info;\n        _private_log_info.max_decree = max_decree;\n        _private_log_info.valid_start_offset = _global_end_offset;\n        dwarn(\"replica %d.%d has changed private log max_decree from %\" PRId64 \" to %\" PRId64\n              \", valid_start_offset from %\" PRId64 \" to %\" PRId64,\n              gpid.get_app_id(),\n              gpid.get_partition_index(),\n              old_info.max_decree,\n              _private_log_info.max_decree,\n              old_info.valid_start_offset,\n              _private_log_info.valid_start_offset);\n    } else {\n        replica_log_info info(max_decree, _global_end_offset);\n        auto it = _shared_log_info_map.insert(replica_log_info_map::value_type(gpid, info));\n        if (!it.second) {\n            dwarn(\"replica %d.%d has changed shared log max_decree from %\" PRId64 \" to %\" PRId64\n                  \", valid_start_offset from %\" PRId64 \" to %\" PRId64,\n                  gpid.get_app_id(),\n                  gpid.get_partition_index(),\n                  it.first->second.max_decree,\n                  info.max_decree,\n                  it.first->second.valid_start_offset,\n                  info.valid_start_offset);\n            _shared_log_info_map[gpid] = info;\n        }\n    }\n    return _global_end_offset;\n}\n\nvoid mutation_log::on_partition_removed(gpid gpid)\n{\n    dassert(!_is_private, \"this method is only valid for shared logs\");\n    zauto_lock l(_lock);\n    _shared_log_info_map.erase(gpid);\n}\n\nvoid mutation_log::update_max_decree(gpid gpid, decree d)\n{\n    zauto_lock l(_lock);\n    
update_max_decree_no_lock(gpid, d);\n}\n\nvoid mutation_log::update_max_decree_no_lock(gpid gpid, decree d)\n{\n    if (!_is_private) {\n        auto it = _shared_log_info_map.find(gpid);\n        if (it != _shared_log_info_map.end()) {\n            if (it->second.max_decree < d) {\n                it->second.max_decree = d;\n            }\n        } else {\n            dassert(false, \"replica has not been registered in the log before\");\n        }\n    } else {\n        dcheck_eq(gpid, _private_gpid);\n        if (d > _private_log_info.max_decree) {\n            _private_log_info.max_decree = d;\n        }\n    }\n}\n\nvoid mutation_log::update_max_commit_on_disk(decree d)\n{\n    zauto_lock l(_lock);\n    update_max_commit_on_disk_no_lock(d);\n}\n\nvoid mutation_log::update_max_commit_on_disk_no_lock(decree d)\n{\n    dassert(_is_private, \"this method is only valid for private logs\");\n    if (d > _private_max_commit_on_disk) {\n        _private_max_commit_on_disk = d;\n    }\n}\n\nbool mutation_log::get_learn_state(gpid gpid, decree start, /*out*/ learn_state &state) const\n{\n    dassert(_is_private, \"this method is only valid for private logs\");\n    dassert(_private_gpid == gpid,\n            \"replica gpid does not match, (%d.%d) VS (%d.%d)\",\n            _private_gpid.get_app_id(),\n            _private_gpid.get_partition_index(),\n            gpid.get_app_id(),\n            gpid.get_partition_index());\n\n    binary_writer temp_writer;\n    if (get_learn_state_in_memory(start, temp_writer)) {\n        state.meta = temp_writer.get_buffer();\n    }\n\n    std::map<int, log_file_ptr> files;\n    {\n        zauto_lock l(_lock);\n\n        if (state.meta.length() == 0 && start > _private_log_info.max_decree) {\n            // no memory data and no disk data\n            ddebug_f(\"gpid({}) get_learn_state returns false\"\n                     \"learn_start_decree={}, max_decree_in_private_log={}\",\n                     gpid,\n                     
start,\n                     _private_log_info.max_decree);\n            return false;\n        }\n\n        files = _log_files;\n    }\n\n    // find all applicable files\n    bool skip_next = false;\n    std::list<std::string> learn_files;\n    log_file_ptr log;\n    decree last_max_decree = 0;\n    int learned_file_head_index = 0;\n    int learned_file_tail_index = 0;\n    int64_t learned_file_start_offset = 0;\n    for (auto itr = files.rbegin(); itr != files.rend(); ++itr) {\n        log = itr->second;\n        if (log->end_offset() <= _private_log_info.valid_start_offset)\n            break;\n\n        if (skip_next) {\n            skip_next = (log->previous_log_max_decrees().size() == 0);\n            continue;\n        }\n\n        if (log->end_offset() > log->start_offset()) {\n            // not empty file\n            learn_files.push_back(log->path());\n            if (learned_file_tail_index == 0)\n                learned_file_tail_index = log->index();\n            learned_file_head_index = log->index();\n            learned_file_start_offset = log->start_offset();\n        }\n\n        skip_next = (log->previous_log_max_decrees().size() == 0);\n\n        // continue checking as this file may be a fault\n        if (skip_next)\n            continue;\n\n        last_max_decree = log->previous_log_max_decrees().begin()->second.max_decree;\n\n        // when all possible decrees are not needed\n        if (last_max_decree < start) {\n            // skip all older logs\n            break;\n        }\n    }\n\n    // reverse the order, to make files ordered by index incrementally\n    state.files.reserve(learn_files.size());\n    for (auto it = learn_files.rbegin(); it != learn_files.rend(); ++it) {\n        state.files.push_back(*it);\n    }\n\n    bool ret = (learned_file_start_offset >= _private_log_info.valid_start_offset &&\n                last_max_decree > 0 && last_max_decree < start);\n    ddebug(\"gpid(%d.%d) get_learn_state returns %s, \"\n      
     \"private logs count %d (%d => %d), learned files count %d (%d => %d): \"\n           \"learned_file_start_offset(%\" PRId64 \") >= valid_start_offset(%\" PRId64 \") && \"\n           \"last_max_decree(%\" PRId64 \") > 0 && last_max_decree(%\" PRId64\n           \") < learn_start_decree(%\" PRId64 \")\",\n           gpid.get_app_id(),\n           gpid.get_partition_index(),\n           ret ? \"true\" : \"false\",\n           (int)files.size(),\n           files.empty() ? 0 : files.begin()->first,\n           files.empty() ? 0 : files.rbegin()->first,\n           (int)learn_files.size(),\n           learned_file_head_index,\n           learned_file_tail_index,\n           learned_file_start_offset,\n           _private_log_info.valid_start_offset,\n           last_max_decree,\n           last_max_decree,\n           start);\n\n    return ret;\n}\n\nvoid mutation_log::get_parent_mutations_and_logs(gpid pid,\n                                                 decree start_decree,\n                                                 ballot start_ballot,\n                                                 std::vector<mutation_ptr> &mutation_list,\n                                                 std::vector<std::string> &files,\n                                                 uint64_t &total_file_size) const\n{\n    dassert(_is_private, \"this method is only valid for private logs\");\n    dcheck_eq(_private_gpid, pid);\n\n    mutation_list.clear();\n    files.clear();\n    total_file_size = 0;\n\n    get_in_memory_mutations(start_decree, start_ballot, mutation_list);\n\n    if (mutation_list.size() == 0 && start_decree > _private_log_info.max_decree) {\n        // no memory data and no disk data\n        return;\n    }\n    std::map<int, log_file_ptr> file_map = get_log_file_map();\n\n    bool skip_next = false;\n    std::list<std::string> learn_files;\n    decree last_max_decree = 0;\n    for (auto itr = file_map.rbegin(); itr != file_map.rend(); ++itr) {\n        
log_file_ptr &log = itr->second;\n        if (log->end_offset() <= _private_log_info.valid_start_offset)\n            break;\n\n        if (skip_next) {\n            skip_next = (log->previous_log_max_decrees().size() == 0);\n            continue;\n        }\n\n        if (log->end_offset() > log->start_offset()) {\n            // not empty file\n            learn_files.push_back(log->path());\n            total_file_size += (log->end_offset() - log->start_offset());\n        }\n\n        skip_next = (log->previous_log_max_decrees().size() == 0);\n        // continue checking as this file may be a fault\n        if (skip_next)\n            continue;\n\n        last_max_decree = log->previous_log_max_decrees().begin()->second.max_decree;\n        // when all possible decrees are not needed\n        if (last_max_decree < start_decree) {\n            // skip all older logs\n            break;\n        }\n    }\n\n    // reverse the order, to make files ordered by index incrementally\n    files.reserve(learn_files.size());\n    for (auto it = learn_files.rbegin(); it != learn_files.rend(); ++it) {\n        files.push_back(*it);\n    }\n}\n\n// return true if the file is covered by both reserve_max_size and reserve_max_time\nstatic bool should_reserve_file(log_file_ptr log,\n                                int64_t already_reserved_size,\n                                int64_t reserve_max_size,\n                                int64_t reserve_max_time)\n{\n    if (reserve_max_size == 0 || reserve_max_time == 0)\n        return false;\n\n    int64_t file_size = log->end_offset() - log->start_offset();\n    if (already_reserved_size + file_size > reserve_max_size) {\n        // already exceed size limit, should not reserve\n        return false;\n    }\n\n    uint64_t file_last_write_time = log->last_write_time();\n    if (file_last_write_time == 0) {\n        time_t tm;\n        if (!dsn::utils::filesystem::last_write_time(log->path(), tm)) {\n            // get file 
last write time failed, reserve it for safety\n            dwarn(\"get last write time of file %s failed\", log->path().c_str());\n            return true;\n        }\n        file_last_write_time = (uint64_t)tm;\n        log->set_last_write_time(file_last_write_time);\n    }\n    uint64_t current_time = dsn_now_ms() / 1000;\n    if (file_last_write_time + reserve_max_time < current_time) {\n        // already exceed time limit, should not reserve\n        return false;\n    }\n\n    // not exceed size and time limit, reserve it\n    return true;\n}\n\nint mutation_log::garbage_collection(gpid gpid,\n                                     decree cleanable_decree,\n                                     int64_t valid_start_offset,\n                                     int64_t reserve_max_size,\n                                     int64_t reserve_max_time)\n{\n    dassert(_is_private, \"this method is only valid for private log\");\n\n    std::map<int, log_file_ptr> files;\n    decree max_decree = invalid_decree;\n    int current_file_index = -1;\n\n    {\n        zauto_lock l(_lock);\n        files = _log_files;\n        max_decree = _private_log_info.max_decree;\n        if (_current_log_file != nullptr)\n            current_file_index = _current_log_file->index();\n    }\n\n    if (files.size() <= 1) {\n        // nothing to do\n        return 0;\n    } else {\n        // the last one should be the current log file\n        dassert(current_file_index == -1 || files.rbegin()->first == current_file_index,\n                \"invalid current_file_index, index = %d\",\n                current_file_index);\n    }\n\n    // find the largest file which can be deleted.\n    // after iterate, the 'mark_it' will point to the largest file which can be deleted.\n    std::map<int, log_file_ptr>::reverse_iterator mark_it;\n    int64_t already_reserved_size = 0;\n    for (mark_it = files.rbegin(); mark_it != files.rend(); ++mark_it) {\n        log_file_ptr log = mark_it->second;\n   
     dassert(mark_it->first == log->index(), \"%d VS %d\", mark_it->first, log->index());\n        // currently, \"max_decree\" is the max decree covered by this log.\n\n        // reserve current file\n        if (current_file_index == log->index()) {\n            // not break, go to update max decree\n        }\n\n        // reserve if the file is covered by both reserve_max_size and reserve_max_time\n        else if (should_reserve_file(\n                     log, already_reserved_size, reserve_max_size, reserve_max_time)) {\n            // not break, go to update max decree\n        }\n\n        // log is invalid, ok to delete\n        else if (valid_start_offset >= log->end_offset()) {\n            ddebug_f(\"gc_private @ {}: will remove files {} ~ log.{} because \"\n                     \"valid_start_offset={} outdates log_end_offset={}\",\n                     _private_gpid,\n                     files.begin()->second->path(),\n                     log->index(),\n                     valid_start_offset,\n                     log->end_offset());\n            break;\n        }\n\n        // all mutations are cleanable, ok to delete\n        else if (cleanable_decree >= max_decree) {\n            ddebug_f(\"gc_private @ {}: will remove files {} ~ log.{} because \"\n                     \"cleanable_decree={} outdates max_decree={}\",\n                     _private_gpid,\n                     files.begin()->second->path(),\n                     log->index(),\n                     cleanable_decree,\n                     max_decree);\n            break;\n        }\n\n        // update max decree for the next log file\n        auto &max_decrees = log->previous_log_max_decrees();\n        auto it3 = max_decrees.find(gpid);\n        dassert(it3 != max_decrees.end(), \"impossible for private logs\");\n        max_decree = it3->second.max_decree;\n        already_reserved_size += log->end_offset() - log->start_offset();\n    }\n\n    if (mark_it == files.rend()) {\n     
   // no file to delete\n        return 0;\n    }\n\n    // ok, let's delete files in increasing order of file index\n    // to avoid making a hole in the file list\n    int largest_to_delete = mark_it->second->index();\n    int deleted = 0;\n    for (auto it = files.begin(); it != files.end() && it->second->index() <= largest_to_delete;\n         ++it) {\n        log_file_ptr log = it->second;\n        dcheck_eq(it->first, log->index());\n\n        // close first\n        log->close();\n\n        // delete file\n        auto &fpath = log->path();\n        if (!dsn::utils::filesystem::remove_path(fpath)) {\n            derror(\"gc_private @ %d.%d: fail to remove %s, stop current gc cycle ...\",\n                   _private_gpid.get_app_id(),\n                   _private_gpid.get_partition_index(),\n                   fpath.c_str());\n            break;\n        }\n\n        // delete succeed\n        ddebug_f(\"gc_private @ {}: log file {} is removed\", _private_gpid, fpath);\n        deleted++;\n\n        // erase from _log_files\n        {\n            zauto_lock l(_lock);\n            _log_files.erase(it->first);\n            _global_start_offset =\n                _log_files.size() > 0 ? 
_log_files.begin()->second->start_offset() : 0;\n        }\n    }\n\n    return deleted;\n}\n\nint mutation_log::garbage_collection(const replica_log_info_map &gc_condition,\n                                     int file_count_limit,\n                                     std::set<gpid> &prevent_gc_replicas)\n{\n    dassert(!_is_private, \"this method is only valid for shared log\");\n\n    std::map<int, log_file_ptr> files;\n    replica_log_info_map max_decrees;\n    int current_log_index = -1;\n    int64_t total_log_size = 0;\n\n    {\n        zauto_lock l(_lock);\n        files = _log_files;\n        max_decrees = _shared_log_info_map;\n        if (_current_log_file != nullptr)\n            current_log_index = _current_log_file->index();\n        total_log_size = total_size_no_lock();\n    }\n\n    if (files.size() <= 1) {\n        // nothing to do\n        ddebug(\"gc_shared: too few files to delete, file_count_limit = %d, \"\n               \"reserved_log_count = %d, reserved_log_size = %\" PRId64 \", current_log_index = %d\",\n               file_count_limit,\n               (int)files.size(),\n               total_log_size,\n               current_log_index);\n        return (int)files.size();\n    } else {\n        // the last one should be the current log file\n        dassert(-1 == current_log_index || files.rbegin()->first == current_log_index,\n                \"invalid current_log_index, index = %d\",\n                current_log_index);\n    }\n\n    int reserved_log_count = files.size();\n    int64_t reserved_log_size = total_log_size;\n    int reserved_smallest_log = files.begin()->first;\n    int reserved_largest_log = current_log_index;\n\n    // find the largest file which can be deleted.\n    // after iterate, the 'mark_it' will point to the largest file which can be deleted.\n    std::map<int, log_file_ptr>::reverse_iterator mark_it;\n    std::set<gpid> kickout_replicas;\n    gpid stop_gc_replica;\n    int stop_gc_log_index = 0;\n    decree 
stop_gc_decree_gap = 0;\n    decree stop_gc_garbage_max_decree = 0;\n    decree stop_gc_log_max_decree = 0;\n    int file_count = 0;\n    for (mark_it = files.rbegin(); mark_it != files.rend(); ++mark_it) {\n        log_file_ptr log = mark_it->second;\n        dassert(mark_it->first == log->index(), \"%d VS %d\", mark_it->first, log->index());\n        file_count++;\n\n        bool delete_ok = true;\n\n        // skip current file\n        if (current_log_index == log->index()) {\n            delete_ok = false;\n        }\n\n        if (delete_ok) {\n            std::set<gpid> prevent_gc_replicas_for_this_log;\n\n            for (auto &kv : gc_condition) {\n                if (kickout_replicas.find(kv.first) != kickout_replicas.end()) {\n                    // no need to consider this replica\n                    continue;\n                }\n\n                gpid gpid = kv.first;\n                decree garbage_max_decree = kv.second.max_decree;\n                int64_t valid_start_offset = kv.second.valid_start_offset;\n\n                bool delete_ok_for_this_replica = false;\n                bool kickout_this_replica = false;\n                auto it3 = max_decrees.find(gpid);\n\n                // log not found for this replica, ok to delete\n                if (it3 == max_decrees.end()) {\n                    // valid_start_offset may be reset to 0 if initialize_on_load() returns\n                    // ERR_INCOMPLETE_DATA\n                    dassert(\n                        valid_start_offset == 0 || valid_start_offset >= log->end_offset(),\n                        \"valid start offset must be 0 or greater than the end of this log file\");\n\n                    dinfo(\"gc @ %d.%d: max_decree for %s is missing vs %\" PRId64\n                          \" as garbage max decree,\"\n                          \" safe to delete this and all older logs for this replica\",\n                          gpid.get_app_id(),\n                          
gpid.get_partition_index(),\n                          log->path().c_str(),\n                          garbage_max_decree);\n                    delete_ok_for_this_replica = true;\n                    kickout_this_replica = true;\n                }\n\n                // log is invalid for this replica, ok to delete\n                else if (log->end_offset() <= valid_start_offset) {\n                    dinfo(\n                        \"gc @ %d.%d: log is invalid for %s, as\"\n                        \" valid start offset vs log end offset = %\" PRId64 \" vs %\" PRId64 \",\"\n                        \" it is therefore safe to delete this and all older logs for this replica\",\n                        gpid.get_app_id(),\n                        gpid.get_partition_index(),\n                        log->path().c_str(),\n                        valid_start_offset,\n                        log->end_offset());\n                    delete_ok_for_this_replica = true;\n                    kickout_this_replica = true;\n                }\n\n                // all decrees are no more than garbage max decree, ok to delete\n                else if (it3->second.max_decree <= garbage_max_decree) {\n                    dinfo(\n                        \"gc @ %d.%d: max_decree for %s is %\" PRId64 \" vs %\" PRId64\n                        \" as garbage max decree,\"\n                        \" it is therefore safe to delete this and all older logs for this replica\",\n                        gpid.get_app_id(),\n                        gpid.get_partition_index(),\n                        log->path().c_str(),\n                        it3->second.max_decree,\n                        garbage_max_decree);\n                    delete_ok_for_this_replica = true;\n                    kickout_this_replica = true;\n                }\n\n                else // it3->second.max_decree > garbage_max_decree\n                {\n                    // should not delete this file\n                    
dinfo(\"gc @ %d.%d: max_decree for %s is %\" PRId64 \" vs %\" PRId64\n                          \" as garbage max decree,\"\n                          \" it is therefore not allowed to delete this and all older logs\",\n                          gpid.get_app_id(),\n                          gpid.get_partition_index(),\n                          log->path().c_str(),\n                          it3->second.max_decree,\n                          garbage_max_decree);\n                    prevent_gc_replicas_for_this_log.insert(gpid);\n                    decree gap = it3->second.max_decree - garbage_max_decree;\n                    if (log->index() < stop_gc_log_index || gap > stop_gc_decree_gap) {\n                        // record the max gap replica for the smallest log\n                        stop_gc_replica = gpid;\n                        stop_gc_log_index = log->index();\n                        stop_gc_decree_gap = gap;\n                        stop_gc_garbage_max_decree = garbage_max_decree;\n                        stop_gc_log_max_decree = it3->second.max_decree;\n                    }\n                }\n\n                if (kickout_this_replica) {\n                    // files before this file is useless for this replica,\n                    // so from now on, this replica will not be considered anymore\n                    kickout_replicas.insert(gpid);\n                }\n\n                if (!delete_ok_for_this_replica) {\n                    // can not delete this file, mark it, and continue to check other replicas\n                    delete_ok = false;\n                }\n            }\n\n            // update prevent_gc_replicas\n            if (file_count > file_count_limit && !prevent_gc_replicas_for_this_log.empty()) {\n                prevent_gc_replicas.insert(prevent_gc_replicas_for_this_log.begin(),\n                                           prevent_gc_replicas_for_this_log.end());\n            }\n        }\n\n        if (delete_ok) {\n    
        // found the largest file which can be deleted\n            break;\n        }\n\n        // update max_decrees for the next log file\n        max_decrees = log->previous_log_max_decrees();\n    }\n\n    if (mark_it == files.rend()) {\n        // no file to delete\n        if (stop_gc_decree_gap > 0) {\n            ddebug(\"gc_shared: no file can be deleted, file_count_limit = %d, \"\n                   \"reserved_log_count = %d, reserved_log_size = %\" PRId64 \", \"\n                   \"reserved_smallest_log = %d, reserved_largest_log = %d, \"\n                   \"stop_gc_log_index = %d, stop_gc_replica_count = %d, \"\n                   \"stop_gc_replica = %d.%d, stop_gc_decree_gap = %\" PRId64 \", \"\n                   \"stop_gc_garbage_max_decree = %\" PRId64 \", stop_gc_log_max_decree = %\" PRId64 \"\",\n                   file_count_limit,\n                   reserved_log_count,\n                   reserved_log_size,\n                   reserved_smallest_log,\n                   reserved_largest_log,\n                   stop_gc_log_index,\n                   (int)prevent_gc_replicas.size(),\n                   stop_gc_replica.get_app_id(),\n                   stop_gc_replica.get_partition_index(),\n                   stop_gc_decree_gap,\n                   stop_gc_garbage_max_decree,\n                   stop_gc_log_max_decree);\n        } else {\n            ddebug(\"gc_shared: no file can be deleted, file_count_limit = %d, \"\n                   \"reserved_log_count = %d, reserved_log_size = %\" PRId64 \", \"\n                   \"reserved_smallest_log = %d, reserved_largest_log = %d, \",\n                   file_count_limit,\n                   reserved_log_count,\n                   reserved_log_size,\n                   reserved_smallest_log,\n                   reserved_largest_log);\n        }\n\n        return reserved_log_count;\n    }\n\n    // ok, let's delete files in increasing order of file index\n    // to avoid making a hole in the 
file list\n    int largest_log_to_delete = mark_it->second->index();\n    int to_delete_log_count = 0;\n    int64_t to_delete_log_size = 0;\n    int deleted_log_count = 0;\n    int64_t deleted_log_size = 0;\n    int deleted_smallest_log = 0;\n    int deleted_largest_log = 0;\n    for (auto it = files.begin(); it != files.end() && it->second->index() <= largest_log_to_delete;\n         ++it) {\n        log_file_ptr log = it->second;\n        dassert(it->first == log->index(), \"%d VS %d\", it->first, log->index());\n        to_delete_log_count++;\n        to_delete_log_size += log->end_offset() - log->start_offset();\n\n        // close first\n        log->close();\n\n        // delete file\n        auto &fpath = log->path();\n        if (!dsn::utils::filesystem::remove_path(fpath)) {\n            derror(\"gc_shared: fail to remove %s, stop current gc cycle ...\", fpath.c_str());\n            break;\n        }\n\n        // delete succeed\n        ddebug(\"gc_shared: log file %s is removed\", fpath.c_str());\n        deleted_log_count++;\n        deleted_log_size += log->end_offset() - log->start_offset();\n        if (deleted_smallest_log == 0)\n            deleted_smallest_log = log->index();\n        deleted_largest_log = log->index();\n\n        // erase from _log_files\n        {\n            zauto_lock l(_lock);\n            _log_files.erase(it->first);\n            _global_start_offset =\n                _log_files.size() > 0 ? 
_log_files.begin()->second->start_offset() : 0;\n            reserved_log_count = _log_files.size();\n            reserved_log_size = total_size_no_lock();\n            if (reserved_log_count > 0) {\n                reserved_smallest_log = _log_files.begin()->first;\n                reserved_largest_log = _log_files.rbegin()->first;\n            } else {\n                reserved_smallest_log = -1;\n                reserved_largest_log = -1;\n            }\n        }\n    }\n\n    if (stop_gc_decree_gap > 0) {\n        ddebug(\"gc_shared: deleted some files, file_count_limit = %d, \"\n               \"reserved_log_count = %d, reserved_log_size = %\" PRId64 \", \"\n               \"reserved_smallest_log = %d, reserved_largest_log = %d, \"\n               \"to_delete_log_count = %d, to_delete_log_size = %\" PRId64 \", \"\n               \"deleted_log_count = %d, deleted_log_size = %\" PRId64 \", \"\n               \"deleted_smallest_log = %d, deleted_largest_log = %d, \"\n               \"stop_gc_log_index = %d, stop_gc_replica_count = %d, \"\n               \"stop_gc_replica = %d.%d, stop_gc_decree_gap = %\" PRId64 \", \"\n               \"stop_gc_garbage_max_decree = %\" PRId64 \", stop_gc_log_max_decree = %\" PRId64 \"\",\n               file_count_limit,\n               reserved_log_count,\n               reserved_log_size,\n               reserved_smallest_log,\n               reserved_largest_log,\n               to_delete_log_count,\n               to_delete_log_size,\n               deleted_log_count,\n               deleted_log_size,\n               deleted_smallest_log,\n               deleted_largest_log,\n               stop_gc_log_index,\n               (int)prevent_gc_replicas.size(),\n               stop_gc_replica.get_app_id(),\n               stop_gc_replica.get_partition_index(),\n               stop_gc_decree_gap,\n               stop_gc_garbage_max_decree,\n               stop_gc_log_max_decree);\n    } else {\n        ddebug(\"gc_shared: deleted 
some files, file_count_limit = %d, \"\n               \"reserved_log_count = %d, reserved_log_size = %\" PRId64 \", \"\n               \"reserved_smallest_log = %d, reserved_largest_log = %d, \"\n               \"to_delete_log_count = %d, to_delete_log_size = %\" PRId64 \", \"\n               \"deleted_log_count = %d, deleted_log_size = %\" PRId64 \", \"\n               \"deleted_smallest_log = %d, deleted_largest_log = %d\",\n               file_count_limit,\n               reserved_log_count,\n               reserved_log_size,\n               reserved_smallest_log,\n               reserved_largest_log,\n               to_delete_log_count,\n               to_delete_log_size,\n               deleted_log_count,\n               deleted_log_size,\n               deleted_smallest_log,\n               deleted_largest_log);\n    }\n\n    return reserved_log_count;\n}\n\nstd::map<int, log_file_ptr> mutation_log::get_log_file_map() const\n{\n    zauto_lock l(_lock);\n    return _log_files;\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/mutation_log.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include \"common/replication_common.h\"\n#include \"mutation.h\"\n#include \"log_block.h\"\n#include \"log_file.h\"\n\n#include <atomic>\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/utility/errors.h>\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n#include <dsn/dist/replication/replica_base.h>\n\nnamespace dsn {\nnamespace replication {\n\n//\n// manage a sequence of continuous mutation log files\n// each log file name is: log.{index}.{global_start_offset}\n//\n// this class is thread safe\n//\nclass replica;\nclass mutation_log : public ref_counter\n{\npublic:\n    // DEPRECATED: The returned bool value will never be evaluated.\n    // Always return true in the callback.\n    typedef std::function<bool(int 
log_length, mutation_ptr &)> replay_callback;\n\n    typedef std::function<void(dsn::error_code err)> io_failure_callback;\n\npublic:\n    // append a log mutation\n    // return value: nullptr for error\n    // thread safe\n    virtual ::dsn::task_ptr append(mutation_ptr &mu,\n                                   dsn::task_code callback_code,\n                                   dsn::task_tracker *tracker,\n                                   aio_handler &&callback,\n                                   int hash = 0,\n                                   int64_t *pending_size = nullptr) = 0;\n\n    // get learn state in memory, including pending and writing mutations\n    // return true if some data is filled into writer\n    // return false if no data is filled into writer\n    // thread safe\n    virtual bool get_learn_state_in_memory(decree start_decree, binary_writer &writer) const\n    {\n        return false;\n    }\n\n    // only for private log\n    // get in-memory mutations, including pending and writing mutations\n    virtual void get_in_memory_mutations(decree start_decree,\n                                         ballot current_ballot,\n                                         /*out*/ std::vector<mutation_ptr> &mutations_list) const\n    {\n    }\n\n    // flush the pending buffer until all data is on disk\n    // thread safe\n    virtual void flush() = 0;\n\n    // flush the pending buffer at most once\n    // thread safe\n    virtual void flush_once() = 0;\n\npublic:\n    //\n    // ctors\n    // when is_private = true, should specify \"private_gpid\"\n    //\n    mutation_log(const std::string &dir, int32_t max_log_file_mb, gpid gpid, replica *r = nullptr);\n\n    virtual ~mutation_log() = default;\n\n    //\n    // initialization\n    //\n\n    // open and replay\n    // returns ERR_OK if succeed\n    // not thread safe, but only be called when init\n    error_code open(replay_callback read_callback, io_failure_callback write_error_callback);\n    
error_code open(replay_callback read_callback,\n                    io_failure_callback write_error_callback,\n                    const std::map<gpid, decree> &replay_condition);\n    // close the log\n    // thread safe\n    void close();\n\n    //\n    // replay\n    //\n    static error_code replay(std::vector<std::string> &log_files,\n                             replay_callback callback,\n                             /*out*/ int64_t &end_offset);\n\n    // Reads a series of mutations from the log file (from `start_offset` of `log`),\n    // and iterates over the mutations, executing the provided `callback` for each\n    // mutation entry.\n    // Since the logs are packed into multiple blocks, this function retrieves\n    // only one log block at a time.\n    //\n    // Parameters:\n    // - callback: the callback to execute for each mutation.\n    // - start_offset: file offset to start.\n    //\n    // Returns:\n    // - ERR_INVALID_DATA: if the loaded data is incorrect or invalid.\n    //\n    static error_s replay_block(log_file_ptr &log,\n                                replay_callback &callback,\n                                size_t start_offset,\n                                /*out*/ int64_t &end_offset);\n    static error_s replay_block(log_file_ptr &log,\n                                replay_callback &&callback,\n                                size_t start_offset,\n                                /*out*/ int64_t &end_offset)\n    {\n        return replay_block(log, callback, start_offset, end_offset);\n    }\n\n    // Resets mutation log with log files under `dir`.\n    // The original log will be removed after this call.\n    // NOTE: log should be opened before this method called. 
now it only be used private log\n    error_code reset_from(const std::string &dir, replay_callback, io_failure_callback);\n\n    //\n    // maintain max_decree & valid_start_offset\n    //\n\n    // when open a exist replica, need to set valid_start_offset on open\n    // thread safe\n    void set_valid_start_offset_on_open(gpid gpid, int64_t valid_start_offset);\n\n    // when create a new replica, need to reset current max decree\n    // returns current global end offset, needs to be remebered by caller for gc usage\n    // thread safe\n    int64_t on_partition_reset(gpid gpid, decree max_decree);\n\n    // remove entry from _previous_log_max_decrees when a partition is removed.\n    // only used for private log.\n    // thread safe\n    void on_partition_removed(gpid gpid);\n\n    // update current max decree\n    // thread safe\n    void update_max_decree(gpid gpid, decree d);\n\n    // update current max commit of private log\n    // thread safe\n    void update_max_commit_on_disk(decree d);\n\n    //\n    //  garbage collection logs that are already covered by\n    //  durable state on disk, return deleted log segment count\n    //\n\n    // garbage collection for private log, returns removed file count.\n    // can remove log files if satisfy all the conditions:\n    //  - the file is not the current log file\n    //  - the file is not covered by reserve_max_size or reserve_max_time\n    //  - file.max_decree <= \"durable_decree\" || file.end_offset <= \"valid_start_offset\"\n    // that means, should reserve files if satisfy one of the conditions:\n    //  - the file is the current log file\n    //  - the file is covered by both reserve_max_size and reserve_max_time\n    //  - file.max_decree > \"durable_decree\" && file.end_offset > \"valid_start_offset\"\n    // thread safe\n    int garbage_collection(gpid gpid,\n                           decree durable_decree,\n                           int64_t valid_start_offset,\n                           int64_t 
reserve_max_size,\n                           int64_t reserve_max_time);\n\n    // garbage collection for shared log, returns reserved file count.\n    // `prevent_gc_replicas' will store replicas which prevent log files out of `file_count_limit'\n    // to be deleted.\n    // remove log files if satisfy:\n    //  - for each replica \"r\":\n    //         r is not in file.max_decree\n    //      || file.max_decree[r] <= gc_condition[r].max_decree\n    //      || file.end_offset[r] <= gc_condition[r].valid_start_offset\n    //  - the current log file should not be removed\n    // thread safe\n    int garbage_collection(const replica_log_info_map &gc_condition,\n                           int file_count_limit,\n                           std::set<gpid> &prevent_gc_replicas);\n\n    //\n    // when this is a private log, log files are learned by remote replicas\n    // return true if private log surely covers the learning range\n    //\n    bool get_learn_state(gpid gpid, decree start, /*out*/ learn_state &state) const;\n\n    // only valid for private log\n    // get parent mutations in memory and private log files during partition split\n    // total_file_size is used for split perf-counter\n    void get_parent_mutations_and_logs(gpid pid,\n                                       decree start_decree,\n                                       ballot start_ballot,\n                                       /*out*/ std::vector<mutation_ptr> &mutation_list,\n                                       /*out*/ std::vector<std::string> &files,\n                                       /*out*/ uint64_t &total_file_size) const;\n\n    //\n    //  other inquiry routines\n    //\n\n    // log dir\n    // thread safe (because nerver changed)\n    const std::string &dir() const { return _dir; }\n\n    // replica\n    replica *owner_replica() const { return _owner_replica; }\n\n    // get current max decree for gpid\n    // returns 0 if not found\n    // thread safe\n    decree 
max_decree(gpid gpid) const;\n\n    // get current max commit on disk of private log.\n    // thread safe\n    decree max_commit_on_disk() const;\n\n    // Decree of the maximum garbage-collected mutation.\n    // For example, given mutations [20, 100], if [20, 50] is garbage-collected,\n    // the max_gced_decree=50.\n    // Under the real-world cases, the mutations may not be ordered with the file-id.\n    // Given 3 log files:\n    //   #1:[20, 30], #2:[30, 50], #3:[10, 50]\n    // The third file is learned from primary of new epoch. Since it contains mutations smaller\n    // than the others, the max_gced_decree = 9.\n    // Returns `invalid_decree` when plog directory is empty.\n    //\n    // thread-safe & private log only\n    decree max_gced_decree(gpid gpid) const;\n    decree max_gced_decree_no_lock(gpid gpid) const;\n\n    // thread-safe\n    std::map<int, log_file_ptr> get_log_file_map() const;\n\n    // check the consistence of valid_start_offset\n    // thread safe\n    void check_valid_start_offset(gpid gpid, int64_t valid_start_offset) const;\n\n    // get total size.\n    int64_t total_size() const;\n\n    void hint_switch_file() { _switch_file_hint = true; }\n    void demand_switch_file() { _switch_file_demand = true; }\n\n    task_tracker *tracker() { return &_tracker; }\n\nprotected:\n    // thread-safe\n    // 'size' is data size to write; the '_global_end_offset' will be updated by 'size'.\n    // can switch file only when create_new_log_if_needed = true;\n    // return pair: the first is target file to write; the second is the global offset to start\n    // write\n    std::pair<log_file_ptr, int64_t> mark_new_offset(size_t size, bool create_new_log_if_needed);\n    // thread-safe\n    int64_t get_global_offset() const\n    {\n        zauto_lock l(_lock);\n        return _global_end_offset;\n    }\n\n    // init memory states\n    virtual void init_states();\n\nprivate:\n    //\n    //  internal helpers\n    //\n    static error_code 
replay(log_file_ptr log,\n                             replay_callback callback,\n                             /*out*/ int64_t &end_offset);\n\n    static error_code replay(std::map<int, log_file_ptr> &log_files,\n                             replay_callback callback,\n                             /*out*/ int64_t &end_offset);\n\n    // update max decree without lock\n    void update_max_decree_no_lock(gpid gpid, decree d);\n\n    // update max commit on disk without lock\n    void update_max_commit_on_disk_no_lock(decree d);\n\n    // create new log file and set it as the current log file\n    // returns ERR_OK if create succeed\n    // Preconditions:\n    // - _pending_write == nullptr (because we need create new pending buffer to write file header)\n    // - _lock.locked()\n    error_code create_new_log_file();\n\n    // get total size ithout lock.\n    int64_t total_size_no_lock() const;\n\nprotected:\n    std::string _dir;\n    bool _is_private;\n    gpid _private_gpid;      // only used for private log\n    replica *_owner_replica; // only used for private log\n    io_failure_callback _io_error_callback;\n\n    // options\n    int64_t _max_log_file_size_in_bytes;\n    int64_t _min_log_file_size_in_bytes;\n    bool _force_flush;\n\n    dsn::task_tracker _tracker;\n\nprivate:\n    friend class mutation_log_test;\n    friend class mock_mutation_log_private;\n    friend class mock_mutation_log_shared;\n\n    ///////////////////////////////////////////////\n    //// memory states\n    ///////////////////////////////////////////////\n    mutable zlock _lock;\n    bool _is_opened;\n    bool _switch_file_hint;\n    bool _switch_file_demand;\n\n    // logs\n    int _last_file_index;                   // new log file index = _last_file_index + 1\n    std::map<int, log_file_ptr> _log_files; // index -> log_file_ptr\n    log_file_ptr _current_log_file;         // current log file\n    int64_t _global_start_offset;           // global start offset of all files.\n          
                                  // invalid if _log_files.size() == 0.\n    int64_t _global_end_offset;             // global end offset currently\n\n    // replica log info\n    // - log_info.max_decree: the max decree of mutations up to now\n    // - log_info.valid_start_offset: the same with replica_init_info::init_offset\n\n    // replica log info for shared log\n    replica_log_info_map _shared_log_info_map;\n\n    // replica log info for private log\n    replica_log_info _private_log_info;\n    decree\n        _private_max_commit_on_disk; // the max last_committed_decree of written mutations up to now\n                                     // used for limiting garbage collection of shared log, because\n                                     // the ending of private log should be covered by shared log\n};\ntypedef dsn::ref_ptr<mutation_log> mutation_log_ptr;\n\nclass mutation_log_shared : public mutation_log\n{\npublic:\n    mutation_log_shared(const std::string &dir,\n                        int32_t max_log_file_mb,\n                        bool force_flush,\n                        perf_counter_wrapper *write_size_counter = nullptr)\n        : mutation_log(dir, max_log_file_mb, dsn::gpid(), nullptr),\n          _is_writing(false),\n          _force_flush(force_flush),\n          _write_size_counter(write_size_counter)\n    {\n    }\n\n    virtual ~mutation_log_shared() override\n    {\n        close();\n        _tracker.cancel_outstanding_tasks();\n    }\n\n    virtual ::dsn::task_ptr append(mutation_ptr &mu,\n                                   dsn::task_code callback_code,\n                                   dsn::task_tracker *tracker,\n                                   aio_handler &&callback,\n                                   int hash = 0,\n                                   int64_t *pending_size = nullptr) override;\n\n    virtual void flush() override;\n    virtual void flush_once() override;\n\nprivate:\n    // async write pending mutations into log 
file\n    // Preconditions:\n    // - _pending_write != nullptr\n    // - _issued_write.expired() == true (because only one async write is allowed at the same time)\n    // release_lock_required should always be true => this function must release the lock\n    // appropriately for less lock contention\n    void write_pending_mutations(bool release_lock_required);\n\n    void commit_pending_mutations(log_file_ptr &lf, std::shared_ptr<log_appender> &pending);\n\n    // flush at most count times\n    // if count <= 0, means flush until all data is on disk\n    void flush_internal(int max_count);\n\nprivate:\n    // bufferring - only one concurrent write is allowed\n    mutable zlock _slock;\n    std::atomic_bool _is_writing;\n    std::shared_ptr<log_appender> _pending_write;\n\n    bool _force_flush;\n    perf_counter_wrapper *_write_size_counter;\n};\n\nclass mutation_log_private : public mutation_log, private replica_base\n{\npublic:\n    // Parameters:\n    //  - batch_buffer_max_count, batch_buffer_bytes\n    //    The hint of limited size for the write buffer storing the pending mutations.\n    //    Note that the actual log block is still possible to be larger than the\n    //    hinted size.\n    mutation_log_private(const std::string &dir, int32_t max_log_file_mb, gpid gpid, replica *r);\n\n    ~mutation_log_private() override\n    {\n        close();\n        _tracker.cancel_outstanding_tasks();\n    }\n\n    virtual ::dsn::task_ptr append(mutation_ptr &mu,\n                                   dsn::task_code callback_code,\n                                   dsn::task_tracker *tracker,\n                                   aio_handler &&callback,\n                                   int hash = 0,\n                                   int64_t *pending_size = nullptr) override;\n\n    virtual bool get_learn_state_in_memory(decree start_decree,\n                                           binary_writer &writer) const override;\n\n    // get in-memory mutations, 
including pending and writing mutations\n    virtual void\n    get_in_memory_mutations(decree start_decree,\n                            ballot start_ballot,\n                            /*out*/ std::vector<mutation_ptr> &mutation_list) const override;\n\n    virtual void flush() override;\n    virtual void flush_once() override;\n\nprivate:\n    // async write pending mutations into log file\n    // Preconditions:\n    // - _pending_write != nullptr\n    // - _issued_write.expired() == true (because only one async write is allowed at the same time)\n    // release_lock_required should always be true => this function must release the lock\n    // appropriately for less lock contention\n    void write_pending_mutations(bool release_lock_required);\n\n    void commit_pending_mutations(log_file_ptr &lf,\n                                  std::shared_ptr<log_appender> &pending,\n                                  decree max_commit);\n\n    virtual void init_states() override;\n\n    // flush at most count times\n    // if count <= 0, means flush until all data is on disk\n    void flush_internal(int max_count);\n\nprivate:\n    // buffering - only one concurrent write is allowed\n    typedef std::vector<mutation_ptr> mutations;\n    std::atomic_bool _is_writing;\n    // Writes that are emitted to `commit_log_block` but are not completely written.\n    // The weak_ptr used here is a trick. Once the pointer is freed, i.e.\n    // `_issued_write.lock() == nullptr`, it means the emitted writes all finished.\n    std::weak_ptr<log_appender> _issued_write;\n    std::shared_ptr<log_appender> _pending_write;\n    decree _pending_write_max_commit;\n    decree _pending_write_max_decree;\n    mutable zlock _plock;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/mutation_log_replay.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"mutation_log.h\"\n#include \"mutation_log_utils.h\"\n#include <dsn/utility/fail_point.h>\n#include <dsn/utility/errors.h>\n#include <dsn/dist/fmt_logging.h>\n\nnamespace dsn {\nnamespace replication {\n\n/*static*/ error_code mutation_log::replay(log_file_ptr log,\n                                           replay_callback callback,\n                                           /*out*/ int64_t &end_offset)\n{\n    end_offset = log->start_offset();\n    ddebug(\"start to replay mutation log %s, offset = [%\" PRId64 \", %\" PRId64 \"), size = %\" PRId64,\n           log->path().c_str(),\n           log->start_offset(),\n           log->end_offset(),\n           log->end_offset() - log->start_offset());\n\n    ::dsn::blob bb;\n    log->reset_stream();\n    error_s err;\n    size_t start_offset = 0;\n    while (true) {\n        err = replay_block(log, callback, start_offset, end_offset);\n        if (!err.is_ok()) {\n            // Stop immediately if failed\n            break;\n        }\n\n        start_offset = static_cast<size_t>(end_offset - log->start_offset());\n    }\n\n    ddebug(\"finish to replay mutation log (%s) [err: 
%s]\",\n           log->path().c_str(),\n           err.description().c_str());\n    return err.code();\n}\n\n/*static*/ error_s mutation_log::replay_block(log_file_ptr &log,\n                                              replay_callback &callback,\n                                              size_t start_offset,\n                                              int64_t &end_offset)\n{\n    FAIL_POINT_INJECT_F(\"mutation_log_replay_block\", [](string_view) -> error_s {\n        return error_s::make(ERR_INCOMPLETE_DATA, \"mutation_log_replay_block\");\n    });\n\n    blob bb;\n    std::unique_ptr<binary_reader> reader;\n\n    log->reset_stream(start_offset); // start reading from given offset\n    int64_t global_start_offset = start_offset + log->start_offset();\n    end_offset = global_start_offset; // reset end_offset to the start.\n\n    // reads the entire block into memory\n    error_code err = log->read_next_log_block(bb);\n    if (err != ERR_OK) {\n        return error_s::make(err, \"failed to read log block\");\n    }\n\n    reader = dsn::make_unique<binary_reader>(bb);\n    end_offset += sizeof(log_block_header);\n\n    // The first block is log_file_header.\n    if (global_start_offset == log->start_offset()) {\n        end_offset += log->read_file_header(*reader);\n        if (!log->is_right_header()) {\n            return error_s::make(ERR_INVALID_DATA, \"failed to read log file header\");\n        }\n        // continue to parsing the data block\n    }\n\n    while (!reader->is_eof()) {\n        auto old_size = reader->get_remaining_size();\n        mutation_ptr mu = mutation::read_from(*reader, nullptr);\n        dassert(nullptr != mu, \"\");\n        mu->set_logged();\n\n        if (mu->data.header.log_offset != end_offset) {\n            return FMT_ERR(ERR_INVALID_DATA,\n                           \"offset mismatch in log entry and mutation {} vs {}\",\n                           end_offset,\n                           mu->data.header.log_offset);\n   
     }\n\n        int log_length = old_size - reader->get_remaining_size();\n\n        callback(log_length, mu);\n\n        end_offset += log_length;\n    }\n\n    return error_s::ok();\n}\n\n/*static*/ error_code mutation_log::replay(std::vector<std::string> &log_files,\n                                           replay_callback callback,\n                                           /*out*/ int64_t &end_offset)\n{\n    std::map<int, log_file_ptr> logs;\n    for (auto &fpath : log_files) {\n        error_code err;\n        log_file_ptr log = log_file::open_read(fpath.c_str(), err);\n        if (log == nullptr) {\n            if (err == ERR_HANDLE_EOF || err == ERR_INCOMPLETE_DATA ||\n                err == ERR_INVALID_PARAMETERS) {\n                dinfo(\"skip file %s during log replay\", fpath.c_str());\n                continue;\n            } else {\n                return err;\n            }\n        }\n\n        dassert(\n            logs.find(log->index()) == logs.end(), \"invalid log_index, index = %d\", log->index());\n        logs[log->index()] = log;\n    }\n\n    return replay(logs, callback, end_offset);\n}\n\n/*static*/ error_code mutation_log::replay(std::map<int, log_file_ptr> &logs,\n                                           replay_callback callback,\n                                           /*out*/ int64_t &end_offset)\n{\n    int64_t g_start_offset = 0;\n    int64_t g_end_offset = 0;\n    error_code err = ERR_OK;\n    log_file_ptr last;\n\n    if (logs.size() > 0) {\n        g_start_offset = logs.begin()->second->start_offset();\n        g_end_offset = logs.rbegin()->second->end_offset();\n    }\n\n    error_s error = log_utils::check_log_files_continuity(logs);\n    if (!error.is_ok()) {\n        derror_f(\"check_log_files_continuity failed: {}\", error);\n        return error.code();\n    }\n\n    end_offset = g_start_offset;\n\n    for (auto &kv : logs) {\n        log_file_ptr &log = kv.second;\n\n        if (log->start_offset() != 
end_offset) {\n            derror(\"offset mismatch in log file offset and global offset %\" PRId64 \" vs %\" PRId64,\n                   log->start_offset(),\n                   end_offset);\n            return ERR_INVALID_DATA;\n        }\n\n        last = log;\n        err = mutation_log::replay(log, callback, end_offset);\n\n        log->close();\n\n        if (err == ERR_OK || err == ERR_HANDLE_EOF) {\n            // do nothing\n        } else if (err == ERR_INCOMPLETE_DATA) {\n            // If the file is not corrupted, it may also return the value of ERR_INCOMPLETE_DATA.\n            // In this case, the correctness is relying on the check of start_offset.\n            dwarn(\"delay handling error: %s\", err.to_string());\n        } else {\n            // for other errors, we should break\n            break;\n        }\n    }\n\n    if (err == ERR_OK || err == ERR_HANDLE_EOF) {\n        // the log may still be written when used for learning\n        dassert(g_end_offset <= end_offset,\n                \"make sure the global end offset is correct: %\" PRId64 \" vs %\" PRId64,\n                g_end_offset,\n                end_offset);\n        err = ERR_OK;\n    } else if (err == ERR_INCOMPLETE_DATA) {\n        // ignore the last incomplete block\n        err = ERR_OK;\n    } else {\n        // bad error\n        derror(\"replay mutation log failed: %s\", err.to_string());\n    }\n\n    return err;\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/mutation_log_utils.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/fail_point.h>\n\n#include \"mutation_log_utils.h\"\n\nnamespace dsn {\nnamespace replication {\nnamespace log_utils {\n\n/*extern*/ error_s open_read(string_view path, /*out*/ log_file_ptr &file)\n{\n    FAIL_POINT_INJECT_F(\"open_read\", [](string_view) -> error_s {\n        return error_s::make(ERR_FILE_OPERATION_FAILED, \"open_read\");\n    });\n\n    error_code ec;\n    file = log_file::open_read(path.data(), ec);\n    if (ec != ERR_OK) {\n        return FMT_ERR(ec, \"failed to open the log file ({})\", path);\n    }\n    return error_s::ok();\n}\n\n/*extern*/ error_s list_all_files(const std::string &dir, /*out*/ std::vector<std::string> &files)\n{\n    
FAIL_POINT_INJECT_F(\"list_all_files\", [](string_view) -> error_s {\n        return error_s::make(ERR_FILE_OPERATION_FAILED, \"list_all_files\");\n    });\n\n    if (!utils::filesystem::get_subfiles(dir, files, false)) {\n        return FMT_ERR(\n            ERR_FILE_OPERATION_FAILED, \"unable to list the files under directory ({})\", dir);\n    }\n    return error_s::ok();\n}\n\n/*extern*/\nerror_s check_log_files_continuity(const std::map<int, log_file_ptr> &logs)\n{\n    if (logs.empty()) {\n        return error_s::ok();\n    }\n\n    int last_file_index = logs.begin()->first - 1;\n    for (const auto &kv : logs) {\n        if (++last_file_index != kv.first) {\n            // this is a serious error, print all the files in list.\n            std::string all_log_files_str;\n            bool first = true;\n            for (const auto &id_file : logs) {\n                if (!first) {\n                    all_log_files_str += \", \";\n                }\n                first = false;\n                all_log_files_str += fmt::format(\n                    \"log.{}.{}\", id_file.second->index(), id_file.second->start_offset());\n            }\n\n            return FMT_ERR(\n                ERR_OBJECT_NOT_FOUND,\n                \"log file missing with index {}. Here are all the files under dir({}): [{}]\",\n                last_file_index,\n                logs.begin()->second->path(),\n                all_log_files_str);\n        }\n    }\n    return error_s::ok();\n}\n\n} // namespace log_utils\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/mutation_log_utils.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/errors.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/dist/fmt_logging.h>\n\n#include \"mutation_log.h\"\n\nnamespace dsn {\nnamespace replication {\nnamespace log_utils {\n\nextern error_s open_read(string_view path, /*out*/ log_file_ptr &file);\n\nextern error_s list_all_files(const std::string &dir, /*out*/ std::vector<std::string> &files);\n\ninline error_s open_log_file_map(const std::vector<std::string> &log_files,\n                                 /*out*/ std::map<int, log_file_ptr> &log_file_map)\n{\n    for (const std::string &fname : log_files) {\n        log_file_ptr lf;\n        error_s err = open_read(fname, lf);\n        if (!err.is_ok()) {\n            return err << 
\"open_log_file_map(log_files)\";\n        }\n        log_file_map[lf->index()] = lf;\n    }\n    return error_s::ok();\n}\n\ninline error_s open_log_file_map(const std::string &dir,\n                                 /*out*/ std::map<int, log_file_ptr> &log_file_map)\n{\n    std::vector<std::string> log_files;\n    error_s es = list_all_files(dir, log_files);\n    if (!es.is_ok()) {\n        return es << \"open_log_file_map(dir)\";\n    }\n    return open_log_file_map(log_files, log_file_map) << \"open_log_file_map(dir)\";\n}\n\nextern error_s check_log_files_continuity(const std::map<int, log_file_ptr> &logs);\n\ninline error_s check_log_files_continuity(const std::string &dir)\n{\n    std::map<int, log_file_ptr> log_file_map;\n    error_s es = open_log_file_map(dir, log_file_map);\n    if (!es.is_ok()) {\n        return es << \"check_log_files_continuity(dir)\";\n    }\n    return check_log_files_continuity(log_file_map) << \"check_log_files_continuity(dir)\";\n}\n\n} // namespace log_utils\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/prepare_list.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"prepare_list.h\"\n\n#include <dsn/utils/latency_tracer.h>\n#include <dsn/dist/fmt_logging.h>\n\nnamespace dsn {\nnamespace replication {\n\nprepare_list::prepare_list(replica_base *r,\n                           decree init_decree,\n                           int max_count,\n                           mutation_committer committer)\n    : mutation_cache(init_decree, max_count), replica_base(r)\n{\n    _committer = std::move(committer);\n    _last_committed_decree = init_decree;\n}\n\nprepare_list::prepare_list(replica_base *r, const prepare_list &parent_plist)\n    : mutation_cache(parent_plist), replica_base(r)\n{\n    _committer = parent_plist._committer;\n    _last_committed_decree = 
parent_plist._last_committed_decree;\n}\n\nvoid prepare_list::reset(decree init_decree)\n{\n    _last_committed_decree = init_decree;\n    mutation_cache::reset(init_decree, true);\n}\n\nvoid prepare_list::truncate(decree init_decree)\n{\n    while (min_decree() <= init_decree && count() > 0) {\n        pop_min();\n    }\n\n    if (count() == 0) {\n        mutation_cache::reset(init_decree, true);\n    }\n\n    _last_committed_decree = init_decree;\n}\n\nerror_code prepare_list::prepare(mutation_ptr &mu,\n                                 partition_status::type status,\n                                 bool pop_all_committed_mutations,\n                                 bool secondary_commit)\n{\n    decree d = mu->data.header.decree;\n    dcheck_gt_replica(d, last_committed_decree());\n\n    ADD_POINT(mu->_tracer);\n    error_code err;\n    switch (status) {\n    case partition_status::PS_PRIMARY:\n        // pop committed mutations if buffer is full or pop_all_committed_mutations = true\n        while ((d - min_decree() >= capacity() || pop_all_committed_mutations) &&\n               last_committed_decree() > min_decree()) {\n            pop_min();\n        }\n        return mutation_cache::put(mu);\n\n    case partition_status::PS_SECONDARY:\n    case partition_status::PS_POTENTIAL_SECONDARY:\n        // all mutations with lower decree must be ready\n        if (secondary_commit) {\n            commit(mu->data.header.last_committed_decree, COMMIT_TO_DECREE_HARD);\n        }\n        // pop committed mutations if buffer is full or pop_all_committed_mutations = true\n        while ((d - min_decree() >= capacity() || pop_all_committed_mutations) &&\n               last_committed_decree() > min_decree()) {\n            pop_min();\n        }\n        err = mutation_cache::put(mu);\n        dassert_replica(err == ERR_OK, \"mutation_cache::put failed, err = {}\", err);\n        return err;\n\n    //// delayed commit - only when capacity is an issue\n    // case 
partition_status::PS_POTENTIAL_SECONDARY:\n    //    while (true)\n    //    {\n    //        err = mutation_cache::put(mu);\n    //        if (err == ERR_CAPACITY_EXCEEDED)\n    //        {\n    //            dassert(mu->data.header.last_committed_decree >= min_decree(), \"\");\n    //            commit (min_decree(), true);\n    //            pop_min();\n    //        }\n    //        else\n    //            break;\n    //    }\n    //    dassert (err == ERR_OK, \"\");\n    //    return err;\n\n    case partition_status::PS_INACTIVE: // only possible during init\n        if (mu->data.header.last_committed_decree > max_decree()) {\n            reset(mu->data.header.last_committed_decree);\n        } else if (mu->data.header.last_committed_decree > _last_committed_decree) {\n            // all mutations with lower decree must be ready\n            commit(mu->data.header.last_committed_decree, COMMIT_TO_DECREE_HARD);\n        }\n        // pop committed mutations if buffer is full\n        while (d - min_decree() >= capacity() && last_committed_decree() > min_decree()) {\n            pop_min();\n        }\n        err = mutation_cache::put(mu);\n        dassert_replica(err == ERR_OK, \"mutation_cache::put failed, err = {}\", err);\n        return err;\n\n    default:\n        dassert(false, \"invalid partition_status, status = %s\", enum_to_string(status));\n        return dsn::ERR_OK;\n    }\n}\n\n//\n// ordered commit\n//\nvoid prepare_list::commit(decree d, commit_type ct)\n{\n    if (d <= last_committed_decree())\n        return;\n\n    ballot last_bt = 0;\n    switch (ct) {\n    case COMMIT_TO_DECREE_HARD: {\n        for (decree d0 = last_committed_decree() + 1; d0 <= d; d0++) {\n            mutation_ptr mu = get_mutation_by_decree(d0);\n\n            dassert_replica(\n                mu != nullptr && mu->is_logged(), \"mutation {} is missing in prepare list\", d0);\n            dcheck_ge_replica(mu->data.header.ballot, last_bt);\n\n            
_last_committed_decree++;\n            last_bt = mu->data.header.ballot;\n            _committer(mu);\n        }\n\n        return;\n    }\n    case COMMIT_TO_DECREE_SOFT: {\n        for (decree d0 = last_committed_decree() + 1; d0 <= d; d0++) {\n            mutation_ptr mu = get_mutation_by_decree(d0);\n            if (mu != nullptr && mu->is_ready_for_commit() && mu->data.header.ballot >= last_bt) {\n                _last_committed_decree++;\n                last_bt = mu->data.header.ballot;\n                _committer(mu);\n            } else\n                break;\n        }\n\n        return;\n    }\n    case COMMIT_ALL_READY: {\n        if (d != last_committed_decree() + 1)\n            return;\n\n        int count = 0;\n        mutation_ptr mu = get_mutation_by_decree(last_committed_decree() + 1);\n\n        while (mu != nullptr && mu->is_ready_for_commit() && mu->data.header.ballot >= last_bt) {\n            _last_committed_decree++;\n            last_bt = mu->data.header.ballot;\n            _committer(mu);\n            count++;\n            mu = mutation_cache::get_mutation_by_decree(_last_committed_decree + 1);\n        }\n\n        return;\n    }\n    default:\n        dassert(false, \"invalid commit type %d\", (int)ct);\n    }\n\n    return;\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/prepare_list.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include \"mutation_cache.h\"\n\n#include <dsn/dist/replication/replica_base.h>\n\nnamespace dsn {\nnamespace replication {\n\nenum commit_type\n{\n    COMMIT_TO_DECREE_HARD, // commit (last_committed, ...<mutations must be is_commit_ready..., d]\n    COMMIT_TO_DECREE_SOFT, // commit (last_committed, ...<if is_commit_ready mutations>.., d]\n    COMMIT_ALL_READY       // commit (last_committed, ...<all is_commit_ready mutations> ...]\n    // - only valid when partition_status::PS_SECONDARY or partition_status::PS_PRIMARY\n};\n\n// prepare_list origins from the concept of `prepared list` in PacificA.\n// It stores an continuous and ordered list of mutations.\n// The prefix of the prepared list up to a `committed point` 
is regarded as committed.\n// The prepare_list only stores the most updated part (the uncommitted suffix) of prepared list,\n// say, the committed prefix will be truncated automatically.\nclass prepare_list : public mutation_cache, private replica_base\n{\npublic:\n    typedef std::function<void(mutation_ptr &)> mutation_committer;\n\npublic:\n    prepare_list(replica_base *r, decree init_decree, int max_count, mutation_committer committer);\n    prepare_list(replica_base *r, const prepare_list &parent_plist);\n\n    decree last_committed_decree() const { return _last_committed_decree; }\n    void reset(decree init_decree);\n    void truncate(decree init_decree);\n    void set_committer(mutation_committer committer) { _committer = committer; }\n\n    //\n    // for two-phase commit\n    //\n    // if pop_all_committed_mutations = true, pop all committed mutations, will only used during\n    // bulk load ingestion\n    // if secondary_commit = true, and status is secondary or protential secondary, previous logs\n    // will be committed\n    error_code prepare(mutation_ptr &mu,\n                       partition_status::type status,\n                       bool pop_all_committed_mutations = false,\n                       bool secondary_commit = true);\n    virtual void commit(decree decree, commit_type ct); // ordered commit\n\n    virtual ~prepare_list() = default;\n\nprivate:\n    friend class mutation_buffer;\n    decree _last_committed_decree;\n    mutation_committer _committer;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"replica.h\"\n#include \"mutation.h\"\n#include \"mutation_log.h\"\n#include \"replica_stub.h\"\n#include \"duplication/replica_duplicator_manager.h\"\n#include \"duplication/replica_follower.h\"\n#include \"backup/replica_backup_manager.h\"\n#include \"backup/cold_backup_context.h\"\n#include \"bulk_load/replica_bulk_loader.h\"\n#include \"split/replica_split_manager.h\"\n#include \"replica_disk_migrator.h\"\n#include \"runtime/security/access_controller.h\"\n\n#include <dsn/utils/latency_tracer.h>\n#include <dsn/cpp/json_helper.h>\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/dist/replication/replica_envs.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/filesystem.h>\n#include 
<dsn/utility/rand.h>\n#include <dsn/utility/string_conv.h>\n#include <dsn/utility/strings.h>\n#include <dsn/tool-api/rpc_message.h>\n\nnamespace dsn {\nnamespace replication {\n\nconst std::string replica::kAppInfo = \".app-info\";\n\nreplica::replica(replica_stub *stub,\n                 gpid gpid,\n                 const app_info &app,\n                 const char *dir,\n                 bool need_restore,\n                 bool is_duplication_follower)\n    : serverlet<replica>(\"replica\"),\n      replica_base(gpid, fmt::format(\"{}@{}\", gpid, stub->_primary_address_str), app.app_name),\n      _app_info(app),\n      _primary_states(\n          gpid, stub->options().staleness_for_commit, stub->options().batch_write_disabled),\n      _potential_secondary_states(this),\n      _cold_backup_running_count(0),\n      _cold_backup_max_duration_time_ms(0),\n      _cold_backup_max_upload_file_size(0),\n      _chkpt_total_size(0),\n      _cur_download_size(0),\n      _restore_progress(0),\n      _restore_status(ERR_OK),\n      _duplication_mgr(new replica_duplicator_manager(this)),\n      // todo(jiashuo1): app.duplicating need rename\n      _is_duplication_master(app.duplicating),\n      _is_duplication_follower(is_duplication_follower),\n      _backup_mgr(new replica_backup_manager(this))\n{\n    dassert(_app_info.app_type != \"\", \"\");\n    dassert(stub != nullptr, \"\");\n    _stub = stub;\n    _dir = dir;\n    _options = &stub->options();\n    init_state();\n    _config.pid = gpid;\n    _bulk_loader = make_unique<replica_bulk_loader>(this);\n    _split_mgr = make_unique<replica_split_manager>(this);\n    _disk_migrator = make_unique<replica_disk_migrator>(this);\n    _replica_follower = make_unique<replica_follower>(this);\n\n    std::string counter_str = fmt::format(\"private.log.size(MB)@{}\", gpid);\n    _counter_private_log_size.init_app_counter(\n        \"eon.replica\", counter_str.c_str(), COUNTER_TYPE_NUMBER, counter_str.c_str());\n\n    counter_str = 
fmt::format(\"recent.write.throttling.delay.count@{}\", gpid);\n    _counter_recent_write_throttling_delay_count.init_app_counter(\n        \"eon.replica\", counter_str.c_str(), COUNTER_TYPE_VOLATILE_NUMBER, counter_str.c_str());\n\n    counter_str = fmt::format(\"recent.write.throttling.reject.count@{}\", gpid);\n    _counter_recent_write_throttling_reject_count.init_app_counter(\n        \"eon.replica\", counter_str.c_str(), COUNTER_TYPE_VOLATILE_NUMBER, counter_str.c_str());\n\n    counter_str = fmt::format(\"recent.read.throttling.delay.count@{}\", gpid);\n    _counter_recent_read_throttling_delay_count.init_app_counter(\n        \"eon.replica\", counter_str.c_str(), COUNTER_TYPE_VOLATILE_NUMBER, counter_str.c_str());\n\n    counter_str = fmt::format(\"recent.read.throttling.reject.count@{}\", gpid);\n    _counter_recent_read_throttling_reject_count.init_app_counter(\n        \"eon.replica\", counter_str.c_str(), COUNTER_TYPE_VOLATILE_NUMBER, counter_str.c_str());\n\n    counter_str =\n        fmt::format(\"recent.backup.request.throttling.delay.count@{}\", _app_info.app_name);\n    _counter_recent_backup_request_throttling_delay_count.init_app_counter(\n        \"eon.replica\", counter_str.c_str(), COUNTER_TYPE_VOLATILE_NUMBER, counter_str.c_str());\n\n    counter_str =\n        fmt::format(\"recent.backup.request.throttling.reject.count@{}\", _app_info.app_name);\n    _counter_recent_backup_request_throttling_reject_count.init_app_counter(\n        \"eon.replica\", counter_str.c_str(), COUNTER_TYPE_VOLATILE_NUMBER, counter_str.c_str());\n\n    counter_str = fmt::format(\"dup.disabled_non_idempotent_write_count@{}\", _app_info.app_name);\n    _counter_dup_disabled_non_idempotent_write_count.init_app_counter(\n        \"eon.replica\", counter_str.c_str(), COUNTER_TYPE_VOLATILE_NUMBER, counter_str.c_str());\n\n    counter_str = fmt::format(\"recent.read.splitting.reject.count@{}\", gpid);\n    _counter_recent_read_splitting_reject_count.init_app_counter(\n       
 \"eon.replica\", counter_str.c_str(), COUNTER_TYPE_VOLATILE_NUMBER, counter_str.c_str());\n\n    counter_str = fmt::format(\"recent.write.splitting.reject.count@{}\", gpid);\n    _counter_recent_write_splitting_reject_count.init_app_counter(\n        \"eon.replica\", counter_str.c_str(), COUNTER_TYPE_VOLATILE_NUMBER, counter_str.c_str());\n\n    counter_str = fmt::format(\"recent.write.bulk.load.ingestion.reject.count@{}\", gpid);\n    _counter_recent_write_bulk_load_ingestion_reject_count.init_app_counter(\n        \"eon.replica\", counter_str.c_str(), COUNTER_TYPE_VOLATILE_NUMBER, counter_str.c_str());\n\n    // init table level latency perf counters\n    init_table_level_latency_counters();\n\n    counter_str = fmt::format(\"backup_request_qps@{}\", _app_info.app_name);\n    _counter_backup_request_qps.init_app_counter(\n        \"eon.replica\", counter_str.c_str(), COUNTER_TYPE_RATE, counter_str.c_str());\n\n    if (need_restore) {\n        // add an extra env for restore\n        _extra_envs.insert(\n            std::make_pair(backup_restore_constant::FORCE_RESTORE, std::string(\"true\")));\n    }\n\n    _access_controller = security::create_replica_access_controller(name());\n}\n\nvoid replica::update_last_checkpoint_generate_time()\n{\n    _last_checkpoint_generate_time_ms = dsn_now_ms();\n    uint64_t max_interval_ms = _options->checkpoint_max_interval_hours * 3600000UL;\n    // use random trigger time to avoid flush peek\n    _next_checkpoint_interval_trigger_time_ms =\n        _last_checkpoint_generate_time_ms + rand::next_u64(max_interval_ms / 2, max_interval_ms);\n}\n\n//            //\n// Statistics //\n//            //\n\nvoid replica::update_commit_qps(int count)\n{\n    _stub->_counter_replicas_commit_qps->add((uint64_t)count);\n}\n\nvoid replica::init_state()\n{\n    _inactive_is_transient = false;\n    _is_initializing = false;\n    _prepare_list = dsn::make_unique<prepare_list>(\n        this,\n        0,\n        
_options->max_mutation_count_in_prepare_list,\n        std::bind(&replica::execute_mutation, this, std::placeholders::_1));\n\n    _config.ballot = 0;\n    _config.pid.set_app_id(0);\n    _config.pid.set_partition_index(0);\n    _config.status = partition_status::PS_INACTIVE;\n    _primary_states.membership.ballot = 0;\n    _create_time_ms = dsn_now_ms();\n    _last_config_change_time_ms = _create_time_ms;\n    update_last_checkpoint_generate_time();\n    _private_log = nullptr;\n    init_disk_tag();\n    get_bool_envs(_app_info.envs, replica_envs::ROCKSDB_ALLOW_INGEST_BEHIND, _allow_ingest_behind);\n}\n\nreplica::~replica(void)\n{\n    close();\n    _prepare_list = nullptr;\n    dinfo(\"%s: replica destroyed\", name());\n}\n\nvoid replica::on_client_read(dsn::message_ex *request, bool ignore_throttling)\n{\n    if (!_access_controller->allowed(request)) {\n        response_client_read(request, ERR_ACL_DENY);\n        return;\n    }\n\n    if (_deny_client.read) {\n        if (_deny_client.reconfig) {\n            // return ERR_INVALID_STATE will trigger client update config immediately\n            response_client_read(request, ERR_INVALID_STATE);\n            return;\n        }\n        // Do not reply any message to the peer client to let it timeout, it's OK coz some users\n        // may retry immediately when they got a not success code which will make the server side\n        // pressure more and more heavy.\n        return;\n    }\n\n    CHECK_REQUEST_IF_SPLITTING(read)\n\n    if (status() == partition_status::PS_INACTIVE ||\n        status() == partition_status::PS_POTENTIAL_SECONDARY) {\n        response_client_read(request, ERR_INVALID_STATE);\n        return;\n    }\n\n    if (!request->is_backup_request()) {\n        // only backup request is allowed to read from a stale replica\n\n        if (!ignore_throttling && throttle_read_request(request)) {\n            return;\n        }\n\n        if (status() != partition_status::PS_PRIMARY) {\n            
response_client_read(request, ERR_INVALID_STATE);\n            return;\n        }\n\n        // a small window where the state is not the latest yet\n        if (last_committed_decree() < _primary_states.last_prepare_decree_on_new_primary) {\n            derror_replica(\"last_committed_decree(%\" PRId64\n                           \") < last_prepare_decree_on_new_primary(%\" PRId64 \")\",\n                           last_committed_decree(),\n                           _primary_states.last_prepare_decree_on_new_primary);\n            response_client_read(request, ERR_INVALID_STATE);\n            return;\n        }\n    } else {\n        if (!ignore_throttling && throttle_backup_request(request)) {\n            return;\n        }\n        _counter_backup_request_qps->increment();\n    }\n\n    uint64_t start_time_ns = dsn_now_ns();\n    dassert(_app != nullptr, \"\");\n    _app->on_request(request);\n\n    // If the corresponding perf counter exist, count the duration of this operation.\n    // rpc code of request is already checked in message_ex::rpc_code, so it will always be legal\n    if (_counters_table_level_latency[request->rpc_code()] != nullptr) {\n        _counters_table_level_latency[request->rpc_code()]->set(dsn_now_ns() - start_time_ns);\n    }\n}\n\nvoid replica::response_client_read(dsn::message_ex *request, error_code error)\n{\n    _stub->response_client(get_gpid(), true, request, status(), error);\n}\n\nvoid replica::response_client_write(dsn::message_ex *request, error_code error)\n{\n    _stub->response_client(get_gpid(), false, request, status(), error);\n}\n\nvoid replica::check_state_completeness()\n{\n    /* prepare commit durable */\n    dassert(max_prepared_decree() >= last_committed_decree(),\n            \"%\" PRId64 \" VS %\" PRId64 \"\",\n            max_prepared_decree(),\n            last_committed_decree());\n    dassert(last_committed_decree() >= last_durable_decree(),\n            \"%\" PRId64 \" VS %\" PRId64 \"\",\n            
last_committed_decree(),\n            last_durable_decree());\n}\n\nvoid replica::execute_mutation(mutation_ptr &mu)\n{\n    dinfo(\"%s: execute mutation %s: request_count = %u\",\n          name(),\n          mu->name(),\n          static_cast<int>(mu->client_requests.size()));\n\n    error_code err = ERR_OK;\n    decree d = mu->data.header.decree;\n\n    switch (status()) {\n    case partition_status::PS_INACTIVE:\n        if (_app->last_committed_decree() + 1 == d) {\n            err = _app->apply_mutation(mu);\n        } else {\n            dinfo(\"%s: mutation %s commit to %s skipped, app.last_committed_decree = %\" PRId64,\n                  name(),\n                  mu->name(),\n                  enum_to_string(status()),\n                  _app->last_committed_decree());\n        }\n        break;\n    case partition_status::PS_PRIMARY: {\n        ADD_POINT(mu->_tracer);\n        check_state_completeness();\n        dassert(_app->last_committed_decree() + 1 == d,\n                \"app commit: %\" PRId64 \", mutation decree: %\" PRId64 \"\",\n                _app->last_committed_decree(),\n                d);\n        err = _app->apply_mutation(mu);\n    } break;\n\n    case partition_status::PS_SECONDARY:\n        if (!_secondary_states.checkpoint_is_running) {\n            check_state_completeness();\n            dassert(_app->last_committed_decree() + 1 == d,\n                    \"%\" PRId64 \" VS %\" PRId64 \"\",\n                    _app->last_committed_decree() + 1,\n                    d);\n            err = _app->apply_mutation(mu);\n        } else {\n            dinfo(\"%s: mutation %s commit to %s skipped, app.last_committed_decree = %\" PRId64,\n                  name(),\n                  mu->name(),\n                  enum_to_string(status()),\n                  _app->last_committed_decree());\n\n            // make sure private log saves the state\n            // catch-up will be done later after checkpoint task is fininished\n            
dassert(_private_log != nullptr, \"\");\n        }\n        break;\n    case partition_status::PS_POTENTIAL_SECONDARY:\n        if (_potential_secondary_states.learning_status == learner_status::LearningSucceeded ||\n            _potential_secondary_states.learning_status ==\n                learner_status::LearningWithPrepareTransient) {\n            dassert(_app->last_committed_decree() + 1 == d,\n                    \"%\" PRId64 \" VS %\" PRId64 \"\",\n                    _app->last_committed_decree() + 1,\n                    d);\n            err = _app->apply_mutation(mu);\n        } else {\n            dinfo(\"%s: mutation %s commit to %s skipped, app.last_committed_decree = %\" PRId64,\n                  name(),\n                  mu->name(),\n                  enum_to_string(status()),\n                  _app->last_committed_decree());\n\n            // prepare also happens with learner_status::LearningWithPrepare, in this case\n            // make sure private log saves the state,\n            // catch-up will be done later after the checkpoint task is finished\n            dassert(_private_log != nullptr, \"\");\n        }\n        break;\n    case partition_status::PS_PARTITION_SPLIT:\n        if (_split_states.is_caught_up) {\n            dcheck_eq(_app->last_committed_decree() + 1, d);\n            err = _app->apply_mutation(mu);\n        }\n        break;\n    case partition_status::PS_ERROR:\n        break;\n    default:\n        dassert(false, \"invalid partition_status, status = %s\", enum_to_string(status()));\n    }\n\n    dinfo(\n        \"TwoPhaseCommit, %s: mutation %s committed, err = %s\", name(), mu->name(), err.to_string());\n\n    if (err != ERR_OK) {\n        handle_local_failure(err);\n    }\n\n    if (status() == partition_status::PS_PRIMARY) {\n        ADD_CUSTOM_POINT(mu->_tracer, \"completed\");\n        mutation_ptr next = _primary_states.write_queue.check_possible_work(\n            static_cast<int>(_prepare_list->max_decree() - 
d));\n\n        if (next) {\n            init_prepare(next, false);\n        }\n    }\n\n    // update table level latency perf-counters for primary partition\n    if (partition_status::PS_PRIMARY == status()) {\n        uint64_t now_ns = dsn_now_ns();\n        for (auto update : mu->data.updates) {\n            // If the corresponding perf counter exist, count the duration of this operation.\n            // code in update will always be legal\n            if (_counters_table_level_latency[update.code] != nullptr) {\n                _counters_table_level_latency[update.code]->set(now_ns - update.start_time_ns);\n            }\n        }\n    }\n}\n\nmutation_ptr replica::new_mutation(decree decree)\n{\n    mutation_ptr mu(new mutation());\n    mu->data.header.pid = get_gpid();\n    mu->data.header.ballot = get_ballot();\n    mu->data.header.decree = decree;\n    mu->data.header.log_offset = invalid_offset;\n    return mu;\n}\n\ndecree replica::last_durable_decree() const { return _app->last_durable_decree(); }\n\ndecree replica::last_flushed_decree() const { return _app->last_flushed_decree(); }\n\ndecree replica::last_prepared_decree() const\n{\n    ballot lastBallot = 0;\n    decree start = last_committed_decree();\n    while (true) {\n        auto mu = _prepare_list->get_mutation_by_decree(start + 1);\n        if (mu == nullptr || mu->data.header.ballot < lastBallot || !mu->is_logged())\n            break;\n\n        start++;\n        lastBallot = mu->data.header.ballot;\n    }\n    return start;\n}\n\nbool replica::verbose_commit_log() const { return _stub->_verbose_commit_log; }\n\nvoid replica::close()\n{\n    dassert_replica(status() == partition_status::PS_ERROR ||\n                        status() == partition_status::PS_INACTIVE ||\n                        _disk_migrator->status() == disk_migration_status::IDLE ||\n                        _disk_migrator->status() >= disk_migration_status::MOVED,\n                    \"invalid state(partition_status={}, 
migration_status={}) when calling \"\n                    \"replica close\",\n                    enum_to_string(status()),\n                    enum_to_string(_disk_migrator->status()));\n\n    uint64_t start_time = dsn_now_ms();\n\n    if (_checkpoint_timer != nullptr) {\n        _checkpoint_timer->cancel(true);\n        _checkpoint_timer = nullptr;\n    }\n\n    _tracker.cancel_outstanding_tasks();\n\n    cleanup_preparing_mutations(true);\n    dassert(_primary_states.is_cleaned(), \"primary context is not cleared\");\n\n    if (partition_status::PS_INACTIVE == status()) {\n        dassert(_secondary_states.is_cleaned(), \"secondary context is not cleared\");\n        dassert(_potential_secondary_states.is_cleaned(),\n                \"potential secondary context is not cleared\");\n        dassert(_split_states.is_cleaned(), \"partition split context is not cleared\");\n    }\n\n    // for partition_status::PS_ERROR, context cleanup is done here as they may block\n    else {\n        bool r = _secondary_states.cleanup(true);\n        dassert(r, \"secondary context is not cleared\");\n\n        r = _potential_secondary_states.cleanup(true);\n        dassert(r, \"potential secondary context is not cleared\");\n\n        r = _split_states.cleanup(true);\n        dassert_replica(r, \"partition split context is not cleared\");\n    }\n\n    if (_private_log != nullptr) {\n        _private_log->close();\n        _private_log = nullptr;\n    }\n\n    if (_app != nullptr) {\n        std::unique_ptr<replication_app_base> tmp_app = std::move(_app);\n        error_code err = tmp_app->close(false);\n        if (err != dsn::ERR_OK) {\n            dwarn(\"%s: close app failed, err = %s\", name(), err.to_string());\n        }\n    }\n\n    if (_disk_migrator->status() == disk_migration_status::MOVED) {\n        // this will update disk_migration_status::MOVED->disk_migration_status::CLOSED\n        _disk_migrator->update_replica_dir();\n    } else if (_disk_migrator->status() 
== disk_migration_status::CLOSED) {\n        _disk_migrator.reset();\n    }\n\n    _counter_private_log_size.clear();\n\n    // duplication_impl may have ongoing tasks.\n    // release it before release replica.\n    _duplication_mgr.reset();\n\n    _backup_mgr.reset();\n\n    _bulk_loader.reset();\n\n    _split_mgr.reset();\n\n    ddebug(\"%s: replica closed, time_used = %\" PRIu64 \"ms\", name(), dsn_now_ms() - start_time);\n}\n\nstd::string replica::query_manual_compact_state() const\n{\n    dassert_replica(_app != nullptr, \"\");\n    return _app->query_compact_state();\n}\n\nmanual_compaction_status::type replica::get_manual_compact_status() const\n{\n    dassert_replica(_app != nullptr, \"\");\n    return _app->query_compact_status();\n}\n\n// Replicas on the server which serves for the same table will share the same perf-counter.\n// For example counter `table.level.RPC_RRDB_RRDB_MULTI_PUT.latency(ns)@test_table` is shared by\n// all the replicas for `test_table`.\nvoid replica::init_table_level_latency_counters()\n{\n    int max_task_code = task_code::max();\n    _counters_table_level_latency.resize(max_task_code + 1);\n\n    for (int code = 0; code <= max_task_code; code++) {\n        _counters_table_level_latency[code] = nullptr;\n        if (get_storage_rpc_req_codes().find(task_code(code)) !=\n            get_storage_rpc_req_codes().end()) {\n            std::string counter_str = fmt::format(\n                \"table.level.{}.latency(ns)@{}\", task_code(code).to_string(), _app_info.app_name);\n            _counters_table_level_latency[code] =\n                dsn::perf_counters::instance()\n                    .get_app_counter(\"eon.replica\",\n                                     counter_str.c_str(),\n                                     COUNTER_TYPE_NUMBER_PERCENTILES,\n                                     counter_str.c_str(),\n                                     true)\n                    .get();\n        }\n    }\n}\n\nvoid 
replica::on_detect_hotkey(const detect_hotkey_request &req, detect_hotkey_response &resp)\n{\n    _app->on_detect_hotkey(req, resp);\n}\n\nuint32_t replica::query_data_version() const\n{\n    dassert_replica(_app != nullptr, \"\");\n    return _app->query_data_version();\n}\n\nvoid replica::init_disk_tag()\n{\n    dsn::error_code err = _stub->_fs_manager.get_disk_tag(dir(), _disk_tag);\n    if (dsn::ERR_OK != err) {\n        derror_replica(\"get disk tag of {} failed: {}, init it to empty \", dir(), err);\n    }\n}\n\nerror_code replica::store_app_info(app_info &info, const std::string &path)\n{\n    replica_app_info new_info((app_info *)&info);\n    const auto &info_path = path.empty() ? utils::filesystem::path_combine(_dir, kAppInfo) : path;\n    auto err = new_info.store(info_path);\n    if (dsn_unlikely(err != ERR_OK)) {\n        derror_replica(\"failed to save app_info to {}, error = {}\", info_path, err);\n    }\n    return err;\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     replica interface, the base object which rdsn replicates\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n//\n// a replica is a replication partition of a serivce,\n// which handles all replication related issues\n// and on_request the app messages to replication_app_base\n// which is binded to this replication partition\n//\n\n#include <dsn/tool-api/uniq_timestamp_us.h>\n#include <dsn/tool-api/thread_access_checker.h>\n#include <dsn/cpp/serverlet.h>\n\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n#include <dsn/dist/replication/replica_base.h>\n\n#include \"common/replication_common.h\"\n#include 
\"mutation.h\"\n#include \"mutation_log.h\"\n#include \"prepare_list.h\"\n#include \"replica_context.h\"\n#include \"utils/throttling_controller.h\"\n\nnamespace dsn {\nnamespace security {\nclass access_controller;\n} // namespace security\nnamespace replication {\n\nclass replication_app_base;\nclass replica_stub;\nclass replica_duplicator_manager;\nclass replica_backup_manager;\nclass replica_bulk_loader;\nclass replica_split_manager;\nclass replica_disk_migrator;\nclass replica_follower;\n\nclass cold_backup_context;\ntypedef dsn::ref_ptr<cold_backup_context> cold_backup_context_ptr;\nstruct cold_backup_metadata;\n\nnamespace test {\nclass test_checker;\n}\n\n#define CHECK_REQUEST_IF_SPLITTING(op_type)                                                        \\\n    if (_validate_partition_hash) {                                                                \\\n        if (_split_mgr->should_reject_request()) {                                                 \\\n            response_client_##op_type(request, ERR_SPLITTING);                                     \\\n            _counter_recent_##op_type##_splitting_reject_count->increment();                       \\\n            return;                                                                                \\\n        }                                                                                          \\\n        if (!_split_mgr->check_partition_hash(                                                     \\\n                ((dsn::message_ex *)request)->header->client.partition_hash, #op_type)) {          \\\n            response_client_##op_type(request, ERR_PARENT_PARTITION_MISUSED);                      \\\n            return;                                                                                \\\n        }                                                                                          \\\n    }\n\nDSN_DECLARE_bool(reject_write_when_disk_insufficient);\n\n// get bool envs[name], 
return false if value is not bool\nbool get_bool_envs(const std::map<std::string, std::string> &envs,\n                   const std::string &name,\n                   /*out*/ bool &value);\n\nstruct deny_client\n{\n    bool read{false};\n    bool write{false};\n    // deny client and trigger client update partition config by response `ERR_INVALID_STATE`\n    bool reconfig{false};\n\n    void reset()\n    {\n        read = false;\n        write = false;\n        reconfig = false;\n    }\n\n    bool operator==(const deny_client &rhs) const\n    {\n        return (write == rhs.write && read == rhs.read && reconfig == rhs.reconfig);\n    }\n};\n\nclass replica : public serverlet<replica>, public ref_counter, public replica_base\n{\npublic:\n    ~replica(void);\n\n    //\n    //    routines for replica stub\n    //\n    static replica *load(replica_stub *stub, const char *dir);\n    // {parent_dir} is used in partition split for get_child_dir in replica_stub\n    static replica *newr(replica_stub *stub,\n                         gpid gpid,\n                         const app_info &app,\n                         bool restore_if_necessary,\n                         bool is_duplication_follower,\n                         const std::string &parent_dir = \"\");\n\n    // return true when the mutation is valid for the current replica\n    bool replay_mutation(mutation_ptr &mu, bool is_private);\n    void reset_prepare_list_after_replay();\n\n    // return false when update fails or replica is going to be closed\n    bool update_local_configuration_with_no_ballot_change(partition_status::type status);\n    void set_inactive_state_transient(bool t);\n    void check_state_completeness();\n    // error_code check_and_fix_private_log_completeness();\n\n    // close() will wait all traced tasks to finish\n    void close();\n\n    //\n    //    requests from clients\n    //\n    void on_client_write(message_ex *request, bool ignore_throttling = false);\n    void 
on_client_read(message_ex *request, bool ignore_throttling = false);\n\n    //\n    //    messages and tools from/for meta server\n    //\n    void on_config_proposal(configuration_update_request &proposal);\n    void on_config_sync(const app_info &info,\n                        const partition_configuration &config,\n                        split_status::type meta_split_status);\n    void on_cold_backup(const backup_request &request, /*out*/ backup_response &response);\n\n    //\n    //    messages from peers (primary or secondary)\n    //\n    void on_prepare(dsn::message_ex *request);\n    void on_learn(dsn::message_ex *msg, const learn_request &request);\n    void on_learn_completion_notification(const group_check_response &report,\n                                          /*out*/ learn_notify_response &response);\n    void on_learn_completion_notification_reply(error_code err,\n                                                group_check_response &&report,\n                                                learn_notify_response &&resp);\n    void on_add_learner(const group_check_request &request);\n    void on_remove(const replica_configuration &request);\n    void on_group_check(const group_check_request &request, /*out*/ group_check_response &response);\n\n    //\n    //    messages from liveness monitor\n    //\n    void on_meta_server_disconnected();\n\n    //\n    //  routine for testing purpose only\n    //\n    void inject_error(error_code err);\n\n    //\n    //  local information query\n    //\n    const ballot &get_ballot() const { return _config.ballot; }\n    partition_status::type status() const { return _config.status; }\n    replication_app_base *get_app() { return _app.get(); }\n    const app_info *get_app_info() const { return &_app_info; }\n    decree max_prepared_decree() const { return _prepare_list->max_decree(); }\n    decree last_committed_decree() const { return _prepare_list->last_committed_decree(); }\n    decree last_prepared_decree() 
const;\n    decree last_durable_decree() const;\n    decree last_flushed_decree() const;\n    const std::string &dir() const { return _dir; }\n    uint64_t create_time_milliseconds() const { return _create_time_ms; }\n    const char *name() const { return replica_name(); }\n    mutation_log_ptr private_log() const { return _private_log; }\n    const replication_options *options() const { return _options; }\n    replica_stub *get_replica_stub() { return _stub; }\n    bool verbose_commit_log() const;\n    dsn::task_tracker *tracker() { return &_tracker; }\n\n    //\n    // Duplication\n    //\n    error_code trigger_manual_emergency_checkpoint(decree old_decree);\n    void on_query_last_checkpoint(learn_response &response);\n    replica_duplicator_manager *get_duplication_manager() const { return _duplication_mgr.get(); }\n    bool is_duplication_master() const { return _is_duplication_master; }\n    bool is_duplication_follower() const { return _is_duplication_follower; }\n\n    //\n    // Backup\n    //\n    replica_backup_manager *get_backup_manager() const { return _backup_mgr.get(); }\n\n    void update_last_checkpoint_generate_time();\n\n    //\n    // Bulk load\n    //\n    replica_bulk_loader *get_bulk_loader() const { return _bulk_loader.get(); }\n    inline uint64_t ingestion_duration_ms() const\n    {\n        return _bulk_load_ingestion_start_time_ms > 0\n                   ? 
(dsn_now_ms() - _bulk_load_ingestion_start_time_ms)\n                   : 0;\n    }\n\n    //\n    // Partition Split\n    //\n    replica_split_manager *get_split_manager() const { return _split_mgr.get(); }\n\n    //\n    // Disk migrator\n    //\n    replica_disk_migrator *disk_migrator() const { return _disk_migrator.get(); }\n\n    replica_follower *get_replica_follower() const { return _replica_follower.get(); };\n\n    //\n    // Statistics\n    //\n    void update_commit_qps(int count);\n\n    // routine for get extra envs from replica\n    const std::map<std::string, std::string> &get_replica_extra_envs() const { return _extra_envs; }\n\n    void set_disk_status(disk_status::type status) { _disk_status = status; }\n    bool disk_space_insufficient() { return _disk_status == disk_status::SPACE_INSUFFICIENT; }\n    disk_status::type get_disk_status() { return _disk_status; }\n    std::string get_replica_disk_tag() const { return _disk_tag; }\n\n    static const std::string kAppInfo;\n\nprotected:\n    // this method is marked protected to enable us to mock it in unit tests.\n    virtual decree max_gced_decree_no_lock() const;\n\nprivate:\n    // common helpers\n    void init_state();\n    void response_client_read(dsn::message_ex *request, error_code error);\n    void response_client_write(dsn::message_ex *request, error_code error);\n    void execute_mutation(mutation_ptr &mu);\n    mutation_ptr new_mutation(decree decree);\n\n    // initialization\n    replica(replica_stub *stub,\n            gpid gpid,\n            const app_info &app,\n            const char *dir,\n            bool need_restore,\n            bool is_duplication_follower = false);\n    error_code initialize_on_new();\n    error_code initialize_on_load();\n    error_code init_app_and_prepare_list(bool create_new);\n    decree get_replay_start_decree();\n\n    /////////////////////////////////////////////////////////////////\n    // 2pc\n    // `pop_all_committed_mutations = true` will be 
used for ingestion empty write\n    // See more about it in `replica_bulk_loader.cpp`\n    void\n    init_prepare(mutation_ptr &mu, bool reconciliation, bool pop_all_committed_mutations = false);\n    void send_prepare_message(::dsn::rpc_address addr,\n                              partition_status::type status,\n                              const mutation_ptr &mu,\n                              int timeout_milliseconds,\n                              bool pop_all_committed_mutations = false,\n                              int64_t learn_signature = invalid_signature);\n    void on_append_log_completed(mutation_ptr &mu, error_code err, size_t size);\n    void on_prepare_reply(std::pair<mutation_ptr, partition_status::type> pr,\n                          error_code err,\n                          dsn::message_ex *request,\n                          dsn::message_ex *reply);\n    void do_possible_commit_on_primary(mutation_ptr &mu);\n    void ack_prepare_message(error_code err, mutation_ptr &mu);\n    void cleanup_preparing_mutations(bool wait);\n\n    /////////////////////////////////////////////////////////////////\n    // learning\n    void init_learn(uint64_t signature);\n    void on_learn_reply(error_code err, learn_request &&req, learn_response &&resp);\n    void on_copy_remote_state_completed(error_code err,\n                                        size_t size,\n                                        uint64_t copy_start_time,\n                                        learn_request &&req,\n                                        learn_response &&resp);\n    void on_learn_remote_state_completed(error_code err);\n    void handle_learning_error(error_code err, bool is_local_error);\n    error_code handle_learning_succeeded_on_primary(::dsn::rpc_address node,\n                                                    uint64_t learn_signature);\n    void notify_learn_completion();\n    error_code apply_learned_state_from_private_log(learn_state &state);\n\n    // Prepares 
in-memory mutations for the replica's learning.\n    // Returns false if there's no delta data in cache (aka prepare-list).\n    bool prepare_cached_learn_state(const learn_request &request,\n                                    decree learn_start_decree,\n                                    decree local_committed_decree,\n                                    /*out*/ remote_learner_state &learner_state,\n                                    /*out*/ learn_response &response,\n                                    /*out*/ bool &delayed_replay_prepare_list);\n\n    // Gets the position where this round of the learning process should begin.\n    // This method is called on primary-side.\n    // TODO(wutao1): mark it const\n    decree get_learn_start_decree(const learn_request &req);\n\n    // This method differs with `_private_log->max_gced_decree()` in that\n    // it also takes `learn/` dir into account, since the learned logs are\n    // a part of plog as well.\n    // This method is called on learner-side.\n    decree get_max_gced_decree_for_learn() const;\n\n    /////////////////////////////////////////////////////////////////\n    // failure handling\n    void handle_local_failure(error_code error);\n    void handle_remote_failure(partition_status::type status,\n                               ::dsn::rpc_address node,\n                               error_code error,\n                               const std::string &caused_by);\n\n    /////////////////////////////////////////////////////////////////\n    // reconfiguration\n    void assign_primary(configuration_update_request &proposal);\n    void add_potential_secondary(configuration_update_request &proposal);\n    void upgrade_to_secondary_on_primary(::dsn::rpc_address node);\n    void downgrade_to_secondary_on_primary(configuration_update_request &proposal);\n    void downgrade_to_inactive_on_primary(configuration_update_request &proposal);\n    void remove(configuration_update_request &proposal);\n    void 
update_configuration_on_meta_server(config_type::type type,\n                                             ::dsn::rpc_address node,\n                                             partition_configuration &newConfig);\n    void\n    on_update_configuration_on_meta_server_reply(error_code err,\n                                                 dsn::message_ex *request,\n                                                 dsn::message_ex *response,\n                                                 std::shared_ptr<configuration_update_request> req);\n    void replay_prepare_list();\n    bool is_same_ballot_status_change_allowed(partition_status::type olds,\n                                              partition_status::type news);\n\n    void update_app_envs(const std::map<std::string, std::string> &envs);\n    void update_app_envs_internal(const std::map<std::string, std::string> &envs);\n    void query_app_envs(/*out*/ std::map<std::string, std::string> &envs);\n\n    bool update_configuration(const partition_configuration &config);\n    bool update_local_configuration(const replica_configuration &config, bool same_ballot = false);\n    error_code update_init_info_ballot_and_decree();\n\n    /////////////////////////////////////////////////////////////////\n    // group check\n    void init_group_check();\n    void broadcast_group_check();\n    void on_group_check_reply(error_code err,\n                              const std::shared_ptr<group_check_request> &req,\n                              const std::shared_ptr<group_check_response> &resp);\n\n    /////////////////////////////////////////////////////////////////\n    // check timer for gc, checkpointing etc.\n    void on_checkpoint_timer();\n    void init_checkpoint(bool is_emergency);\n    error_code background_async_checkpoint(bool is_emergency);\n    error_code background_sync_checkpoint();\n    void catch_up_with_private_logs(partition_status::type s);\n    void on_checkpoint_completed(error_code err);\n    void 
on_copy_checkpoint_ack(error_code err,\n                                const std::shared_ptr<replica_configuration> &req,\n                                const std::shared_ptr<learn_response> &resp);\n    void on_copy_checkpoint_file_completed(error_code err,\n                                           size_t sz,\n                                           std::shared_ptr<learn_response> resp,\n                                           const std::string &chk_dir);\n\n    /////////////////////////////////////////////////////////////////\n    // cold backup\n    virtual void generate_backup_checkpoint(cold_backup_context_ptr backup_context);\n    void trigger_async_checkpoint_for_backup(cold_backup_context_ptr backup_context);\n    void wait_async_checkpoint_for_backup(cold_backup_context_ptr backup_context);\n    void local_create_backup_checkpoint(cold_backup_context_ptr backup_context);\n    void send_backup_request_to_secondary(const backup_request &request);\n    // set all cold_backup_state cancel/pause\n    void set_backup_context_cancel();\n    void clear_cold_backup_state();\n\n    /////////////////////////////////////////////////////////////////\n    // replica restore from backup\n    bool read_cold_backup_metadata(const std::string &file, cold_backup_metadata &backup_metadata);\n    // checkpoint on cold backup media maybe contain useless file,\n    // we should abandon these file base cold_backup_metadata\n    bool remove_useless_file_under_chkpt(const std::string &chkpt_dir,\n                                         const cold_backup_metadata &metadata);\n    void clear_restore_useless_files(const std::string &local_chkpt_dir,\n                                     const cold_backup_metadata &metadata);\n    error_code get_backup_metadata(dist::block_service::block_filesystem *fs,\n                                   const std::string &remote_chkpt_dir,\n                                   const std::string &local_chkpt_dir,\n                            
       cold_backup_metadata &backup_metadata);\n    error_code download_checkpoint(const configuration_restore_request &req,\n                                   const std::string &remote_chkpt_dir,\n                                   const std::string &local_chkpt_dir);\n    dsn::error_code find_valid_checkpoint(const configuration_restore_request &req,\n                                          /*out*/ std::string &remote_chkpt_dir);\n    dsn::error_code restore_checkpoint();\n\n    dsn::error_code skip_restore_partition(const std::string &restore_dir);\n    void tell_meta_to_restore_rollback();\n\n    void report_restore_status_to_meta();\n\n    void update_restore_progress(uint64_t f_size);\n\n    // Used for remote command\n    // TODO: remove this interface and only expose the http interface\n    // now this remote command will be used by `scripts/pegasus_manual_compact.sh`\n    std::string query_manual_compact_state() const;\n\n    manual_compaction_status::type get_manual_compact_status() const;\n\n    void init_table_level_latency_counters();\n\n    void on_detect_hotkey(const detect_hotkey_request &req, /*out*/ detect_hotkey_response &resp);\n\n    uint32_t query_data_version() const;\n\n    //\n    //    Throttling\n    //\n\n    /// return true if request is throttled.\n    bool throttle_write_request(message_ex *request);\n    bool throttle_read_request(message_ex *request);\n    bool throttle_backup_request(message_ex *request);\n    /// update throttling controllers\n    /// \\see replica::update_app_envs\n    void update_throttle_envs(const std::map<std::string, std::string> &envs);\n    void update_throttle_env_internal(const std::map<std::string, std::string> &envs,\n                                      const std::string &key,\n                                      throttling_controller &cntl);\n\n    // update allowed users for access controller\n    void update_ac_allowed_users(const std::map<std::string, std::string> &envs);\n\n    // update 
bool app envs\n    void update_bool_envs(const std::map<std::string, std::string> &envs,\n                          const std::string &name,\n                          /*out*/ bool &value);\n\n    // update envs allow_ingest_behind and store new app_info into file\n    void update_allow_ingest_behind(const std::map<std::string, std::string> &envs);\n\n    // update envs to deny client request\n    void update_deny_client(const std::map<std::string, std::string> &envs);\n\n    void init_disk_tag();\n\n    // store `info` into a file under `path` directory\n    // path = \"\" means using the default directory (`_dir`/.app_info)\n    error_code store_app_info(app_info &info, const std::string &path = \"\");\n\n    // clear replica if open failed\n    static replica *\n    clear_on_failure(replica_stub *stub, replica *rep, const std::string &path, const gpid &pid);\n\n    void update_app_max_replica_count(int32_t max_replica_count);\n\nprivate:\n    friend class ::dsn::replication::test::test_checker;\n    friend class ::dsn::replication::mutation_queue;\n    friend class ::dsn::replication::replica_stub;\n    friend class mock_replica;\n    friend class throttling_controller_test;\n    friend class replica_learn_test;\n    friend class replica_duplicator_manager;\n    friend class load_mutation;\n    friend class replica_split_test;\n    friend class replica_test_base;\n    friend class replica_test;\n    friend class replica_backup_manager;\n    friend class replica_bulk_loader;\n    friend class replica_split_manager;\n    friend class replica_disk_migrator;\n    friend class replica_disk_test;\n    friend class replica_disk_migrate_test;\n    friend class open_replica_test;\n    friend class replica_follower;\n\n    // replica configuration, updated by update_local_configuration ONLY\n    replica_configuration _config;\n    uint64_t _create_time_ms;\n    uint64_t _last_config_change_time_ms;\n    uint64_t _last_checkpoint_generate_time_ms;\n    uint64_t 
_next_checkpoint_interval_trigger_time_ms;\n\n    std::unique_ptr<prepare_list> _prepare_list;\n\n    // private prepare log (may be empty, depending on config)\n    mutation_log_ptr _private_log;\n\n    // local checkpoint timer for gc, checkpoint, etc.\n    dsn::task_ptr _checkpoint_timer;\n\n    // application\n    std::unique_ptr<replication_app_base> _app;\n\n    // constants\n    replica_stub *_stub;\n    std::string _dir;\n    std::string _disk_tag;\n    replication_options *_options;\n    app_info _app_info;\n    std::map<std::string, std::string> _extra_envs;\n\n    // uniq timestamp generator for this replica.\n    //\n    // we use it to generate an increasing timestamp for current replica\n    // and replicate it to secondary in preparing mutations, and secondaries'\n    // timestamp value will also be updated if value from primary is larger\n    //\n    // as the timestamp is recorded in mutation log with mutations, we also update the value\n    // when replaying\n    //\n    // in addition, as a replica can only be accessed by one thread,\n    // so the \"thread-unsafe\" generator works fine\n    uniq_timestamp_us _uniq_timestamp_us;\n\n    // replica status specific states\n    primary_context _primary_states;\n    secondary_context _secondary_states;\n    potential_secondary_context _potential_secondary_states;\n    // policy_name --> cold_backup_context\n    std::map<std::string, cold_backup_context_ptr> _cold_backup_contexts;\n    partition_split_context _split_states;\n\n    // timer task that running in replication-thread\n    std::atomic<uint64_t> _cold_backup_running_count;\n    std::atomic<uint64_t> _cold_backup_max_duration_time_ms;\n    std::atomic<uint64_t> _cold_backup_max_upload_file_size;\n\n    // record the progress of restore\n    int64_t _chkpt_total_size;\n    std::atomic<int64_t> _cur_download_size;\n    std::atomic<int32_t> _restore_progress;\n    // _restore_status:\n    //      ERR_OK: restore hasn't encountered any error\n    
//      ERR_CORRUPTION : data on backup media is damaged and we can not skip the damage data,\n    //                       so should restore rollback\n    //      ERR_IGNORE_DAMAGED_DATA : data on backup media is damaged but we can skip the damage\n    //                                data, so skip the damaged partition\n    dsn::error_code _restore_status;\n\n    bool _inactive_is_transient; // upgrade to P/S is allowed only iff true\n    bool _is_initializing;       // when initializing, switching to primary need to update ballot\n    deny_client _deny_client;    // if deny requests\n    throttling_controller _write_qps_throttling_controller;  // throttling by requests-per-second\n    throttling_controller _write_size_throttling_controller; // throttling by bytes-per-second\n    throttling_controller _read_qps_throttling_controller;\n    throttling_controller _backup_request_qps_throttling_controller;\n\n    // duplication\n    std::unique_ptr<replica_duplicator_manager> _duplication_mgr;\n    bool _is_manual_emergency_checkpointing{false};\n    bool _is_duplication_master{false};\n    bool _is_duplication_follower{false};\n\n    // backup\n    std::unique_ptr<replica_backup_manager> _backup_mgr;\n\n    // bulk load\n    std::unique_ptr<replica_bulk_loader> _bulk_loader;\n    // if replica in bulk load ingestion 2pc, will reject other write requests\n    bool _is_bulk_load_ingestion{false};\n    uint64_t _bulk_load_ingestion_start_time_ms{0};\n\n    // partition split\n    std::unique_ptr<replica_split_manager> _split_mgr;\n    bool _validate_partition_hash{false};\n\n    // disk migrator\n    std::unique_ptr<replica_disk_migrator> _disk_migrator;\n\n    std::unique_ptr<replica_follower> _replica_follower;\n\n    // perf counters\n    perf_counter_wrapper _counter_private_log_size;\n    perf_counter_wrapper _counter_recent_write_throttling_delay_count;\n    perf_counter_wrapper _counter_recent_write_throttling_reject_count;\n    perf_counter_wrapper 
_counter_recent_read_throttling_delay_count;\n    perf_counter_wrapper _counter_recent_read_throttling_reject_count;\n    perf_counter_wrapper _counter_recent_backup_request_throttling_delay_count;\n    perf_counter_wrapper _counter_recent_backup_request_throttling_reject_count;\n    perf_counter_wrapper _counter_recent_write_splitting_reject_count;\n    perf_counter_wrapper _counter_recent_read_splitting_reject_count;\n    perf_counter_wrapper _counter_recent_write_bulk_load_ingestion_reject_count;\n    std::vector<perf_counter *> _counters_table_level_latency;\n    perf_counter_wrapper _counter_dup_disabled_non_idempotent_write_count;\n    perf_counter_wrapper _counter_backup_request_qps;\n\n    dsn::task_tracker _tracker;\n    // the thread access checker\n    dsn::thread_access_checker _checker;\n\n    std::unique_ptr<security::access_controller> _access_controller;\n\n    disk_status::type _disk_status{disk_status::NORMAL};\n\n    bool _allow_ingest_behind{false};\n};\ntypedef dsn::ref_ptr<replica> replica_ptr;\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_2pc.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"replica.h\"\n#include \"mutation.h\"\n#include \"mutation_log.h\"\n#include \"replica_stub.h\"\n#include \"bulk_load/replica_bulk_loader.h\"\n#include \"split/replica_split_manager.h\"\n#include \"runtime/security/access_controller.h\"\n#include <dsn/utils/latency_tracer.h>\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/dist/fmt_logging.h>\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DEFINE_bool(\"replication\",\n                reject_write_when_disk_insufficient,\n                true,\n                \"reject client write requests if disk status is space insufficient\");\nDSN_TAG_VARIABLE(reject_write_when_disk_insufficient, FT_MUTABLE);\n\nvoid replica::on_client_write(dsn::message_ex 
*request, bool ignore_throttling)\n{\n    _checker.only_one_thread_access();\n\n    if (!_access_controller->allowed(request)) {\n        response_client_write(request, ERR_ACL_DENY);\n        return;\n    }\n\n    if (_deny_client.write) {\n        if (_deny_client.reconfig) {\n            // return ERR_INVALID_STATE will trigger client update config immediately\n            response_client_write(request, ERR_INVALID_STATE);\n            return;\n        }\n        // Do not reply any message to the peer client to let it timeout, it's OK coz some users\n        // may retry immediately when they got a not success code which will make the server side\n        // pressure more and more heavy.\n        return;\n    }\n\n    if (dsn_unlikely(_stub->_max_allowed_write_size &&\n                     request->body_size() > _stub->_max_allowed_write_size)) {\n        std::string request_info = _app->dump_write_request(request);\n        dwarn_replica(\"client from {} write request body size exceed threshold, request = [{}], \"\n                      \"request_body_size \"\n                      \"= {}, max_allowed_write_size = {}, it will be rejected!\",\n                      request->header->from_address.to_string(),\n                      request_info,\n                      request->body_size(),\n                      _stub->_max_allowed_write_size);\n        _stub->_counter_recent_write_size_exceed_threshold_count->increment();\n        response_client_write(request, ERR_INVALID_DATA);\n        return;\n    }\n\n    task_spec *spec = task_spec::get(request->rpc_code());\n    if (dsn_unlikely(nullptr == spec || request->rpc_code() == TASK_CODE_INVALID)) {\n        derror_f(\"recv message with unhandled rpc name {} from {}, trace_id = {}\",\n                 request->rpc_code().to_string(),\n                 request->header->from_address.to_string(),\n                 request->header->trace_id);\n        response_client_write(request, ERR_HANDLER_NOT_FOUND);\n        
return;\n    }\n\n    if (is_duplication_master() && !spec->rpc_request_is_write_idempotent) {\n        // Ignore non-idempotent write, because duplication provides no guarantee of atomicity to\n        // make this write produce the same result on multiple clusters.\n        _counter_dup_disabled_non_idempotent_write_count->increment();\n        response_client_write(request, ERR_OPERATION_DISABLED);\n        return;\n    }\n\n    CHECK_REQUEST_IF_SPLITTING(write)\n\n    if (partition_status::PS_PRIMARY != status()) {\n        response_client_write(request, ERR_INVALID_STATE);\n        return;\n    }\n\n    if (FLAGS_reject_write_when_disk_insufficient &&\n        (disk_space_insufficient() || _primary_states.secondary_disk_space_insufficient())) {\n        response_client_write(request, ERR_DISK_INSUFFICIENT);\n        return;\n    }\n\n    if (_is_bulk_load_ingestion) {\n        if (request->rpc_code() != dsn::apps::RPC_RRDB_RRDB_BULK_LOAD) {\n            // reject write requests during ingestion\n            _counter_recent_write_bulk_load_ingestion_reject_count->increment();\n            response_client_write(request, ERR_OPERATION_DISABLED);\n        } else {\n            response_client_write(request, ERR_NO_NEED_OPERATE);\n        }\n        return;\n    }\n\n    if (request->rpc_code() == dsn::apps::RPC_RRDB_RRDB_BULK_LOAD) {\n        auto cur_bulk_load_status = _bulk_loader->get_bulk_load_status();\n        if (cur_bulk_load_status != bulk_load_status::BLS_DOWNLOADED &&\n            cur_bulk_load_status != bulk_load_status::BLS_INGESTING) {\n            derror_replica(\"receive bulk load ingestion request with wrong status({})\",\n                           enum_to_string(cur_bulk_load_status));\n            response_client_write(request, ERR_INVALID_STATE);\n            return;\n        }\n        ddebug_replica(\"receive bulk load ingestion request\");\n\n        // bulk load ingestion request requires that all secondaries should be alive\n        if 
(static_cast<int>(_primary_states.membership.secondaries.size()) + 1 <\n            _primary_states.membership.max_replica_count) {\n            response_client_write(request, ERR_NOT_ENOUGH_MEMBER);\n            return;\n        }\n        _is_bulk_load_ingestion = true;\n        _bulk_load_ingestion_start_time_ms = dsn_now_ms();\n    }\n\n    if (static_cast<int>(_primary_states.membership.secondaries.size()) + 1 <\n        _options->mutation_2pc_min_replica_count) {\n        response_client_write(request, ERR_NOT_ENOUGH_MEMBER);\n        return;\n    }\n\n    if (!ignore_throttling && throttle_write_request(request)) {\n        return;\n    }\n\n    dinfo(\"%s: got write request from %s\", name(), request->header->from_address.to_string());\n    auto mu = _primary_states.write_queue.add_work(request->rpc_code(), request, this);\n    if (mu) {\n        init_prepare(mu, false);\n    }\n}\n\nvoid replica::init_prepare(mutation_ptr &mu, bool reconciliation, bool pop_all_committed_mutations)\n{\n    dassert(partition_status::PS_PRIMARY == status(),\n            \"invalid partition_status, status = %s\",\n            enum_to_string(status()));\n\n    mu->_tracer->set_description(\"primary\");\n    ADD_POINT(mu->_tracer);\n\n    error_code err = ERR_OK;\n    uint8_t count = 0;\n    const auto request_count = mu->client_requests.size();\n    mu->data.header.last_committed_decree = last_committed_decree();\n\n    dsn_log_level_t level = LOG_LEVEL_INFORMATION;\n    if (mu->data.header.decree == invalid_decree) {\n        mu->set_id(get_ballot(), _prepare_list->max_decree() + 1);\n        // print a debug log if necessary\n        if (_options->prepare_decree_gap_for_debug_logging > 0 &&\n            mu->get_decree() % _options->prepare_decree_gap_for_debug_logging == 0)\n            level = LOG_LEVEL_DEBUG;\n        mu->set_timestamp(_uniq_timestamp_us.next());\n    } else {\n        mu->set_id(get_ballot(), mu->data.header.decree);\n    }\n\n    
mu->_tracer->set_name(fmt::format(\"mutation[{}]\", mu->name()));\n    dlog(level,\n         \"%s: mutation %s init_prepare, mutation_tid=%\" PRIu64,\n         name(),\n         mu->name(),\n         mu->tid());\n\n    // child should prepare mutation synchronously\n    mu->set_is_sync_to_child(_primary_states.sync_send_write_request);\n\n    // check bounded staleness\n    if (mu->data.header.decree > last_committed_decree() + _options->staleness_for_commit) {\n        err = ERR_CAPACITY_EXCEEDED;\n        goto ErrOut;\n    }\n\n    // stop prepare bulk load ingestion if there are secondaries unalive\n    for (auto i = 0; i < request_count; ++i) {\n        const mutation_update &update = mu->data.updates[i];\n        if (update.code != dsn::apps::RPC_RRDB_RRDB_BULK_LOAD) {\n            break;\n        }\n        ddebug_replica(\"try to prepare bulk load mutation({})\", mu->name());\n        if (static_cast<int>(_primary_states.membership.secondaries.size()) + 1 <\n            _primary_states.membership.max_replica_count) {\n            err = ERR_NOT_ENOUGH_MEMBER;\n            break;\n        }\n    }\n    if (err != ERR_OK) {\n        goto ErrOut;\n    }\n\n    // stop prepare if there are too few replicas unless it's a reconciliation\n    // for reconciliation, we should ensure every prepared mutation to be committed\n    // please refer to PacificA paper\n    if (static_cast<int>(_primary_states.membership.secondaries.size()) + 1 <\n            _options->mutation_2pc_min_replica_count &&\n        !reconciliation) {\n        err = ERR_NOT_ENOUGH_MEMBER;\n        goto ErrOut;\n    }\n\n    dassert(mu->data.header.decree > last_committed_decree(),\n            \"%\" PRId64 \" VS %\" PRId64 \"\",\n            mu->data.header.decree,\n            last_committed_decree());\n\n    // local prepare\n    err = _prepare_list->prepare(mu, partition_status::PS_PRIMARY, pop_all_committed_mutations);\n    if (err != ERR_OK) {\n        goto ErrOut;\n    }\n\n    // remote 
prepare\n    mu->set_prepare_ts();\n    mu->set_left_secondary_ack_count((unsigned int)_primary_states.membership.secondaries.size());\n    for (auto it = _primary_states.membership.secondaries.begin();\n         it != _primary_states.membership.secondaries.end();\n         ++it) {\n        send_prepare_message(*it,\n                             partition_status::PS_SECONDARY,\n                             mu,\n                             _options->prepare_timeout_ms_for_secondaries,\n                             pop_all_committed_mutations);\n    }\n\n    count = 0;\n    for (auto it = _primary_states.learners.begin(); it != _primary_states.learners.end(); ++it) {\n        if (it->second.prepare_start_decree != invalid_decree &&\n            mu->data.header.decree >= it->second.prepare_start_decree) {\n            send_prepare_message(it->first,\n                                 partition_status::PS_POTENTIAL_SECONDARY,\n                                 mu,\n                                 _options->prepare_timeout_ms_for_potential_secondaries,\n                                 pop_all_committed_mutations,\n                                 it->second.signature);\n            count++;\n        }\n    }\n    mu->set_left_potential_secondary_ack_count(count);\n\n    if (_split_mgr->is_splitting()) {\n        _split_mgr->copy_mutation(mu);\n    }\n\n    if (mu->is_logged()) {\n        do_possible_commit_on_primary(mu);\n    } else {\n        dassert(mu->data.header.log_offset == invalid_offset,\n                \"invalid log offset, offset = %\" PRId64,\n                mu->data.header.log_offset);\n        dassert(mu->log_task() == nullptr, \"\");\n        int64_t pending_size;\n        mu->log_task() = _private_log->append(mu,\n                                              LPC_WRITE_REPLICATION_LOG,\n                                              &_tracker,\n                                              std::bind(&replica::on_append_log_completed,\n                 
                                       this,\n                                                        mu,\n                                                        std::placeholders::_1,\n                                                        std::placeholders::_2),\n                                              get_gpid().thread_hash(),\n                                              &pending_size);\n        dassert(nullptr != mu->log_task(), \"\");\n        if (_options->log_shared_pending_size_throttling_threshold_kb > 0 &&\n            _options->log_shared_pending_size_throttling_delay_ms > 0 &&\n            pending_size >= _options->log_shared_pending_size_throttling_threshold_kb * 1024) {\n            int delay_ms = _options->log_shared_pending_size_throttling_delay_ms;\n            for (dsn::message_ex *r : mu->client_requests) {\n                if (r && r->io_session->delay_recv(delay_ms)) {\n                    dwarn(\"too large pending shared log (%\" PRId64 \"), \"\n                          \"delay traffic from %s for %d milliseconds\",\n                          pending_size,\n                          r->header->from_address.to_string(),\n                          delay_ms);\n                }\n            }\n        }\n    }\n\n    _primary_states.last_prepare_ts_ms = mu->prepare_ts_ms();\n    return;\n\nErrOut:\n    for (auto &r : mu->client_requests) {\n        response_client_write(r, err);\n    }\n    return;\n}\n\nvoid replica::send_prepare_message(::dsn::rpc_address addr,\n                                   partition_status::type status,\n                                   const mutation_ptr &mu,\n                                   int timeout_milliseconds,\n                                   bool pop_all_committed_mutations,\n                                   int64_t learn_signature)\n{\n    mu->_tracer->add_sub_tracer(addr.to_string());\n    ADD_POINT(mu->_tracer->sub_tracer(addr.to_string()));\n\n    dsn::message_ex *msg = 
dsn::message_ex::create_request(\n        RPC_PREPARE, timeout_milliseconds, get_gpid().thread_hash());\n    replica_configuration rconfig;\n    _primary_states.get_replica_config(status, rconfig, learn_signature);\n    rconfig.__set_pop_all(pop_all_committed_mutations);\n    if (status == partition_status::PS_SECONDARY && _primary_states.sync_send_write_request) {\n        rconfig.__set_split_sync_to_child(true);\n    }\n\n    {\n        rpc_write_stream writer(msg);\n        marshall(writer, get_gpid(), DSF_THRIFT_BINARY);\n        marshall(writer, rconfig, DSF_THRIFT_BINARY);\n        mu->write_to(writer, msg);\n    }\n\n    mu->remote_tasks()[addr] =\n        rpc::call(addr,\n                  msg,\n                  &_tracker,\n                  [=](error_code err, dsn::message_ex *request, dsn::message_ex *reply) {\n                      on_prepare_reply(std::make_pair(mu, rconfig.status), err, request, reply);\n                  },\n                  get_gpid().thread_hash());\n\n    dinfo(\"%s: mutation %s send_prepare_message to %s as %s\",\n          name(),\n          mu->name(),\n          addr.to_string(),\n          enum_to_string(rconfig.status));\n}\n\nvoid replica::do_possible_commit_on_primary(mutation_ptr &mu)\n{\n    dassert(_config.ballot == mu->data.header.ballot,\n            \"invalid mutation ballot, %\" PRId64 \" VS %\" PRId64 \"\",\n            _config.ballot,\n            mu->data.header.ballot);\n    dassert(partition_status::PS_PRIMARY == status(),\n            \"invalid partition_status, status = %s\",\n            enum_to_string(status()));\n\n    if (mu->is_ready_for_commit()) {\n        _prepare_list->commit(mu->data.header.decree, COMMIT_ALL_READY);\n    }\n}\n\nvoid replica::on_prepare(dsn::message_ex *request)\n{\n    _checker.only_one_thread_access();\n\n    replica_configuration rconfig;\n    mutation_ptr mu;\n    bool pop_all_committed_mutations = false;\n\n    {\n        rpc_read_stream reader(request);\n        
unmarshall(reader, rconfig, DSF_THRIFT_BINARY);\n        mu = mutation::read_from(reader, request);\n        mu->set_is_sync_to_child(rconfig.split_sync_to_child);\n        pop_all_committed_mutations = rconfig.pop_all;\n        rconfig.split_sync_to_child = false;\n        rconfig.pop_all = false;\n    }\n\n    decree decree = mu->data.header.decree;\n\n    dinfo(\"%s: mutation %s on_prepare\", name(), mu->name());\n    mu->_tracer->set_name(fmt::format(\"mutation[{}]\", mu->name()));\n    mu->_tracer->set_description(\"secondary\");\n    ADD_POINT(mu->_tracer);\n\n    dassert(mu->data.header.pid == rconfig.pid,\n            \"(%d.%d) VS (%d.%d)\",\n            mu->data.header.pid.get_app_id(),\n            mu->data.header.pid.get_partition_index(),\n            rconfig.pid.get_app_id(),\n            rconfig.pid.get_partition_index());\n    dassert(mu->data.header.ballot == rconfig.ballot,\n            \"invalid mutation ballot, %\" PRId64 \" VS %\" PRId64 \"\",\n            mu->data.header.ballot,\n            rconfig.ballot);\n\n    if (mu->data.header.ballot < get_ballot()) {\n        derror(\"%s: mutation %s on_prepare skipped due to old view\", name(), mu->name());\n        // no need response because the rpc should have been cancelled on primary in this case\n        return;\n    }\n\n    // update configuration when necessary\n    else if (rconfig.ballot > get_ballot()) {\n        if (!update_local_configuration(rconfig)) {\n            derror(\"%s: mutation %s on_prepare failed as update local configuration failed, state \"\n                   \"= %s\",\n                   name(),\n                   mu->name(),\n                   enum_to_string(status()));\n            ack_prepare_message(ERR_INVALID_STATE, mu);\n            return;\n        }\n    }\n\n    if (partition_status::PS_INACTIVE == status() || partition_status::PS_ERROR == status()) {\n        derror(\"%s: mutation %s on_prepare failed as invalid replica state, state = %s\",\n               
name(),\n               mu->name(),\n               enum_to_string(status()));\n        ack_prepare_message((partition_status::PS_INACTIVE == status() && _inactive_is_transient)\n                                ? ERR_INACTIVE_STATE\n                                : ERR_INVALID_STATE,\n                            mu);\n        return;\n    } else if (partition_status::PS_POTENTIAL_SECONDARY == status()) {\n        // new learning process\n        if (rconfig.learner_signature != _potential_secondary_states.learning_version) {\n            derror(\"%s: mutation %s on_prepare failed as unmatched learning signature, state = %s\"\n                   \", old_signature[%016\" PRIx64 \"] vs new_signature[%016\" PRIx64 \"]\",\n                   name(),\n                   mu->name(),\n                   enum_to_string(status()),\n                   _potential_secondary_states.learning_version,\n                   rconfig.learner_signature);\n            handle_learning_error(ERR_INVALID_STATE, false);\n            ack_prepare_message(ERR_INVALID_STATE, mu);\n            return;\n        }\n\n        auto learning_status = _potential_secondary_states.learning_status;\n        if (learning_status != learner_status::LearningWithPrepare &&\n            learning_status != learner_status::LearningSucceeded) {\n            // if prepare requests are received when learning status is changing from\n            // LearningWithoutPrepare to LearningWithPrepare, we ack ERR_TRY_AGAIN.\n            error_code ack_code =\n                (learning_status == learner_status::LearningWithoutPrepare ? 
ERR_TRY_AGAIN\n                                                                           : ERR_INVALID_STATE);\n            derror(\"%s: mutation %s on_prepare skipped as invalid learning status, state = %s, \"\n                   \"learning_status = %s, ack %s\",\n                   name(),\n                   mu->name(),\n                   enum_to_string(status()),\n                   enum_to_string(learning_status),\n                   ack_code.to_string());\n            ack_prepare_message(ack_code, mu);\n            return;\n        }\n    }\n\n    dassert(rconfig.status == status(),\n            \"invalid status, %s VS %s\",\n            enum_to_string(rconfig.status),\n            enum_to_string(status()));\n    if (decree <= last_committed_decree()) {\n        ack_prepare_message(ERR_OK, mu);\n        return;\n    }\n\n    // real prepare start\n    _uniq_timestamp_us.try_update(mu->data.header.timestamp);\n    auto mu2 = _prepare_list->get_mutation_by_decree(decree);\n    if (mu2 != nullptr && mu2->data.header.ballot == mu->data.header.ballot) {\n        if (mu2->is_logged()) {\n            // already logged, just response ERR_OK\n            ack_prepare_message(ERR_OK, mu);\n        } else {\n            // not logged, combine duplicate request to old mutation\n            mu2->add_prepare_request(request);\n        }\n        return;\n    }\n\n    error_code err = _prepare_list->prepare(mu, status(), pop_all_committed_mutations);\n    dassert(err == ERR_OK, \"prepare mutation failed, err = %s\", err.to_string());\n\n    if (partition_status::PS_POTENTIAL_SECONDARY == status() ||\n        partition_status::PS_SECONDARY == status()) {\n        dassert(mu->data.header.decree <=\n                    last_committed_decree() + _options->max_mutation_count_in_prepare_list,\n                \"%\" PRId64 \" VS %\" PRId64 \"(%\" PRId64 \" + %d)\",\n                mu->data.header.decree,\n                last_committed_decree() + 
_options->max_mutation_count_in_prepare_list,\n                last_committed_decree(),\n                _options->max_mutation_count_in_prepare_list);\n    } else {\n        derror(\"%s: mutation %s on_prepare failed as invalid replica state, state = %s\",\n               name(),\n               mu->name(),\n               enum_to_string(status()));\n        ack_prepare_message(ERR_INVALID_STATE, mu);\n        return;\n    }\n\n    if (_split_mgr->is_splitting()) {\n        _split_mgr->copy_mutation(mu);\n    }\n\n    dassert(mu->log_task() == nullptr, \"\");\n    mu->log_task() = _private_log->append(mu,\n                                          LPC_WRITE_REPLICATION_LOG,\n                                          &_tracker,\n                                          std::bind(&replica::on_append_log_completed,\n                                                    this,\n                                                    mu,\n                                                    std::placeholders::_1,\n                                                    std::placeholders::_2),\n                                          get_gpid().thread_hash());\n    dassert(nullptr != mu->log_task(), \"\");\n}\n\nvoid replica::on_append_log_completed(mutation_ptr &mu, error_code err, size_t size)\n{\n    _checker.only_one_thread_access();\n\n    dinfo(\"%s: append shared log completed for mutation %s, size = %u, err = %s\",\n          name(),\n          mu->name(),\n          size,\n          err.to_string());\n\n    ADD_POINT(mu->_tracer);\n\n    if (err == ERR_OK) {\n        mu->set_logged();\n    } else {\n        derror(\"%s: append shared log failed for mutation %s, err = %s\",\n               name(),\n               mu->name(),\n               err.to_string());\n    }\n\n    // skip old mutations\n    if (mu->data.header.ballot >= get_ballot() && status() != partition_status::PS_INACTIVE) {\n        switch (status()) {\n        case partition_status::PS_PRIMARY:\n           
 if (err == ERR_OK) {\n                do_possible_commit_on_primary(mu);\n            } else {\n                handle_local_failure(err);\n            }\n            break;\n        case partition_status::PS_SECONDARY:\n        case partition_status::PS_POTENTIAL_SECONDARY:\n            if (err != ERR_OK) {\n                handle_local_failure(err);\n            }\n            // always ack\n            ack_prepare_message(err, mu);\n            // all mutations with lower decree must be ready\n            _prepare_list->commit(mu->data.header.last_committed_decree, COMMIT_TO_DECREE_HARD);\n            break;\n        case partition_status::PS_PARTITION_SPLIT:\n            if (err != ERR_OK) {\n                handle_local_failure(err);\n            }\n            _split_mgr->ack_parent(err, mu);\n            break;\n        case partition_status::PS_ERROR:\n            break;\n        default:\n            dassert(false, \"invalid partition_status, status = %s\", enum_to_string(status()));\n            break;\n        }\n    }\n\n    if (err != ERR_OK) {\n        // mutation log failure, propagate to all replicas\n        _stub->handle_log_failure(err);\n    }\n}\n\nvoid replica::on_prepare_reply(std::pair<mutation_ptr, partition_status::type> pr,\n                               error_code err,\n                               dsn::message_ex *request,\n                               dsn::message_ex *reply)\n{\n    _checker.only_one_thread_access();\n\n    mutation_ptr mu = pr.first;\n    partition_status::type target_status = pr.second;\n\n    // skip callback for old mutations\n    if (partition_status::PS_PRIMARY != status() || mu->data.header.ballot < get_ballot() ||\n        mu->get_decree() <= last_committed_decree())\n        return;\n\n    dassert(mu->data.header.ballot == get_ballot(),\n            \"%s: invalid mutation ballot, %\" PRId64 \" VS %\" PRId64 \"\",\n            mu->name(),\n            mu->data.header.ballot,\n            
get_ballot());\n\n    ::dsn::rpc_address node = request->to_address;\n    partition_status::type st = _primary_states.get_node_status(node);\n\n    // handle reply\n    prepare_ack resp;\n\n    // handle error\n    if (err != ERR_OK) {\n        resp.err = err;\n    } else {\n        ::dsn::unmarshall(reply, resp);\n    }\n\n    auto send_prepare_tracer = mu->_tracer->sub_tracer(request->to_address.to_string());\n    APPEND_EXTERN_POINT(send_prepare_tracer, resp.receive_timestamp, \"remote_receive\");\n    APPEND_EXTERN_POINT(send_prepare_tracer, resp.response_timestamp, \"remote_reply\");\n    ADD_CUSTOM_POINT(send_prepare_tracer, resp.err.to_string());\n\n    if (resp.err == ERR_OK) {\n        dinfo(\"%s: mutation %s on_prepare_reply from %s, appro_data_bytes = %d, \"\n              \"target_status = %s, err = %s\",\n              name(),\n              mu->name(),\n              node.to_string(),\n              mu->appro_data_bytes(),\n              enum_to_string(target_status),\n              resp.err.to_string());\n    } else {\n        derror(\"%s: mutation %s on_prepare_reply from %s, appro_data_bytes = %d, \"\n               \"target_status = %s, err = %s\",\n               name(),\n               mu->name(),\n               node.to_string(),\n               mu->appro_data_bytes(),\n               enum_to_string(target_status),\n               resp.err.to_string());\n    }\n\n    if (resp.err == ERR_OK) {\n        dassert(resp.ballot == get_ballot(),\n                \"invalid response ballot, %\" PRId64 \" VS %\" PRId64 \"\",\n                resp.ballot,\n                get_ballot());\n        dassert(resp.decree == mu->data.header.decree,\n                \"invalid response decree, %\" PRId64 \" VS %\" PRId64 \"\",\n                resp.decree,\n                mu->data.header.decree);\n\n        switch (target_status) {\n        case partition_status::PS_SECONDARY:\n            dassert(_primary_states.check_exist(node, 
partition_status::PS_SECONDARY),\n                    \"invalid secondary node address, address = %s\",\n                    node.to_string());\n            dassert(mu->left_secondary_ack_count() > 0, \"%u\", mu->left_secondary_ack_count());\n            if (0 == mu->decrease_left_secondary_ack_count()) {\n                do_possible_commit_on_primary(mu);\n            }\n            break;\n        case partition_status::PS_POTENTIAL_SECONDARY:\n            dassert(mu->left_potential_secondary_ack_count() > 0,\n                    \"%u\",\n                    mu->left_potential_secondary_ack_count());\n            if (0 == mu->decrease_left_potential_secondary_ack_count()) {\n                do_possible_commit_on_primary(mu);\n            }\n            break;\n        default:\n            dwarn(\"%s: mutation %s prepare ack skipped coz the node is now inactive\",\n                  name(),\n                  mu->name());\n            break;\n        }\n    }\n\n    // failure handling\n    else {\n        // retry for INACTIVE or TRY_AGAIN if there is still time.\n        if (resp.err == ERR_INACTIVE_STATE || resp.err == ERR_TRY_AGAIN) {\n            int prepare_timeout_ms = (target_status == partition_status::PS_SECONDARY\n                                          ? 
_options->prepare_timeout_ms_for_secondaries\n                                          : _options->prepare_timeout_ms_for_potential_secondaries);\n            int delay_time_ms = 5; // delay some time before retry to avoid sending too frequently\n            if (mu->is_prepare_close_to_timeout(delay_time_ms + 2, prepare_timeout_ms)) {\n                derror(\"%s: mutation %s do not retry prepare to %s for no enought time left, \"\n                       \"prepare_ts_ms = %\" PRIu64 \", prepare_timeout_ms = %d, now_ms = %\" PRIu64,\n                       name(),\n                       mu->name(),\n                       node.to_string(),\n                       mu->prepare_ts_ms(),\n                       prepare_timeout_ms,\n                       dsn_now_ms());\n            } else {\n                ddebug(\"%s: mutation %s retry prepare to %s after %d ms\",\n                       name(),\n                       mu->name(),\n                       node.to_string(),\n                       delay_time_ms);\n                int64_t learn_signature = invalid_signature;\n                if (target_status == partition_status::PS_POTENTIAL_SECONDARY) {\n                    auto it = _primary_states.learners.find(node);\n                    if (it != _primary_states.learners.end()) {\n                        learn_signature = it->second.signature;\n                    }\n                }\n                tasking::enqueue(\n                    LPC_DELAY_PREPARE,\n                    &_tracker,\n                    [this, node, target_status, mu, prepare_timeout_ms, learn_signature] {\n                        // need to check status/ballot/decree before sending prepare message,\n                        // because the config may have been changed or the mutation may have been\n                        // committed during the delay time.\n                        if (status() == partition_status::PS_PRIMARY &&\n                            get_ballot() == 
mu->data.header.ballot &&\n                            mu->get_decree() > last_committed_decree()) {\n                            send_prepare_message(node,\n                                                 target_status,\n                                                 mu,\n                                                 prepare_timeout_ms,\n                                                 false,\n                                                 learn_signature);\n                        }\n                    },\n                    get_gpid().thread_hash(),\n                    std::chrono::milliseconds(delay_time_ms));\n                return;\n            }\n        }\n\n        _stub->_counter_replicas_recent_prepare_fail_count->increment();\n\n        // make sure this is before any later commit ops\n        // because now commit ops may lead to new prepare ops\n        // due to replication throttling\n        handle_remote_failure(st, node, resp.err, \"prepare\");\n\n        // note targetStatus and (curent) status may diff\n        if (target_status == partition_status::PS_POTENTIAL_SECONDARY) {\n            dassert(mu->left_potential_secondary_ack_count() > 0,\n                    \"%u\",\n                    mu->left_potential_secondary_ack_count());\n            if (0 == mu->decrease_left_potential_secondary_ack_count()) {\n                do_possible_commit_on_primary(mu);\n            }\n        }\n    }\n}\n\nvoid replica::ack_prepare_message(error_code err, mutation_ptr &mu)\n{\n    ADD_POINT(mu->_tracer);\n    prepare_ack resp;\n    resp.pid = get_gpid();\n    resp.err = err;\n    resp.ballot = get_ballot();\n    resp.decree = mu->data.header.decree;\n\n    resp.__set_receive_timestamp(mu->_tracer->start_time());\n    resp.__set_response_timestamp(dsn_now_ns());\n\n    // for partition_status::PS_POTENTIAL_SECONDARY ONLY\n    resp.last_committed_decree_in_app = _app->last_committed_decree();\n    resp.last_committed_decree_in_prepare_list = 
last_committed_decree();\n\n    const std::vector<dsn::message_ex *> &prepare_requests = mu->prepare_requests();\n    dassert(!prepare_requests.empty(), \"mutation = %s\", mu->name());\n\n    if (err == ERR_OK) {\n        if (mu->is_child_acked()) {\n            dinfo_replica(\"mutation {} ack_prepare_message, err = {}\", mu->name(), err);\n            for (auto &request : prepare_requests) {\n                reply(request, resp);\n            }\n        }\n        return;\n    }\n    // only happened when prepare failed during partition split child copy mutation synchronously\n    if (mu->is_error_acked()) {\n        dwarn_replica(\"mutation {} has been ack_prepare_message, err = {}\", mu->name(), err);\n        return;\n    }\n\n    dwarn_replica(\"mutation {} ack_prepare_message, err = {}\", mu->name(), err);\n    if (mu->is_sync_to_child()) {\n        mu->set_error_acked();\n    }\n    for (auto &request : prepare_requests) {\n        reply(request, resp);\n    }\n}\n\nvoid replica::cleanup_preparing_mutations(bool wait)\n{\n    decree start = last_committed_decree() + 1;\n    decree end = _prepare_list->max_decree();\n\n    for (decree decree = start; decree <= end; decree++) {\n        mutation_ptr mu = _prepare_list->get_mutation_by_decree(decree);\n        if (mu != nullptr) {\n            mu->clear_prepare_or_commit_tasks();\n\n            //\n            // make sure the buffers from mutations are valid for underlying aio\n            //\n            if (wait) {\n                if (dsn_unlikely(_private_log != nullptr)) {\n                    _private_log->flush();\n                }\n                mu->wait_log_task();\n            }\n        }\n    }\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_backup.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <boost/lexical_cast.hpp>\n\n#include <dsn/utility/filesystem.h>\n#include <dsn/utils/time_utils.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/utility/flags.h>\n\n#include \"block_service/block_service_manager.h\"\n#include \"backup/replica_backup_manager.h\"\n#include \"backup/cold_backup_context.h\"\n\n#include \"replica.h\"\n#include \"replica_stub.h\"\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DEFINE_uint64(\"replication\",\n                  max_concurrent_uploading_file_count,\n                  10,\n                  \"concurrent uploading file count to block service\");\n\nvoid replica::on_cold_backup(const backup_request &request, /*out*/ backup_response &response)\n{\n    _checker.only_one_thread_access();\n\n    const std::string &policy_name = request.policy.policy_name;\n    auto backup_id = request.backup_id;\n    cold_backup_context_ptr new_context(\n        new cold_backup_context(this, request, FLAGS_max_concurrent_uploading_file_count));\n\n    ddebug_replica(\"{}: received cold backup request, partition_status = {}\",\n                   
new_context->name,\n                   enum_to_string(status()));\n\n    if (status() == partition_status::type::PS_PRIMARY ||\n        status() == partition_status::type::PS_SECONDARY) {\n        cold_backup_context_ptr backup_context = nullptr;\n        auto find = _cold_backup_contexts.find(policy_name);\n        if (find != _cold_backup_contexts.end()) {\n            backup_context = find->second;\n        } else {\n            /// TODO: policy may change provider\n            dist::block_service::block_filesystem *block_service =\n                _stub->_block_service_manager.get_or_create_block_filesystem(\n                    request.policy.backup_provider_type);\n            if (block_service == nullptr) {\n                derror(\"%s: create cold backup block service failed, provider_type = %s, response \"\n                       \"ERR_INVALID_PARAMETERS\",\n                       new_context->name,\n                       request.policy.backup_provider_type.c_str());\n                response.err = ERR_INVALID_PARAMETERS;\n                return;\n            }\n            auto r = _cold_backup_contexts.insert(std::make_pair(policy_name, new_context));\n            dassert(r.second, \"\");\n            backup_context = r.first->second;\n            backup_context->block_service = block_service;\n            backup_context->backup_root = request.__isset.backup_path\n                                              ? 
dsn::utils::filesystem::path_combine(\n                                                    request.backup_path, _options->cold_backup_root)\n                                              : _options->cold_backup_root;\n        }\n\n        dcheck_eq_replica(backup_context->request.policy.policy_name, policy_name);\n        cold_backup_status backup_status = backup_context->status();\n\n        if (backup_context->request.backup_id < backup_id || backup_status == ColdBackupCanceled) {\n            if (backup_status == ColdBackupCheckpointing) {\n                ddebug(\"%s: delay clearing obsoleted cold backup context, cause backup_status == \"\n                       \"ColdBackupCheckpointing\",\n                       new_context->name);\n                tasking::enqueue(LPC_REPLICATION_COLD_BACKUP,\n                                 &_tracker,\n                                 [this, request]() {\n                                     backup_response response;\n                                     on_cold_backup(request, response);\n                                 },\n                                 get_gpid().thread_hash(),\n                                 std::chrono::seconds(100));\n            } else {\n                // TODO(wutao1): deleting cold backup context should be\n                //               extracted as a function like try_delete_cold_backup_context;\n                // clear obsoleted backup context firstly\n                ddebug(\"%s: clear obsoleted cold backup context, old_backup_id = %\" PRId64\n                       \", old_backup_status = %s\",\n                       new_context->name,\n                       backup_context->request.backup_id,\n                       cold_backup_status_to_string(backup_status));\n                backup_context->cancel();\n                _cold_backup_contexts.erase(policy_name);\n                // go to another round\n                on_cold_backup(request, response);\n            }\n            
return;\n        }\n\n        if (backup_context->request.backup_id > backup_id) {\n            // backup_id is outdated\n            derror(\"%s: request outdated cold backup, current_backup_id = %\" PRId64\n                   \", response ERR_VERSION_OUTDATED\",\n                   new_context->name,\n                   backup_context->request.backup_id);\n            response.err = ERR_VERSION_OUTDATED;\n            return;\n        }\n\n        // for secondary, request is already filtered by primary, so if\n        //      request is repeated, so generate_backup_checkpoint is already running, we do\n        //      nothing;\n        //      request is new, we should call generate_backup_checkpoint;\n\n        // TODO: if secondary's status have changed, how to process the _cold_backup_state,\n        // and how to process the backup_status, cancel/pause\n        if (status() == partition_status::PS_SECONDARY) {\n            if (backup_status == ColdBackupInvalid) {\n                // new backup_request, should set status to ColdBackupChecked to allow secondary\n                // can start to checkpoint\n                backup_context->start_check();\n                backup_context->complete_check(false);\n                if (backup_context->start_checkpoint()) {\n                    _stub->_counter_cold_backup_recent_start_count->increment();\n                    tasking::enqueue(\n                        LPC_BACKGROUND_COLD_BACKUP, &_tracker, [this, backup_context]() {\n                            generate_backup_checkpoint(backup_context);\n                        });\n                }\n            }\n            return;\n        }\n\n        send_backup_request_to_secondary(request);\n\n        if (backup_status == ColdBackupChecking || backup_status == ColdBackupCheckpointing ||\n            backup_status == ColdBackupUploading) {\n            // do nothing\n            ddebug(\"%s: backup is busy, status = %s, progress = %d, response ERR_BUSY\",\n      
             backup_context->name,\n                   cold_backup_status_to_string(backup_status),\n                   backup_context->progress());\n            response.err = ERR_BUSY;\n        } else if (backup_status == ColdBackupInvalid && backup_context->start_check()) {\n            _stub->_counter_cold_backup_recent_start_count->increment();\n            ddebug(\"%s: start checking backup on remote, response ERR_BUSY\", backup_context->name);\n            tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP, nullptr, [backup_context]() {\n                backup_context->check_backup_on_remote();\n            });\n            response.err = ERR_BUSY;\n        } else if (backup_status == ColdBackupChecked && backup_context->start_checkpoint()) {\n            // start generating checkpoint\n            ddebug(\"%s: start generating checkpoint, response ERR_BUSY\", backup_context->name);\n            tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP, &_tracker, [this, backup_context]() {\n                generate_backup_checkpoint(backup_context);\n            });\n            response.err = ERR_BUSY;\n        } else if ((backup_status == ColdBackupCheckpointed || backup_status == ColdBackupPaused) &&\n                   backup_context->start_upload()) {\n            // start uploading checkpoint\n            ddebug(\"%s: start uploading checkpoint, response ERR_BUSY\", backup_context->name);\n            tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP, nullptr, [backup_context]() {\n                backup_context->upload_checkpoint_to_remote();\n            });\n            response.err = ERR_BUSY;\n        } else if (backup_status == ColdBackupFailed) {\n            derror(\"%s: upload checkpoint failed, reason = %s, response ERR_LOCAL_APP_FAILURE\",\n                   backup_context->name,\n                   backup_context->reason());\n            response.err = ERR_LOCAL_APP_FAILURE;\n            backup_context->cancel();\n            
_cold_backup_contexts.erase(policy_name);\n        } else if (backup_status == ColdBackupCompleted) {\n            ddebug(\"%s: upload checkpoint completed, response ERR_OK\", backup_context->name);\n            _backup_mgr->send_clear_request_to_secondaries(backup_context->request.pid,\n                                                           policy_name);\n\n            // clear local checkpoint dirs in background thread\n            _backup_mgr->background_clear_backup_checkpoint(policy_name);\n            response.err = ERR_OK;\n        } else {\n            dwarn(\n                \"%s: unhandled case, handle_status = %s, real_time_status = %s, response ERR_BUSY\",\n                backup_context->name,\n                cold_backup_status_to_string(backup_status),\n                cold_backup_status_to_string(backup_context->status()));\n            response.err = ERR_BUSY;\n        }\n\n        response.progress = backup_context->progress();\n        response.checkpoint_total_size = backup_context->get_checkpoint_total_size();\n        ddebug(\"%s: backup progress is %d\", backup_context->name, response.progress);\n    } else {\n        derror(\n            \"%s: invalid state for cold backup, partition_status = %s, response ERR_INVALID_STATE\",\n            new_context->name,\n            enum_to_string(status()));\n        response.err = ERR_INVALID_STATE;\n    }\n}\n\nvoid replica::send_backup_request_to_secondary(const backup_request &request)\n{\n    for (const auto &target_address : _primary_states.membership.secondaries) {\n        // primary will send backup_request to secondary periodically\n        // so, we shouldn't handle the response\n        rpc::call_one_way_typed(target_address, RPC_COLD_BACKUP, request, get_gpid().thread_hash());\n    }\n}\n\n// backup/backup.<policy_name>.<backup_id>.<decree>.<timestamp>\nstatic std::string backup_get_dir_name(const std::string &policy_name,\n                                       int64_t backup_id,\n     
                                  int64_t decree,\n                                       int64_t timestamp)\n{\n    char buffer[256];\n    sprintf(buffer,\n            \"backup.%s.%\" PRId64 \".%\" PRId64 \".%\" PRId64 \"\",\n            policy_name.c_str(),\n            backup_id,\n            decree,\n            timestamp);\n    return std::string(buffer);\n}\n\n// backup/backup_tmp.<policy_name>.<backup_id>.<timestamp>\nstatic std::string\nbackup_get_tmp_dir_name(const std::string &policy_name, int64_t backup_id, int64_t timestamp)\n{\n    char buffer[256];\n    sprintf(\n        buffer, \"backup_tmp.%s.%\" PRId64 \".%\" PRId64 \"\", policy_name.c_str(), backup_id, timestamp);\n    return std::string(buffer);\n}\n\n// returns:\n//   0 : not related\n//   1 : related (belong to this policy but not belong to this backup_context)\n//   2 : valid (belong to this policy and belong to this backup_context)\nstatic int is_related_or_valid_checkpoint(const std::string &chkpt_dirname,\n                                          const cold_backup_context_ptr &backup_context)\n{\n    std::vector<std::string> strs;\n    ::dsn::utils::split_args(chkpt_dirname.c_str(), strs, '.');\n    if (strs.size() == 4 && strs[0] == std::string(\"backup_tmp\") &&\n        strs[1] == backup_context->request.policy.policy_name) {\n        // backup_tmp.<policy_name>.<backup_id>.<timestamp>\n        // refer to backup_get_tmp_dir_name().\n        int64_t backup_id = boost::lexical_cast<int64_t>(strs[2]);\n        if (backup_id < backup_context->request.backup_id) {\n            // it belongs to old backup_context, we can remove it safely.\n            return 1;\n        }\n    } else if (strs.size() == 5 && strs[0] == std::string(\"backup_tmp\") &&\n               strs[4] == std::string(\"tmp\") &&\n               strs[1] == backup_context->request.policy.policy_name) {\n        // backup_tmp.<policy_name>.<backup_id>.<timestamp>.tmp\n        // refer to 
CheckpointImpl::CreateCheckpointQuick().\n        int64_t backup_id = boost::lexical_cast<int64_t>(strs[2]);\n        if (backup_id < backup_context->request.backup_id) {\n            // it belongs to old backup_context, we can remove it safely.\n            return 1;\n        }\n    } else if (strs.size() == 5 && strs[0] == std::string(\"backup\") &&\n               strs[1] == backup_context->request.policy.policy_name) {\n        // backup.<policy_name>.<backup_id>.<decree>.<timestamp>\n        // refer to backup_get_dir_name().\n        int64_t backup_id = boost::lexical_cast<int64_t>(strs[2]);\n        // here, we only need policy_name and backup_id to verify whether chkpt_dirname belong\n        // to this backup_context.\n        if (backup_id == backup_context->request.backup_id) {\n            // it belongs to this backup_context.\n            return 2;\n        } else if (backup_id < backup_context->request.backup_id) {\n            // it belongs to old backup_context, we can remove it safely.\n            return 1;\n        }\n    } else {\n        // unknown dir, ignore it\n        dwarn(\n            \"%s: found a invalid checkpoint dir(%s)\", backup_context->name, chkpt_dirname.c_str());\n    }\n    return 0;\n}\n\n// filter backup checkpoint under 'dir'\n//  - find the valid backup checkpoint dir if exist\n//  - find all the backup checkpoint belong to this policy, mainly obsolete backup checkpoint\nstatic bool filter_checkpoint(const std::string &dir,\n                              const cold_backup_context_ptr &backup_context,\n                              /*out*/ std::vector<std::string> &related_chkpt_dirs,\n                              /*out*/ std::string &valid_chkpt_dir)\n{\n    valid_chkpt_dir.clear();\n    related_chkpt_dirs.clear();\n    // list sub dirs\n    std::vector<std::string> sub_dirs;\n    if (!utils::filesystem::get_subdirectories(dir, sub_dirs, false)) {\n        derror(\"%s: list sub dirs of dir %s failed\", 
backup_context->name, dir.c_str());\n        return false;\n    }\n\n    for (std::string &d : sub_dirs) {\n        std::string dirname = utils::filesystem::get_file_name(d);\n        int ret = is_related_or_valid_checkpoint(dirname, backup_context);\n        if (ret == 1) {\n            related_chkpt_dirs.emplace_back(std::move(dirname));\n        } else if (ret == 2) {\n            dassert(valid_chkpt_dir.empty(),\n                    \"%s: there are two valid backup checkpoint dir, %s VS %s\",\n                    backup_context->name,\n                    valid_chkpt_dir.c_str(),\n                    dirname.c_str());\n            valid_chkpt_dir = dirname;\n        }\n    }\n    return true;\n}\n\nstatic bool\nstatistic_file_infos_under_dir(const std::string &dir,\n                               /*out*/ std::vector<std::pair<std::string, int64_t>> &file_infos,\n                               /*out*/ int64_t &total_size)\n{\n    std::vector<std::string> sub_files;\n    if (!utils::filesystem::get_subfiles(dir, sub_files, false)) {\n        derror(\"list sub files of dir %s failed\", dir.c_str());\n        return false;\n    }\n\n    total_size = 0;\n    file_infos.clear();\n\n    for (std::string &file : sub_files) {\n        std::pair<std::string, int64_t> file_info;\n\n        if (!utils::filesystem::file_size(file, file_info.second)) {\n            derror(\"get file size of %s failed\", file.c_str());\n            return false;\n        }\n        file_info.first = utils::filesystem::get_file_name(file);\n        total_size += file_info.second;\n\n        file_infos.emplace_back(std::move(file_info));\n    }\n    return true;\n}\n\nstatic bool backup_parse_dir_name(const char *name,\n                                  std::string &policy_name,\n                                  int64_t &backup_id,\n                                  int64_t &decree,\n                                  int64_t &timestamp)\n{\n    std::vector<std::string> strs;\n    
::dsn::utils::split_args(name, strs, '.');\n    if (strs.size() < 5) {\n        return false;\n    } else {\n        policy_name = strs[1];\n        backup_id = boost::lexical_cast<int64_t>(strs[2]);\n        decree = boost::lexical_cast<int64_t>(strs[3]);\n        timestamp = boost::lexical_cast<int64_t>(strs[4]);\n        return (std::string(name) ==\n                backup_get_dir_name(policy_name, backup_id, decree, timestamp));\n    }\n}\n\n// run in REPLICATION_LONG thread\n// Effection:\n// - may ignore_checkpoint() if in invalid status\n// - may fail_checkpoint() if some error occurs\n// - may complete_checkpoint() and schedule on_cold_backup() if backup checkpoint dir is already\n// exist\n// - may schedule trigger_async_checkpoint_for_backup() if backup checkpoint dir is not exist\nvoid replica::generate_backup_checkpoint(cold_backup_context_ptr backup_context)\n{\n    if (backup_context->status() != ColdBackupCheckpointing) {\n        ddebug(\"%s: ignore generating backup checkpoint because backup_status = %s\",\n               backup_context->name,\n               cold_backup_status_to_string(backup_context->status()));\n        backup_context->ignore_checkpoint();\n        return;\n    }\n\n    // prepare back dir\n    auto backup_dir = _app->backup_dir();\n    if (!utils::filesystem::directory_exists(backup_dir) &&\n        !utils::filesystem::create_directory(backup_dir)) {\n        derror(\"%s: create backup dir %s failed\", backup_context->name, backup_dir.c_str());\n        backup_context->fail_checkpoint(\"create backup dir failed\");\n        return;\n    }\n\n    std::vector<std::string> related_backup_chkpt_dirname;\n    std::string valid_backup_chkpt_dirname;\n    if (!filter_checkpoint(\n            backup_dir, backup_context, related_backup_chkpt_dirname, valid_backup_chkpt_dirname)) {\n        // encounter some error, just return\n        backup_context->fail_checkpoint(\"list sub backup dir failed\");\n        return;\n    }\n    if 
(!valid_backup_chkpt_dirname.empty()) {\n        std::vector<std::pair<std::string, int64_t>> file_infos;\n        int64_t total_size = 0;\n        std::string valid_chkpt_full_path =\n            utils::filesystem::path_combine(backup_dir, valid_backup_chkpt_dirname);\n        // parse checkpoint dirname\n        std::string policy_name;\n        int64_t backup_id = 0, decree = 0, timestamp = 0;\n        dassert(backup_parse_dir_name(\n                    valid_backup_chkpt_dirname.c_str(), policy_name, backup_id, decree, timestamp),\n                \"%s: valid chekpoint dirname %s\",\n                backup_context->name,\n                valid_backup_chkpt_dirname.c_str());\n\n        if (statistic_file_infos_under_dir(valid_chkpt_full_path, file_infos, total_size)) {\n            backup_context->checkpoint_decree = decree;\n            backup_context->checkpoint_timestamp = timestamp;\n            backup_context->checkpoint_dir = valid_chkpt_full_path;\n            for (std::pair<std::string, int64_t> &p : file_infos) {\n                backup_context->checkpoint_files.emplace_back(std::move(p.first));\n                backup_context->checkpoint_file_sizes.emplace_back(std::move(p.second));\n            }\n            backup_context->checkpoint_file_total_size = total_size;\n            backup_context->complete_checkpoint();\n\n            ddebug(\"%s: backup checkpoint aleady exist, dir = %s, file_count = %d, total_size = \"\n                   \"%\" PRId64,\n                   backup_context->name,\n                   backup_context->checkpoint_dir.c_str(),\n                   (int)file_infos.size(),\n                   total_size);\n            // TODO: in primary, this will make the request send to secondary again\n            tasking::enqueue(LPC_REPLICATION_COLD_BACKUP,\n                             &_tracker,\n                             [this, backup_context]() {\n                                 backup_response response;\n                             
    on_cold_backup(backup_context->request, response);\n                             },\n                             get_gpid().thread_hash());\n        } else {\n            backup_context->fail_checkpoint(\"statistic file info under checkpoint failed\");\n            return;\n        }\n    } else {\n        ddebug(\"%s: backup checkpoint not exist, start to trigger async checkpoint\",\n               backup_context->name);\n        tasking::enqueue(\n            LPC_REPLICATION_COLD_BACKUP,\n            &_tracker,\n            [this, backup_context]() { trigger_async_checkpoint_for_backup(backup_context); },\n            get_gpid().thread_hash());\n    }\n\n    // clear related but not valid checkpoint\n    for (const std::string &dirname : related_backup_chkpt_dirname) {\n        std::string full_path = utils::filesystem::path_combine(backup_dir, dirname);\n        ddebug(\"%s: found obsolete backup checkpoint dir(%s), remove it\",\n               backup_context->name,\n               full_path.c_str());\n        if (!utils::filesystem::remove_path(full_path)) {\n            dwarn(\"%s: remove obsolete backup checkpoint dir(%s) failed\",\n                  backup_context->name,\n                  full_path.c_str());\n        }\n    }\n}\n\n// run in REPLICATION thread\n// Effection:\n// - may ignore_checkpoint() if in invalid status\n// - may fail_checkpoint() if some error occurs\n// - may trigger async checkpoint and invoke wait_async_checkpoint_for_backup()\nvoid replica::trigger_async_checkpoint_for_backup(cold_backup_context_ptr backup_context)\n{\n    _checker.only_one_thread_access();\n\n    if (backup_context->status() != ColdBackupCheckpointing) {\n        ddebug(\"%s: ignore triggering async checkpoint because backup_status = %s\",\n               backup_context->name,\n               cold_backup_status_to_string(backup_context->status()));\n        backup_context->ignore_checkpoint();\n        return;\n    }\n\n    if (status() != 
partition_status::PS_PRIMARY && status() != partition_status::PS_SECONDARY) {\n        ddebug(\"%s: ignore triggering async checkpoint because partition_status = %s\",\n               backup_context->name,\n               enum_to_string(status()));\n        backup_context->ignore_checkpoint();\n        return;\n    }\n\n    decree durable_decree = last_durable_decree();\n    if (backup_context->checkpoint_decree > 0 &&\n        durable_decree >= backup_context->checkpoint_decree) {\n        // checkpoint done\n    } else if (backup_context->checkpoint_decree > 0 &&\n               backup_context->durable_decree_when_checkpoint == durable_decree) {\n        // already triggered, just wait\n        char time_buf[20];\n        dsn::utils::time_ms_to_date_time(backup_context->checkpoint_timestamp, time_buf, 20);\n        ddebug(\"%s: do not trigger async checkpoint because it is already triggered, \"\n               \"checkpoint_decree = %\" PRId64 \", checkpoint_timestamp = %\" PRId64 \" (%s), \"\n               \"durable_decree_when_checkpoint = %\" PRId64,\n               backup_context->name,\n               backup_context->checkpoint_decree,\n               backup_context->checkpoint_timestamp,\n               time_buf,\n               backup_context->durable_decree_when_checkpoint);\n    } else { // backup_context->checkpoint_decree == 0 ||\n             // backup_context->durable_decree_when_checkpoint != durable_decree\n        if (backup_context->checkpoint_decree == 0) {\n            // first trigger\n            backup_context->checkpoint_decree = last_committed_decree();\n        } else { // backup_context->durable_decree_when_checkpoint != durable_decree\n            // checkpoint generated, but is behind checkpoint_decree, need trigger again\n            dassert(backup_context->durable_decree_when_checkpoint < durable_decree,\n                    \"durable_decree_when_checkpoint(%\" PRId64 \") < durable_decree(%\" PRId64 \")\",\n                    
backup_context->durable_decree_when_checkpoint,\n                    durable_decree);\n            ddebug(\"%s: need trigger async checkpoint again\", backup_context->name);\n        }\n        backup_context->checkpoint_timestamp = dsn_now_ms();\n        backup_context->durable_decree_when_checkpoint = durable_decree;\n        char time_buf[20];\n        dsn::utils::time_ms_to_date_time(backup_context->checkpoint_timestamp, time_buf, 20);\n        ddebug(\"%s: trigger async checkpoint, \"\n               \"checkpoint_decree = %\" PRId64 \", checkpoint_timestamp = %\" PRId64 \" (%s), \"\n               \"durable_decree_when_checkpoint = %\" PRId64,\n               backup_context->name,\n               backup_context->checkpoint_decree,\n               backup_context->checkpoint_timestamp,\n               time_buf,\n               backup_context->durable_decree_when_checkpoint);\n        init_checkpoint(true);\n    }\n\n    // after triggering init_checkpoint, we just wait until it finish\n    wait_async_checkpoint_for_backup(backup_context);\n}\n\n// run in REPLICATION thread\n// Effection:\n// - may ignore_checkpoint() if in invalid status\n// - may delay some time and schedule trigger_async_checkpoint_for_backup() if async checkpoint not\n// completed\n// - may schedule local_create_backup_checkpoint if async checkpoint completed\nvoid replica::wait_async_checkpoint_for_backup(cold_backup_context_ptr backup_context)\n{\n    _checker.only_one_thread_access();\n\n    if (backup_context->status() != ColdBackupCheckpointing) {\n        ddebug(\"%s: ignore waiting async checkpoint because backup_status = %s\",\n               backup_context->name,\n               cold_backup_status_to_string(backup_context->status()));\n        backup_context->ignore_checkpoint();\n        return;\n    }\n\n    if (status() != partition_status::PS_PRIMARY && status() != partition_status::PS_SECONDARY) {\n        ddebug(\"%s: ignore waiting async checkpoint because partition_status = 
%s\",\n               backup_context->name,\n               enum_to_string(status()));\n        backup_context->ignore_checkpoint();\n        return;\n    }\n\n    decree du = last_durable_decree();\n    if (du < backup_context->checkpoint_decree) {\n        ddebug(\"%s: async checkpoint not done, we just wait it done, \"\n               \"last_durable_decree = %\" PRId64 \", backup_checkpoint_decree = %\" PRId64,\n               backup_context->name,\n               du,\n               backup_context->checkpoint_decree);\n        tasking::enqueue(\n            LPC_REPLICATION_COLD_BACKUP,\n            &_tracker,\n            [this, backup_context]() { trigger_async_checkpoint_for_backup(backup_context); },\n            get_gpid().thread_hash(),\n            std::chrono::seconds(10));\n    } else {\n        ddebug(\"%s: async checkpoint done, last_durable_decree = %\" PRId64\n               \", backup_context->checkpoint_decree = %\" PRId64,\n               backup_context->name,\n               du,\n               backup_context->checkpoint_decree);\n        tasking::enqueue(LPC_BACKGROUND_COLD_BACKUP, &_tracker, [this, backup_context]() {\n            local_create_backup_checkpoint(backup_context);\n        });\n    }\n}\n\n// run in REPLICATION_LONG thread\n// Effection:\n// - may ignore_checkpoint() if in invalid status\n// - may fail_checkpoint() if some error occurs\n// - may complete_checkpoint() and schedule on_cold_backup() if checkpoint dir is successfully\n// copied\nvoid replica::local_create_backup_checkpoint(cold_backup_context_ptr backup_context)\n{\n    if (backup_context->status() != ColdBackupCheckpointing) {\n        ddebug(\"%s: ignore generating backup checkpoint because backup_status = %s\",\n               backup_context->name,\n               cold_backup_status_to_string(backup_context->status()));\n        backup_context->ignore_checkpoint();\n        return;\n    }\n\n    // the real checkpoint decree may be larger than 
backup_context->checkpoint_decree,\n    // so we need copy checkpoint to backup_checkpoint_tmp_dir_path, and then rename it.\n    std::string backup_checkpoint_tmp_dir_path = utils::filesystem::path_combine(\n        _app->backup_dir(),\n        backup_get_tmp_dir_name(backup_context->request.policy.policy_name,\n                                backup_context->request.backup_id,\n                                backup_context->checkpoint_timestamp));\n    int64_t last_decree = 0;\n    dsn::error_code err =\n        _app->copy_checkpoint_to_dir(backup_checkpoint_tmp_dir_path.c_str(), &last_decree);\n    if (err != ERR_OK) {\n        // try local_create_backup_checkpoint 10s later\n        ddebug(\"%s: create backup checkpoint failed with err = %s, try call \"\n               \"local_create_backup_checkpoint 10s later\",\n               backup_context->name,\n               err.to_string());\n        utils::filesystem::remove_path(backup_checkpoint_tmp_dir_path);\n        tasking::enqueue(\n            LPC_BACKGROUND_COLD_BACKUP,\n            &_tracker,\n            [this, backup_context]() { local_create_backup_checkpoint(backup_context); },\n            0,\n            std::chrono::seconds(10));\n    } else {\n        dassert(last_decree >= backup_context->checkpoint_decree,\n                \"%\" PRId64 \" VS %\" PRId64 \"\",\n                last_decree,\n                backup_context->checkpoint_decree);\n        backup_context->checkpoint_decree = last_decree; // update to real decree\n        std::string backup_checkpoint_dir_path = utils::filesystem::path_combine(\n            _app->backup_dir(),\n            backup_get_dir_name(backup_context->request.policy.policy_name,\n                                backup_context->request.backup_id,\n                                backup_context->checkpoint_decree,\n                                backup_context->checkpoint_timestamp));\n        if (!utils::filesystem::rename_path(backup_checkpoint_tmp_dir_path,\n     
                                       backup_checkpoint_dir_path)) {\n            derror(\"%s: rename checkpoint dir(%s) to dir(%s) failed\",\n                   backup_context->name,\n                   backup_checkpoint_tmp_dir_path.c_str(),\n                   backup_checkpoint_dir_path.c_str());\n            utils::filesystem::remove_path(backup_checkpoint_tmp_dir_path);\n            utils::filesystem::remove_path(backup_checkpoint_dir_path);\n            backup_context->fail_checkpoint(\"rename checkpoint dir failed\");\n            return;\n        }\n\n        std::vector<std::pair<std::string, int64_t>> file_infos;\n        int64_t total_size = 0;\n        if (!statistic_file_infos_under_dir(backup_checkpoint_dir_path, file_infos, total_size)) {\n            derror(\"%s: statistic file info under dir(%s) failed\",\n                   backup_context->name,\n                   backup_checkpoint_dir_path.c_str());\n            backup_context->fail_checkpoint(\"statistic file info under dir failed\");\n            return;\n        }\n\n        ddebug(\"%s: generate backup checkpoint succeed, dir = %s, file_count = %d, total_size = \"\n               \"%\" PRId64,\n               backup_context->name,\n               backup_checkpoint_dir_path.c_str(),\n               (int)file_infos.size(),\n               total_size);\n        backup_context->checkpoint_dir = backup_checkpoint_dir_path;\n        for (std::pair<std::string, int64_t> &pair : file_infos) {\n            backup_context->checkpoint_files.emplace_back(std::move(pair.first));\n            backup_context->checkpoint_file_sizes.emplace_back(std::move(pair.second));\n        }\n        backup_context->checkpoint_file_total_size = total_size;\n        backup_context->complete_checkpoint();\n        tasking::enqueue(LPC_REPLICATION_COLD_BACKUP,\n                         &_tracker,\n                         [this, backup_context]() {\n                             backup_response response;\n                 
            on_cold_backup(backup_context->request, response);\n                         },\n                         get_gpid().thread_hash());\n    }\n}\n\nvoid replica::set_backup_context_cancel()\n{\n    for (auto &pair : _cold_backup_contexts) {\n        pair.second->cancel();\n        ddebug(\"%s: cancel backup progress, backup_request = %s\",\n               name(),\n               boost::lexical_cast<std::string>(pair.second->request).c_str());\n    }\n}\n\nvoid replica::clear_cold_backup_state() { _cold_backup_contexts.clear(); }\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_check.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     replica membership state periodical checking\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"replica.h\"\n#include \"mutation.h\"\n#include \"mutation_log.h\"\n#include \"replica_stub.h\"\n\n#include \"duplication/replica_duplicator_manager.h\"\n#include \"split/replica_split_manager.h\"\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/utility/fail_point.h>\n\nnamespace dsn {\nnamespace replication {\nDSN_DECLARE_bool(empty_write_disabled);\n\nvoid replica::init_group_check()\n{\n    FAIL_POINT_INJECT_F(\"replica_init_group_check\", 
[](dsn::string_view) {});\n\n    _checker.only_one_thread_access();\n\n    ddebug(\"%s: init group check\", name());\n\n    if (partition_status::PS_PRIMARY != status() || _options->group_check_disabled)\n        return;\n\n    dassert(nullptr == _primary_states.group_check_task, \"\");\n    _primary_states.group_check_task =\n        tasking::enqueue_timer(LPC_GROUP_CHECK,\n                               &_tracker,\n                               [this] { broadcast_group_check(); },\n                               std::chrono::milliseconds(_options->group_check_interval_ms),\n                               get_gpid().thread_hash());\n}\n\nvoid replica::broadcast_group_check()\n{\n    FAIL_POINT_INJECT_F(\"replica_broadcast_group_check\", [](dsn::string_view) {});\n\n    dassert(nullptr != _primary_states.group_check_task, \"\");\n\n    ddebug(\"%s: start to broadcast group check\", name());\n\n    if (_primary_states.group_check_pending_replies.size() > 0) {\n        dwarn(\"%s: %u group check replies are still pending when doing next round check, cancel \"\n              \"first\",\n              name(),\n              static_cast<int>(_primary_states.group_check_pending_replies.size()));\n\n        for (auto it = _primary_states.group_check_pending_replies.begin();\n             it != _primary_states.group_check_pending_replies.end();\n             ++it) {\n            it->second->cancel(true);\n        }\n        _primary_states.group_check_pending_replies.clear();\n    }\n\n    for (auto it = _primary_states.statuses.begin(); it != _primary_states.statuses.end(); ++it) {\n        if (it->first == _stub->_primary_address)\n            continue;\n\n        ::dsn::rpc_address addr = it->first;\n        std::shared_ptr<group_check_request> request(new group_check_request);\n\n        request->app = _app_info;\n        request->node = addr;\n        _primary_states.get_replica_config(it->second, request->config);\n        request->last_committed_decree = 
last_committed_decree();\n        request->__set_confirmed_decree(_duplication_mgr->min_confirmed_decree());\n        // set split context in group_check_request\n        if (request->config.status == partition_status::PS_SECONDARY &&\n            _split_mgr->get_meta_split_status() != split_status::NOT_SPLIT) {\n            request->__set_meta_split_status(_split_mgr->get_meta_split_status());\n            if (_split_mgr->is_splitting()) {\n                request->__set_child_gpid(_split_mgr->get_child_gpid());\n            }\n        }\n\n        if (request->config.status == partition_status::PS_POTENTIAL_SECONDARY) {\n            auto it = _primary_states.learners.find(addr);\n            dassert(\n                it != _primary_states.learners.end(), \"learner %s is missing\", addr.to_string());\n            request->config.learner_signature = it->second.signature;\n        }\n\n        ddebug(\"%s: send group check to %s with state %s\",\n               name(),\n               addr.to_string(),\n               enum_to_string(it->second));\n\n        dsn::task_ptr callback_task =\n            rpc::call(addr,\n                      RPC_GROUP_CHECK,\n                      *request,\n                      &_tracker,\n                      [=](error_code err, group_check_response &&resp) {\n                          auto alloc = std::make_shared<group_check_response>(std::move(resp));\n                          on_group_check_reply(err, request, alloc);\n                      },\n                      std::chrono::milliseconds(0),\n                      get_gpid().thread_hash());\n\n        _primary_states.group_check_pending_replies[addr] = callback_task;\n    }\n\n    // send empty prepare when necessary\n    if (!FLAGS_empty_write_disabled &&\n        dsn_now_ms() >= _primary_states.last_prepare_ts_ms + _options->group_check_interval_ms) {\n        mutation_ptr mu = new_mutation(invalid_decree);\n        mu->add_client_request(RPC_REPLICATION_WRITE_EMPTY, 
nullptr);\n        init_prepare(mu, false);\n    }\n}\n\nvoid replica::on_group_check(const group_check_request &request,\n                             /*out*/ group_check_response &response)\n{\n    _checker.only_one_thread_access();\n\n    ddebug_replica(\"process group check, primary = {}, ballot = {}, status = {}, \"\n                   \"last_committed_decree = {}, confirmed_decree = {}\",\n                   request.config.primary.to_string(),\n                   request.config.ballot,\n                   enum_to_string(request.config.status),\n                   request.last_committed_decree,\n                   request.__isset.confirmed_decree ? request.confirmed_decree : invalid_decree);\n\n    if (request.config.ballot < get_ballot()) {\n        response.err = ERR_VERSION_OUTDATED;\n        dwarn(\"%s: on_group_check reply %s\", name(), response.err.to_string());\n        return;\n    } else if (request.config.ballot > get_ballot()) {\n        if (!update_local_configuration(request.config)) {\n            response.err = ERR_INVALID_STATE;\n            dwarn(\"%s: on_group_check reply %s\", name(), response.err.to_string());\n            return;\n        }\n    } else if (is_same_ballot_status_change_allowed(status(), request.config.status)) {\n        update_local_configuration(request.config, true);\n    }\n\n    _duplication_mgr->update_confirmed_decree_if_secondary(request.confirmed_decree);\n\n    switch (status()) {\n    case partition_status::PS_INACTIVE:\n        break;\n    case partition_status::PS_SECONDARY:\n        if (request.last_committed_decree > last_committed_decree()) {\n            _prepare_list->commit(request.last_committed_decree, COMMIT_TO_DECREE_HARD);\n        }\n        // the group check may trigger start/finish/cancel/pause a split on the secondary.\n        _split_mgr->trigger_secondary_parent_split(request, response);\n        response.__set_disk_status(_disk_status);\n        break;\n    case 
partition_status::PS_POTENTIAL_SECONDARY:\n        init_learn(request.config.learner_signature);\n        break;\n    case partition_status::PS_ERROR:\n        break;\n    default:\n        dassert(false, \"invalid partition_status, status = %s\", enum_to_string(status()));\n    }\n\n    response.pid = get_gpid();\n    response.node = _stub->_primary_address;\n    response.err = ERR_OK;\n    if (status() == partition_status::PS_ERROR) {\n        response.err = ERR_INVALID_STATE;\n        dwarn(\"%s: on_group_check reply %s\", name(), response.err.to_string());\n    }\n\n    response.last_committed_decree_in_app = _app->last_committed_decree();\n    response.last_committed_decree_in_prepare_list = last_committed_decree();\n    response.learner_status_ = _potential_secondary_states.learning_status;\n    response.learner_signature = _potential_secondary_states.learning_version;\n}\n\nvoid replica::on_group_check_reply(error_code err,\n                                   const std::shared_ptr<group_check_request> &req,\n                                   const std::shared_ptr<group_check_response> &resp)\n{\n    _checker.only_one_thread_access();\n\n    if (partition_status::PS_PRIMARY != status() || req->config.ballot < get_ballot()) {\n        return;\n    }\n\n    auto r = _primary_states.group_check_pending_replies.erase(req->node);\n    dassert(r == 1, \"invalid node address, address = %s\", req->node.to_string());\n\n    if (err != ERR_OK || resp->err != ERR_OK) {\n        if (ERR_OK == err) {\n            err = resp->err;\n        }\n        handle_remote_failure(req->config.status, req->node, err, \"group check\");\n        _stub->_counter_replicas_recent_group_check_fail_count->increment();\n    } else {\n        if (resp->learner_status_ == learner_status::LearningSucceeded &&\n            req->config.status == partition_status::PS_POTENTIAL_SECONDARY) {\n            handle_learning_succeeded_on_primary(req->node, resp->learner_signature);\n        }\n        
_split_mgr->primary_parent_handle_stop_split(req, resp);\n        if (req->config.status == partition_status::PS_SECONDARY) {\n            _primary_states.secondary_disk_status[req->node] = resp->disk_status;\n        }\n    }\n}\n\nvoid replica::inject_error(error_code err)\n{\n    tasking::enqueue(LPC_REPLICATION_ERROR,\n                     &_tracker,\n                     [this, err]() { handle_local_failure(err); },\n                     get_gpid().thread_hash());\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_chkpt.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     checkpoint the replicated app\n *\n * Revision history:\n *     Nov., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"replica.h\"\n#include \"mutation.h\"\n#include \"mutation_log.h\"\n#include \"replica_stub.h\"\n#include \"duplication/replica_duplicator_manager.h\"\n#include \"split/replica_split_manager.h\"\n#include \"dsn/utility/fail_point.h\"\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/chrono_literals.h>\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/dist/fmt_logging.h>\n\nnamespace dsn {\nnamespace replication {\n\nconst std::string kCheckpointFolderPrefix /*NOLINT*/ = \"checkpoint\";\n\nstatic 
std::string checkpoint_folder(int64_t decree)\n{\n    return fmt::format(\"{}.{}\", kCheckpointFolderPrefix, decree);\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica::on_checkpoint_timer()\n{\n    _checker.only_one_thread_access();\n\n    if (dsn_now_ms() > _next_checkpoint_interval_trigger_time_ms) {\n        // we trigger emergency checkpoint if no checkpoint generated for a long time\n        ddebug(\"%s: trigger emergency checkpoint by checkpoint_max_interval_hours, \"\n               \"config_interval = %dh (%\" PRIu64 \"ms), random_interval = %\" PRIu64 \"ms\",\n               name(),\n               _options->checkpoint_max_interval_hours,\n               _options->checkpoint_max_interval_hours * 3600000UL,\n               _next_checkpoint_interval_trigger_time_ms - _last_checkpoint_generate_time_ms);\n        init_checkpoint(true);\n    } else {\n        ddebug(\"%s: trigger non-emergency checkpoint\",\n               name(),\n               _options->checkpoint_max_interval_hours);\n        init_checkpoint(false);\n    }\n\n    if (_private_log) {\n        mutation_log_ptr plog = _private_log;\n\n        decree last_durable_decree = _app->last_durable_decree();\n        decree min_confirmed_decree = _duplication_mgr->min_confirmed_decree();\n        decree cleanable_decree = last_durable_decree;\n        int64_t valid_start_offset = _app->init_info().init_offset_in_private_log;\n\n        if (min_confirmed_decree >= 0) {\n            // Do not rely on valid_start_offset for GC during duplication.\n            // cleanable_decree is the only GC trigger.\n            valid_start_offset = 0;\n            if (min_confirmed_decree < last_durable_decree) {\n                ddebug_replica(\"gc_private {}: delay gc for duplication: min_confirmed_decree({}) \"\n                               \"last_durable_decree({})\",\n                               enum_to_string(status()),\n                               min_confirmed_decree,\n                        
       last_durable_decree);\n                cleanable_decree = min_confirmed_decree;\n            } else {\n                ddebug_replica(\"gc_private {}: min_confirmed_decree({}) last_durable_decree({})\",\n                               enum_to_string(status()),\n                               min_confirmed_decree,\n                               last_durable_decree);\n            }\n        } else if (is_duplication_master()) {\n            // unsure if the logs can be dropped, because min_confirmed_decree\n            // is currently unavailable\n            ddebug_replica(\n                \"gc_private {}: skip gc because confirmed duplication progress is unknown\",\n                enum_to_string(status()));\n            return;\n        }\n\n        tasking::enqueue(LPC_GARBAGE_COLLECT_LOGS_AND_REPLICAS,\n                         &_tracker,\n                         [this, plog, cleanable_decree, valid_start_offset] {\n                             // run in background thread to avoid file deletion operation blocking\n                             // replication thread.\n                             if (status() == partition_status::PS_ERROR ||\n                                 status() == partition_status::PS_INACTIVE)\n                                 return;\n                             plog->garbage_collection(\n                                 get_gpid(),\n                                 cleanable_decree,\n                                 valid_start_offset,\n                                 (int64_t)_options->log_private_reserve_max_size_mb * 1024 * 1024,\n                                 (int64_t)_options->log_private_reserve_max_time_seconds);\n                             if (status() == partition_status::PS_PRIMARY)\n                                 _counter_private_log_size->set(_private_log->total_size() /\n                                                                1000000);\n                         });\n    }\n}\n\n// ThreadPool: 
THREAD_POOL_REPLICATION\nerror_code replica::trigger_manual_emergency_checkpoint(decree old_decree)\n{\n    _checker.only_one_thread_access();\n\n    if (_app == nullptr) {\n        derror_replica(\"app hasn't been init or has been released\");\n        return ERR_LOCAL_APP_FAILURE;\n    }\n\n    if (old_decree <= _app->last_durable_decree()) {\n        ddebug_replica(\"checkpoint has been completed: old = {} vs latest = {}\",\n                       old_decree,\n                       _app->last_durable_decree());\n        _is_manual_emergency_checkpointing = false;\n        _stub->_manual_emergency_checkpointing_count == 0\n            ? 0\n            : (--_stub->_manual_emergency_checkpointing_count);\n        return ERR_OK;\n    }\n\n    if (_is_manual_emergency_checkpointing) {\n        dwarn_replica(\"replica is checkpointing, last_durable_decree = {}\",\n                      _app->last_durable_decree());\n        return ERR_BUSY;\n    }\n\n    if (++_stub->_manual_emergency_checkpointing_count >\n        FLAGS_max_concurrent_manual_emergency_checkpointing_count) {\n        dwarn_replica(\"please try again later because checkpointing exceed max running count[{}]\",\n                      FLAGS_max_concurrent_manual_emergency_checkpointing_count);\n        --_stub->_manual_emergency_checkpointing_count;\n        return ERR_TRY_AGAIN;\n    }\n\n    init_checkpoint(true);\n    _is_manual_emergency_checkpointing = true;\n    return ERR_OK;\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica::init_checkpoint(bool is_emergency)\n{\n    // only applicable to primary and secondary replicas\n    if (status() != partition_status::PS_PRIMARY && status() != partition_status::PS_SECONDARY) {\n        ddebug(\"%s: ignore doing checkpoint for status = %s, is_emergency = %s\",\n               name(),\n               enum_to_string(status()),\n               (is_emergency ? 
\"true\" : \"false\"));\n        return;\n    }\n\n    // here we demand that async_checkpoint() is implemented.\n    // we delay some time to run background_async_checkpoint() to pass unit test dsn.rep_tests.\n    //\n    // we may issue a new task to do background_async_checkpoint\n    // even if the old one hasn't finished yet\n    tasking::enqueue(LPC_CHECKPOINT_REPLICA,\n                     &_tracker,\n                     [this, is_emergency] { background_async_checkpoint(is_emergency); },\n                     0,\n                     10_ms);\n\n    if (is_emergency)\n        _stub->_counter_recent_trigger_emergency_checkpoint_count->increment();\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica::on_query_last_checkpoint(/*out*/ learn_response &response)\n{\n    _checker.only_one_thread_access();\n\n    if (_app->last_durable_decree() == 0) {\n        response.err = ERR_PATH_NOT_FOUND;\n        return;\n    }\n\n    blob placeholder;\n    int err = _app->get_checkpoint(0, placeholder, response.state);\n    if (err != 0) {\n        response.err = ERR_GET_LEARN_STATE_FAILED;\n    } else {\n        response.err = ERR_OK;\n        response.last_committed_decree = last_committed_decree();\n        // for example: base_local_dir = \"./data\" + \"checkpoint.1024\" = \"./data/checkpoint.1024\"\n        response.base_local_dir = utils::filesystem::path_combine(\n            _app->data_dir(), checkpoint_folder(response.state.to_decree_included));\n        response.address = _stub->_primary_address;\n        for (auto &file : response.state.files) {\n            // response.state.files contain file absolute path, for example:\n            // \"./data/checkpoint.1024/1.sst\" use `substr` to get the file name: 1.sst\n            file = file.substr(response.base_local_dir.length() + 1);\n        }\n    }\n}\n\n// run in background thread\nerror_code replica::background_async_checkpoint(bool is_emergency)\n{\n    uint64_t start_time = dsn_now_ns();\n    decree 
old_durable = _app->last_durable_decree();\n    auto err = _app->async_checkpoint(is_emergency);\n    uint64_t used_time = dsn_now_ns() - start_time;\n    dassert(err != ERR_NOT_IMPLEMENTED, \"err == ERR_NOT_IMPLEMENTED\");\n    if (err == ERR_OK) {\n        if (old_durable != _app->last_durable_decree()) {\n            // if no need to generate new checkpoint, async_checkpoint() also returns ERR_OK,\n            // so we should check if a new checkpoint has been generated.\n            ddebug(\"%s: call app.async_checkpoint() succeed, time_used_ns = %\" PRIu64 \", \"\n                   \"app_last_committed_decree = %\" PRId64 \", app_last_durable_decree = (%\" PRId64\n                   \" => %\" PRId64 \")\",\n                   name(),\n                   used_time,\n                   _app->last_committed_decree(),\n                   old_durable,\n                   _app->last_durable_decree());\n            update_last_checkpoint_generate_time();\n        }\n\n        if (_is_manual_emergency_checkpointing) {\n            _is_manual_emergency_checkpointing = false;\n            _stub->_manual_emergency_checkpointing_count == 0\n                ? 
0\n                : (--_stub->_manual_emergency_checkpointing_count);\n        }\n\n        return err;\n    }\n\n    if (err == ERR_TRY_AGAIN) {\n        // already triggered memory flushing on async_checkpoint(), then try again later.\n        ddebug(\"%s: call app.async_checkpoint() returns ERR_TRY_AGAIN, time_used_ns = %\" PRIu64\n               \", schedule later checkpoint after 10 seconds\",\n               name(),\n               used_time);\n        tasking::enqueue(LPC_PER_REPLICA_CHECKPOINT_TIMER,\n                         &_tracker,\n                         [this] { init_checkpoint(false); },\n                         get_gpid().thread_hash(),\n                         std::chrono::seconds(10));\n        return err;\n    }\n\n    if (_is_manual_emergency_checkpointing) {\n        _is_manual_emergency_checkpointing = false;\n        _stub->_manual_emergency_checkpointing_count == 0\n            ? 0\n            : (--_stub->_manual_emergency_checkpointing_count);\n    }\n    if (err == ERR_WRONG_TIMING) {\n        // do nothing\n        ddebug(\"%s: call app.async_checkpoint() returns ERR_WRONG_TIMING, time_used_ns = %\" PRIu64\n               \", just ignore\",\n               name(),\n               used_time);\n    } else {\n        derror(\"%s: call app.async_checkpoint() failed, time_used_ns = %\" PRIu64 \", err = %s\",\n               name(),\n               used_time,\n               err.to_string());\n    }\n    return err;\n}\n\n// run in init thread\nerror_code replica::background_sync_checkpoint()\n{\n    uint64_t start_time = dsn_now_ns();\n    decree old_durable = _app->last_durable_decree();\n    auto err = _app->sync_checkpoint();\n    uint64_t used_time = dsn_now_ns() - start_time;\n    dassert(err != ERR_NOT_IMPLEMENTED, \"err == ERR_NOT_IMPLEMENTED\");\n    if (err == ERR_OK) {\n        if (old_durable != _app->last_durable_decree()) {\n            // if no need to generate new checkpoint, sync_checkpoint() also returns ERR_OK,\n       
     // so we should check if a new checkpoint has been generated.\n            ddebug(\"%s: call app.sync_checkpoint() succeed, time_used_ns = %\" PRIu64 \", \"\n                   \"app_last_committed_decree = %\" PRId64 \", app_last_durable_decree = (%\" PRId64\n                   \" => %\" PRId64 \")\",\n                   name(),\n                   used_time,\n                   _app->last_committed_decree(),\n                   old_durable,\n                   _app->last_durable_decree());\n            update_last_checkpoint_generate_time();\n        }\n    } else if (err == ERR_WRONG_TIMING) {\n        // do nothing\n        ddebug(\"%s: call app.sync_checkpoint() returns ERR_WRONG_TIMING, time_used_ns = %\" PRIu64\n               \", just ignore\",\n               name(),\n               used_time);\n    } else {\n        derror(\"%s: call app.sync_checkpoint() failed, time_used_ns = %\" PRIu64 \", err = %s\",\n               name(),\n               used_time,\n               err.to_string());\n    }\n    return err;\n}\n\n// in non-replication thread\nvoid replica::catch_up_with_private_logs(partition_status::type s)\n{\n    learn_state state;\n    _private_log->get_learn_state(get_gpid(), _app->last_committed_decree() + 1, state);\n\n    auto err = apply_learned_state_from_private_log(state);\n\n    if (s == partition_status::PS_POTENTIAL_SECONDARY) {\n        _potential_secondary_states.learn_remote_files_completed_task =\n            tasking::create_task(LPC_CHECKPOINT_REPLICA_COMPLETED,\n                                 &_tracker,\n                                 [this, err]() { this->on_learn_remote_state_completed(err); },\n                                 get_gpid().thread_hash());\n        _potential_secondary_states.learn_remote_files_completed_task->enqueue();\n    } else if (s == partition_status::PS_PARTITION_SPLIT) {\n        _split_states.async_learn_task = tasking::enqueue(\n            LPC_PARTITION_SPLIT,\n            tracker(),\n        
    std::bind(&replica_split_manager::child_catch_up_states, get_split_manager()),\n            get_gpid().thread_hash());\n    } else {\n        _secondary_states.checkpoint_completed_task =\n            tasking::create_task(LPC_CHECKPOINT_REPLICA_COMPLETED,\n                                 &_tracker,\n                                 [this, err]() { this->on_checkpoint_completed(err); },\n                                 get_gpid().thread_hash());\n        _secondary_states.checkpoint_completed_task->enqueue();\n    }\n}\n\nvoid replica::on_checkpoint_completed(error_code err)\n{\n    _checker.only_one_thread_access();\n\n    // closing or wrong timing\n    if (partition_status::PS_SECONDARY != status() || err == ERR_WRONG_TIMING) {\n        _secondary_states.checkpoint_is_running = false;\n        return;\n    }\n\n    // handle failure\n    if (err != ERR_OK) {\n        // done checkpointing\n        _secondary_states.checkpoint_is_running = false;\n        handle_local_failure(err);\n        return;\n    }\n\n    auto c = _prepare_list->last_committed_decree();\n\n    // missing commits\n    if (c > _app->last_committed_decree()) {\n        // missed ones are covered by prepare list\n        if (_app->last_committed_decree() > _prepare_list->min_decree()) {\n            for (auto d = _app->last_committed_decree() + 1; d <= c; d++) {\n                auto mu = _prepare_list->get_mutation_by_decree(d);\n                dassert(nullptr != mu, \"invalid mutation, decree = %\" PRId64, d);\n                err = _app->apply_mutation(mu);\n                if (ERR_OK != err) {\n                    _secondary_states.checkpoint_is_running = false;\n                    handle_local_failure(err);\n                    return;\n                }\n            }\n\n            // everything is ok now, done checkpointing\n            _secondary_states.checkpoint_is_running = false;\n            update_last_checkpoint_generate_time();\n        }\n\n        // missed ones need 
to be loaded via private logs\n        else {\n            _secondary_states.catchup_with_private_log_task = tasking::create_task(\n                LPC_CATCHUP_WITH_PRIVATE_LOGS,\n                &_tracker,\n                [this]() { this->catch_up_with_private_logs(partition_status::PS_SECONDARY); },\n                get_gpid().thread_hash());\n            _secondary_states.catchup_with_private_log_task->enqueue();\n        }\n    }\n\n    // no missing commits\n    else {\n        // everything is ok now, done checkpointing\n        _secondary_states.checkpoint_is_running = false;\n        update_last_checkpoint_generate_time();\n    }\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_config.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     replica configuration management\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <boost/lexical_cast.hpp>\n#include \"replica.h\"\n#include \"mutation.h\"\n#include \"mutation_log.h\"\n#include \"replica_stub.h\"\n#include \"bulk_load/replica_bulk_loader.h\"\n#include \"runtime/security/access_controller.h\"\n#include \"split/replica_split_manager.h\"\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/utility/fail_point.h>\n#include <dsn/utility/string_conv.h>\n#include <dsn/dist/replication/replica_envs.h>\n\nnamespace dsn {\nnamespace replication 
{\n\nbool get_bool_envs(const std::map<std::string, std::string> &envs,\n                   const std::string &name,\n                   bool &value)\n{\n    auto iter = envs.find(name);\n    if (iter != envs.end()) {\n        if (!buf2bool(iter->second, value)) {\n            return false;\n        }\n    }\n    return true;\n}\n\nvoid replica::on_config_proposal(configuration_update_request &proposal)\n{\n    _checker.only_one_thread_access();\n\n    ddebug(\"%s: process config proposal %s for %s\",\n           name(),\n           enum_to_string(proposal.type),\n           proposal.node.to_string());\n\n    if (proposal.config.ballot < get_ballot()) {\n        dwarn(\"%s: on_config_proposal out-dated, %\" PRId64 \" vs %\" PRId64,\n              name(),\n              proposal.config.ballot,\n              get_ballot());\n        return;\n    }\n\n    if (_primary_states.reconfiguration_task != nullptr) {\n        dinfo(\"%s: reconfiguration on the way, skip the incoming proposal\", name());\n        return;\n    }\n\n    if (proposal.config.ballot > get_ballot()) {\n        if (!update_configuration(proposal.config)) {\n            // is closing or update failed\n            return;\n        }\n    }\n\n    _app_info.__set_duplicating(proposal.info.duplicating);\n    switch (proposal.type) {\n    case config_type::CT_ASSIGN_PRIMARY:\n    case config_type::CT_UPGRADE_TO_PRIMARY:\n        assign_primary(proposal);\n        break;\n    case config_type::CT_ADD_SECONDARY:\n    case config_type::CT_ADD_SECONDARY_FOR_LB:\n        add_potential_secondary(proposal);\n        break;\n    case config_type::CT_DOWNGRADE_TO_SECONDARY:\n        downgrade_to_secondary_on_primary(proposal);\n        break;\n    case config_type::CT_DOWNGRADE_TO_INACTIVE:\n        downgrade_to_inactive_on_primary(proposal);\n        break;\n    case config_type::CT_REMOVE:\n        remove(proposal);\n        break;\n    default:\n        dassert(false, \"invalid config_type, type = %s\", 
enum_to_string(proposal.type));\n    }\n}\n\nvoid replica::assign_primary(configuration_update_request &proposal)\n{\n    dassert(proposal.node == _stub->_primary_address,\n            \"%s VS %s\",\n            proposal.node.to_string(),\n            _stub->_primary_address_str);\n\n    if (status() == partition_status::PS_PRIMARY) {\n        dwarn(\"%s: invalid assgin primary proposal as the node is in %s\",\n              name(),\n              enum_to_string(status()));\n        return;\n    }\n\n    if (proposal.type == config_type::CT_UPGRADE_TO_PRIMARY &&\n        (status() != partition_status::PS_SECONDARY || _secondary_states.checkpoint_is_running) &&\n        status() != partition_status::PS_PARTITION_SPLIT) {\n        dwarn(\n            \"%s: invalid upgrade to primary proposal as the node is in %s or during checkpointing\",\n            name(),\n            enum_to_string(status()));\n\n        // TODO: tell meta server so new primary is built more quickly\n        return;\n    }\n\n    proposal.config.primary = _stub->_primary_address;\n    replica_helper::remove_node(_stub->_primary_address, proposal.config.secondaries);\n\n    update_configuration_on_meta_server(proposal.type, proposal.node, proposal.config);\n}\n\n// run on primary to send ADD_LEARNER request to candidate replica server\nvoid replica::add_potential_secondary(configuration_update_request &proposal)\n{\n    if (status() != partition_status::PS_PRIMARY) {\n        dwarn(\"%s: ignore add secondary proposal for invalid state, state = %s\",\n              name(),\n              enum_to_string(status()));\n        return;\n    }\n\n    dassert(proposal.config.ballot == get_ballot(),\n            \"invalid ballot, %\" PRId64 \" VS %\" PRId64 \"\",\n            proposal.config.ballot,\n            get_ballot());\n    dassert(proposal.config.pid == _primary_states.membership.pid,\n            \"(%d.%d) VS (%d.%d)\",\n            proposal.config.pid.get_app_id(),\n            
proposal.config.pid.get_partition_index(),\n            _primary_states.membership.pid.get_app_id(),\n            _primary_states.membership.pid.get_partition_index());\n    dassert(proposal.config.primary == _primary_states.membership.primary,\n            \"%s VS %s\",\n            proposal.config.primary.to_string(),\n            _primary_states.membership.primary.to_string());\n    dassert(proposal.config.secondaries == _primary_states.membership.secondaries,\n            \"count(%d) VS count(%d)\",\n            (int)proposal.config.secondaries.size(),\n            (int)_primary_states.membership.secondaries.size());\n    dassert(!_primary_states.check_exist(proposal.node, partition_status::PS_PRIMARY),\n            \"node = %s\",\n            proposal.node.to_string());\n    dassert(!_primary_states.check_exist(proposal.node, partition_status::PS_SECONDARY),\n            \"node = %s\",\n            proposal.node.to_string());\n\n    int potential_secondaries_count =\n        _primary_states.membership.secondaries.size() + _primary_states.learners.size();\n    if (potential_secondaries_count >= _primary_states.membership.max_replica_count - 1) {\n        if (proposal.type == config_type::CT_ADD_SECONDARY) {\n            if (_primary_states.learners.find(proposal.node) == _primary_states.learners.end()) {\n                ddebug(\"%s: already have enough secondaries or potential secondaries, ignore new \"\n                       \"potential secondary proposal\",\n                       name());\n                return;\n            }\n        } else if (proposal.type == config_type::CT_ADD_SECONDARY_FOR_LB) {\n            if (potential_secondaries_count >= _primary_states.membership.max_replica_count) {\n                ddebug(\"%s: only allow one extra (potential) secondary, ingnore new potential \"\n                       \"secondary proposal\",\n                       name());\n                return;\n            } else {\n                ddebug(\"%s: add a 
new secondary(%s) for future load balancer\",\n                       name(),\n                       proposal.node.to_string());\n            }\n        } else {\n            dassert(false, \"invalid config_type, type = %s\", enum_to_string(proposal.type));\n        }\n    }\n\n    remote_learner_state state;\n    state.prepare_start_decree = invalid_decree;\n    state.timeout_task = nullptr; // TODO: add timer for learner task\n\n    auto it = _primary_states.learners.find(proposal.node);\n    if (it != _primary_states.learners.end()) {\n        state.signature = it->second.signature;\n    } else {\n        state.signature = ++_primary_states.next_learning_version;\n        _primary_states.learners[proposal.node] = state;\n        _primary_states.statuses[proposal.node] = partition_status::PS_POTENTIAL_SECONDARY;\n    }\n\n    group_check_request request;\n    request.app = _app_info;\n    request.node = proposal.node;\n    _primary_states.get_replica_config(\n        partition_status::PS_POTENTIAL_SECONDARY, request.config, state.signature);\n    request.last_committed_decree = last_committed_decree();\n\n    ddebug(\"%s: call one way %s to start learning with signature [%016\" PRIx64 \"]\",\n           name(),\n           proposal.node.to_string(),\n           state.signature);\n\n    rpc::call_one_way_typed(\n        proposal.node, RPC_LEARN_ADD_LEARNER, request, get_gpid().thread_hash());\n}\n\nvoid replica::upgrade_to_secondary_on_primary(::dsn::rpc_address node)\n{\n    ddebug(\"%s: upgrade potential secondary %s to secondary\", name(), node.to_string());\n\n    partition_configuration newConfig = _primary_states.membership;\n\n    // add secondary\n    newConfig.secondaries.push_back(node);\n\n    update_configuration_on_meta_server(config_type::CT_UPGRADE_TO_SECONDARY, node, newConfig);\n}\n\nvoid replica::downgrade_to_secondary_on_primary(configuration_update_request &proposal)\n{\n    if (proposal.config.ballot != get_ballot() || status() != 
partition_status::PS_PRIMARY)\n        return;\n\n    dassert(proposal.config.pid == _primary_states.membership.pid,\n            \"(%d.%d) VS (%d.%d)\",\n            proposal.config.pid.get_app_id(),\n            proposal.config.pid.get_partition_index(),\n            _primary_states.membership.pid.get_app_id(),\n            _primary_states.membership.pid.get_partition_index());\n    dassert(proposal.config.primary == _primary_states.membership.primary,\n            \"%s VS %s\",\n            proposal.config.primary.to_string(),\n            _primary_states.membership.primary.to_string());\n    dassert(proposal.config.secondaries == _primary_states.membership.secondaries, \"\");\n    dassert(proposal.node == proposal.config.primary,\n            \"%s VS %s\",\n            proposal.node.to_string(),\n            proposal.config.primary.to_string());\n\n    proposal.config.primary.set_invalid();\n    proposal.config.secondaries.push_back(proposal.node);\n\n    update_configuration_on_meta_server(\n        config_type::CT_DOWNGRADE_TO_SECONDARY, proposal.node, proposal.config);\n}\n\nvoid replica::downgrade_to_inactive_on_primary(configuration_update_request &proposal)\n{\n    if (proposal.config.ballot != get_ballot() || status() != partition_status::PS_PRIMARY)\n        return;\n\n    dassert(proposal.config.pid == _primary_states.membership.pid,\n            \"(%d.%d) VS (%d.%d)\",\n            proposal.config.pid.get_app_id(),\n            proposal.config.pid.get_partition_index(),\n            _primary_states.membership.pid.get_app_id(),\n            _primary_states.membership.pid.get_partition_index());\n    dassert(proposal.config.primary == _primary_states.membership.primary,\n            \"%s VS %s\",\n            proposal.config.primary.to_string(),\n            _primary_states.membership.primary.to_string());\n    dassert(proposal.config.secondaries == _primary_states.membership.secondaries, \"\");\n\n    if (proposal.node == proposal.config.primary) {\n   
     proposal.config.primary.set_invalid();\n    } else {\n        auto rt = replica_helper::remove_node(proposal.node, proposal.config.secondaries);\n        dassert(rt, \"remove node failed, node = %s\", proposal.node.to_string());\n    }\n\n    update_configuration_on_meta_server(\n        config_type::CT_DOWNGRADE_TO_INACTIVE, proposal.node, proposal.config);\n}\n\nvoid replica::remove(configuration_update_request &proposal)\n{\n    if (proposal.config.ballot != get_ballot() || status() != partition_status::PS_PRIMARY)\n        return;\n\n    dassert(proposal.config.pid == _primary_states.membership.pid,\n            \"(%d.%d) VS (%d.%d)\",\n            proposal.config.pid.get_app_id(),\n            proposal.config.pid.get_partition_index(),\n            _primary_states.membership.pid.get_app_id(),\n            _primary_states.membership.pid.get_partition_index());\n    dassert(proposal.config.primary == _primary_states.membership.primary,\n            \"%s VS %s\",\n            proposal.config.primary.to_string(),\n            _primary_states.membership.primary.to_string());\n    dassert(proposal.config.secondaries == _primary_states.membership.secondaries, \"\");\n\n    auto st = _primary_states.get_node_status(proposal.node);\n\n    switch (st) {\n    case partition_status::PS_PRIMARY:\n        dassert(proposal.config.primary == proposal.node,\n                \"%s VS %s\",\n                proposal.config.primary.to_string(),\n                proposal.node.to_string());\n        proposal.config.primary.set_invalid();\n        break;\n    case partition_status::PS_SECONDARY: {\n        auto rt = replica_helper::remove_node(proposal.node, proposal.config.secondaries);\n        dassert(rt, \"remove_node failed, node = %s\", proposal.node.to_string());\n    } break;\n    case partition_status::PS_POTENTIAL_SECONDARY:\n        break;\n    default:\n        break;\n    }\n\n    update_configuration_on_meta_server(config_type::CT_REMOVE, proposal.node, 
proposal.config);\n}\n\n// from primary\nvoid replica::on_remove(const replica_configuration &request)\n{\n    if (request.ballot < get_ballot())\n        return;\n\n    //\n    // - meta-server requires primary r1 to remove this secondary r2\n    // - primary update config from {3,r1,[r2,r3]} to {4,r1,[r3]}\n    // - primary send one way RPC_REMOVE_REPLICA to r2, but this message is delay by network\n    // - meta-server requires primary r1 to add new secondary on r2 again (though this case would\n    // not occur generally)\n    // - primary send RPC_LEARN_ADD_LEARNER to r2 with config of {4,r1,[r3]}, then r2 start to learn\n    // - when r2 is on learning, the remove request is arrived, with the same ballot\n    // - here we ignore the lately arrived remove request, which is proper\n    //\n    if (request.ballot == get_ballot() && partition_status::PS_POTENTIAL_SECONDARY == status()) {\n        dwarn(\"this implies that a config proposal request (e.g. add secondary) \"\n              \"with the same ballot arrived before this remove request, \"\n              \"current status is %s\",\n              enum_to_string(status()));\n        return;\n    }\n\n    dassert(request.status == partition_status::PS_INACTIVE,\n            \"invalid partition_status, status = %s\",\n            enum_to_string(request.status));\n    update_local_configuration(request);\n}\n\nvoid replica::update_configuration_on_meta_server(config_type::type type,\n                                                  ::dsn::rpc_address node,\n                                                  partition_configuration &newConfig)\n{\n    // type should never be `CT_REGISTER_CHILD`\n    // if this happens, it means serious mistake happened during partition split\n    // assert here to stop split and avoid splitting wrong\n    if (type == config_type::CT_REGISTER_CHILD) {\n        dassert_replica(false, \"invalid config_type, type = {}\", enum_to_string(type));\n    }\n\n    
newConfig.last_committed_decree = last_committed_decree();\n\n    if (type == config_type::CT_PRIMARY_FORCE_UPDATE_BALLOT) {\n        dassert(status() == partition_status::PS_INACTIVE && _inactive_is_transient &&\n                    _is_initializing,\n                \"\");\n        dassert(\n            newConfig.primary == node, \"%s VS %s\", newConfig.primary.to_string(), node.to_string());\n    } else if (type != config_type::CT_ASSIGN_PRIMARY &&\n               type != config_type::CT_UPGRADE_TO_PRIMARY) {\n        dassert(status() == partition_status::PS_PRIMARY,\n                \"partition status must be primary, status = %s\",\n                enum_to_string(status()));\n        dassert(newConfig.ballot == _primary_states.membership.ballot,\n                \"invalid ballot, %\" PRId64 \" VS %\" PRId64 \"\",\n                newConfig.ballot,\n                _primary_states.membership.ballot);\n    }\n\n    // disable 2pc during reconfiguration\n    // it is possible to do this only for config_type::CT_DOWNGRADE_TO_SECONDARY,\n    // but we choose to disable 2pc during all reconfiguration types\n    // for simplicity at the cost of certain write throughput\n    update_local_configuration_with_no_ballot_change(partition_status::PS_INACTIVE);\n    set_inactive_state_transient(true);\n\n    dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_UPDATE_PARTITION_CONFIGURATION);\n\n    std::shared_ptr<configuration_update_request> request(new configuration_update_request);\n    request->info = _app_info;\n    request->config = newConfig;\n    request->config.ballot++;\n    request->type = type;\n    request->node = node;\n\n    ::dsn::marshall(msg, *request);\n\n    if (nullptr != _primary_states.reconfiguration_task) {\n        _primary_states.reconfiguration_task->cancel(true);\n    }\n\n    ddebug(\"%s: send update configuration request to meta server, ballot = %\" PRId64\n           \", type = %s, node = %s\",\n           name(),\n           
request->config.ballot,\n           enum_to_string(request->type),\n           request->node.to_string());\n\n    rpc_address target(_stub->_failure_detector->get_servers());\n    _primary_states.reconfiguration_task =\n        rpc::call(target,\n                  msg,\n                  &_tracker,\n                  [=](error_code err, dsn::message_ex *reqmsg, dsn::message_ex *response) {\n                      on_update_configuration_on_meta_server_reply(err, reqmsg, response, request);\n                  },\n                  get_gpid().thread_hash());\n}\n\nvoid replica::on_update_configuration_on_meta_server_reply(\n    error_code err,\n    dsn::message_ex *request,\n    dsn::message_ex *response,\n    std::shared_ptr<configuration_update_request> req)\n{\n    _checker.only_one_thread_access();\n\n    if (partition_status::PS_INACTIVE != status() || _stub->is_connected() == false) {\n        _primary_states.reconfiguration_task = nullptr;\n        return;\n    }\n\n    configuration_update_response resp;\n    if (err == ERR_OK) {\n        ::dsn::unmarshall(response, resp);\n        err = resp.err;\n    }\n\n    if (err != ERR_OK) {\n        ddebug(\"%s: update configuration reply with err %s, request ballot %\" PRId64,\n               name(),\n               err.to_string(),\n               req->config.ballot);\n\n        if (err != ERR_INVALID_VERSION) {\n            // when the rpc call timeout, we would delay to do the recall\n            request->add_ref(); // will be released after recall\n            _primary_states.reconfiguration_task = tasking::enqueue(\n                LPC_DELAY_UPDATE_CONFIG,\n                &_tracker,\n                [ this, request, req2 = std::move(req) ]() {\n                    rpc_address target(_stub->_failure_detector->get_servers());\n                    rpc_response_task_ptr t = rpc::create_rpc_response_task(\n                        request,\n                        &_tracker,\n                        [this, req2](\n    
                        error_code err, dsn::message_ex *request, dsn::message_ex *response) {\n                            on_update_configuration_on_meta_server_reply(\n                                err, request, response, std::move(req2));\n                        },\n                        get_gpid().thread_hash());\n                    _primary_states.reconfiguration_task = t;\n                    dsn_rpc_call(target, t.get());\n                    request->release_ref();\n                },\n                get_gpid().thread_hash(),\n                std::chrono::seconds(1));\n            return;\n        }\n    }\n\n    ddebug(\"%s: update configuration %s, reply with err %s, ballot %\" PRId64\n           \", local ballot %\" PRId64 \", local status %s\",\n           name(),\n           enum_to_string(req->type),\n           resp.err.to_string(),\n           resp.config.ballot,\n           get_ballot(),\n           enum_to_string(status()));\n\n    if (resp.config.ballot < get_ballot()) {\n        _primary_states.reconfiguration_task = nullptr;\n        return;\n    }\n\n    // post-update work items?\n    if (resp.err == ERR_OK) {\n        dassert(req->config.pid == resp.config.pid,\n                \"(%d.%d) VS (%d.%d)\",\n                req->config.pid.get_app_id(),\n                req->config.pid.get_partition_index(),\n                resp.config.pid.get_app_id(),\n                resp.config.pid.get_partition_index());\n        dassert(req->config.primary == resp.config.primary,\n                \"%s VS %s\",\n                req->config.primary.to_string(),\n                resp.config.primary.to_string());\n        dassert(req->config.secondaries == resp.config.secondaries, \"\");\n\n        switch (req->type) {\n        case config_type::CT_UPGRADE_TO_PRIMARY:\n            _primary_states.last_prepare_decree_on_new_primary = _prepare_list->max_decree();\n            break;\n        case config_type::CT_ASSIGN_PRIMARY:\n            
_primary_states.last_prepare_decree_on_new_primary = 0;\n            break;\n        case config_type::CT_DOWNGRADE_TO_SECONDARY:\n        case config_type::CT_DOWNGRADE_TO_INACTIVE:\n        case config_type::CT_UPGRADE_TO_SECONDARY:\n            break;\n        case config_type::CT_REMOVE:\n            if (req->node != _stub->_primary_address) {\n                replica_configuration rconfig;\n                replica_helper::get_replica_config(resp.config, req->node, rconfig);\n                rpc::call_one_way_typed(\n                    req->node, RPC_REMOVE_REPLICA, rconfig, get_gpid().thread_hash());\n            }\n            break;\n        case config_type::CT_PRIMARY_FORCE_UPDATE_BALLOT:\n            dassert(_is_initializing, \"\");\n            _is_initializing = false;\n            break;\n        default:\n            dassert(false, \"invalid config_type, type = %s\", enum_to_string(req->type));\n        }\n    }\n\n    update_configuration(resp.config);\n    _primary_states.reconfiguration_task = nullptr;\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica::update_app_envs(const std::map<std::string, std::string> &envs)\n{\n    if (_app) {\n        update_app_envs_internal(envs);\n        _app->update_app_envs(envs);\n    }\n}\n\nvoid replica::update_app_envs_internal(const std::map<std::string, std::string> &envs)\n{\n    update_bool_envs(envs, replica_envs::SPLIT_VALIDATE_PARTITION_HASH, _validate_partition_hash);\n\n    update_throttle_envs(envs);\n\n    update_ac_allowed_users(envs);\n\n    update_allow_ingest_behind(envs);\n\n    update_deny_client(envs);\n}\n\nvoid replica::update_bool_envs(const std::map<std::string, std::string> &envs,\n                               const std::string &name,\n                               bool &value)\n{\n    bool new_value = false;\n    if (!get_bool_envs(envs, name, new_value)) {\n        dwarn_replica(\"invalid value of env {}\", name);\n        return;\n    }\n    if (new_value != value) {\n       
 ddebug_replica(\"switch env[{}] from {} to {}\", name, value, new_value);\n        value = new_value;\n    }\n}\n\nvoid replica::update_ac_allowed_users(const std::map<std::string, std::string> &envs)\n{\n    std::string allowed_users;\n    auto iter = envs.find(replica_envs::REPLICA_ACCESS_CONTROLLER_ALLOWED_USERS);\n    if (iter != envs.end()) {\n        allowed_users = iter->second;\n    }\n\n    _access_controller->update(allowed_users);\n}\n\nvoid replica::update_allow_ingest_behind(const std::map<std::string, std::string> &envs)\n{\n    bool new_value = false;\n    if (!get_bool_envs(envs, replica_envs::ROCKSDB_ALLOW_INGEST_BEHIND, new_value)) {\n        return;\n    }\n    if (new_value != _allow_ingest_behind) {\n        auto info = _app_info;\n        info.envs = envs;\n        if (store_app_info(info) != ERR_OK) {\n            return;\n        }\n        ddebug_replica(\"switch env[{}] from {} to {}\",\n                       replica_envs::ROCKSDB_ALLOW_INGEST_BEHIND,\n                       _allow_ingest_behind,\n                       new_value);\n        _allow_ingest_behind = new_value;\n    }\n}\n\nvoid replica::update_deny_client(const std::map<std::string, std::string> &envs)\n{\n    auto env_iter = envs.find(replica_envs::DENY_CLIENT_REQUEST);\n    if (env_iter == envs.end()) {\n        _deny_client.reset();\n        return;\n    }\n\n    std::vector<std::string> sub_sargs;\n    utils::split_args(env_iter->second.c_str(), sub_sargs, '*', true);\n    dcheck_eq_replica(sub_sargs.size(), 2);\n\n    _deny_client.reconfig = (sub_sargs[0] == \"reconfig\");\n    _deny_client.read = (sub_sargs[1] == \"read\" || sub_sargs[1] == \"all\");\n    _deny_client.write = (sub_sargs[1] == \"write\" || sub_sargs[1] == \"all\");\n}\n\nvoid replica::query_app_envs(/*out*/ std::map<std::string, std::string> &envs)\n{\n    if (_app) {\n        _app->query_app_envs(envs);\n    }\n}\n\nbool replica::update_configuration(const partition_configuration &config)\n{\n    
dassert(config.ballot >= get_ballot(),\n            \"invalid ballot, %\" PRId64 \" VS %\" PRId64 \"\",\n            config.ballot,\n            get_ballot());\n\n    replica_configuration rconfig;\n    replica_helper::get_replica_config(config, _stub->_primary_address, rconfig);\n\n    if (rconfig.status == partition_status::PS_PRIMARY &&\n        (rconfig.ballot > get_ballot() || status() != partition_status::PS_PRIMARY)) {\n        _primary_states.reset_membership(config, config.primary != _stub->_primary_address);\n    }\n\n    if (config.ballot > get_ballot() ||\n        is_same_ballot_status_change_allowed(status(), rconfig.status)) {\n        return update_local_configuration(rconfig, true);\n    } else\n        return false;\n}\n\nbool replica::is_same_ballot_status_change_allowed(partition_status::type olds,\n                                                   partition_status::type news)\n{\n    return\n        // add learner\n        (olds == partition_status::PS_INACTIVE && news == partition_status::PS_POTENTIAL_SECONDARY)\n\n        // learner ready for secondary\n        ||\n        (olds == partition_status::PS_POTENTIAL_SECONDARY && news == partition_status::PS_SECONDARY)\n\n        // meta server come back\n        || (olds == partition_status::PS_INACTIVE && news == partition_status::PS_SECONDARY &&\n            _inactive_is_transient)\n\n        // meta server come back\n        || (olds == partition_status::PS_INACTIVE && news == partition_status::PS_PRIMARY &&\n            _inactive_is_transient)\n\n        // no change\n        || (olds == news);\n}\n\nbool replica::update_local_configuration(const replica_configuration &config,\n                                         bool same_ballot /* = false*/)\n{\n    FAIL_POINT_INJECT_F(\"replica_update_local_configuration\", [=](dsn::string_view) -> bool {\n        auto old_status = status();\n        _config = config;\n        ddebug_replica(\n            \"update status from {} to {}\", 
enum_to_string(old_status), enum_to_string(status()));\n        return true;\n    });\n\n    dassert(config.ballot > get_ballot() || (same_ballot && config.ballot == get_ballot()),\n            \"invalid ballot, %\" PRId64 \" VS %\" PRId64 \"\",\n            config.ballot,\n            get_ballot());\n    dassert(config.pid == get_gpid(),\n            \"(%d.%d) VS (%d.%d)\",\n            config.pid.get_app_id(),\n            config.pid.get_partition_index(),\n            get_gpid().get_app_id(),\n            get_gpid().get_partition_index());\n\n    partition_status::type old_status = status();\n    ballot old_ballot = get_ballot();\n\n    // skip unncessary configuration change\n    if (old_status == config.status && old_ballot == config.ballot)\n        return true;\n\n    // skip invalid change\n    // but do not disable transitions to partition_status::PS_ERROR as errors\n    // must be handled immmediately\n    switch (old_status) {\n    case partition_status::PS_ERROR: {\n        dwarn(\"%s: status change from %s @ %\" PRId64 \" to %s @ %\" PRId64 \" is not allowed\",\n              name(),\n              enum_to_string(old_status),\n              old_ballot,\n              enum_to_string(config.status),\n              config.ballot);\n        return false;\n    } break;\n    case partition_status::PS_INACTIVE:\n        if ((config.status == partition_status::PS_PRIMARY ||\n             config.status == partition_status::PS_SECONDARY) &&\n            !_inactive_is_transient) {\n            dwarn(\"%s: status change from %s @ %\" PRId64 \" to %s @ %\" PRId64\n                  \" is not allowed when inactive state is not transient\",\n                  name(),\n                  enum_to_string(old_status),\n                  old_ballot,\n                  enum_to_string(config.status),\n                  config.ballot);\n            return false;\n        }\n        break;\n    case partition_status::PS_POTENTIAL_SECONDARY:\n        if (config.status == 
partition_status::PS_INACTIVE) {\n            if (!_potential_secondary_states.cleanup(false)) {\n                dwarn(\"%s: status change from %s @ %\" PRId64 \" to %s @ %\" PRId64\n                      \" is not allowed coz learning remote state is still running\",\n                      name(),\n                      enum_to_string(old_status),\n                      old_ballot,\n                      enum_to_string(config.status),\n                      config.ballot);\n                return false;\n            }\n        }\n        break;\n    case partition_status::PS_SECONDARY:\n        if (config.status != partition_status::PS_SECONDARY &&\n            config.status != partition_status::PS_ERROR) {\n            if (!_secondary_states.cleanup(false)) {\n                // TODO(sunweijie): totally remove this\n                dsn::task *native_handle;\n                if (_secondary_states.checkpoint_task)\n                    native_handle = _secondary_states.checkpoint_task.get();\n                else if (_secondary_states.checkpoint_completed_task)\n                    native_handle = _secondary_states.checkpoint_completed_task.get();\n                else if (_secondary_states.catchup_with_private_log_task)\n                    native_handle = _secondary_states.catchup_with_private_log_task.get();\n                else\n                    native_handle = nullptr;\n\n                dwarn(\"%s: status change from %s @ %\" PRId64 \" to %s @ %\" PRId64\n                      \" is not allowed coz checkpointing %p is still running\",\n                      name(),\n                      enum_to_string(old_status),\n                      old_ballot,\n                      enum_to_string(config.status),\n                      config.ballot,\n                      native_handle);\n                return false;\n            }\n        }\n        break;\n    case partition_status::PS_PARTITION_SPLIT:\n        if (config.status == 
partition_status::PS_INACTIVE) {\n            dwarn_replica(\"status change from {} @ {} to {} @ {} is not allowed\",\n                          enum_to_string(old_status),\n                          old_ballot,\n                          enum_to_string(config.status),\n                          config.ballot);\n            return false;\n        }\n        break;\n    default:\n        break;\n    }\n\n    bool r = false;\n    uint64_t oldTs = _last_config_change_time_ms;\n    _config = config;\n    // we should durable the new ballot to prevent the inconsistent state\n    if (_config.ballot > old_ballot) {\n        dsn::error_code result = _app->update_init_info_ballot_and_decree(this);\n        if (result == dsn::ERR_OK) {\n            ddebug(\"%s: update ballot to init file from %\" PRId64 \" to %\" PRId64 \" OK\",\n                   name(),\n                   old_ballot,\n                   _config.ballot);\n        } else {\n            dwarn(\"%s: update ballot to init file from %\" PRId64 \" to %\" PRId64 \" %s\",\n                  name(),\n                  old_ballot,\n                  _config.ballot,\n                  result.to_string());\n        }\n        _split_mgr->parent_cleanup_split_context();\n    }\n    _last_config_change_time_ms = dsn_now_ms();\n    dassert(max_prepared_decree() >= last_committed_decree(),\n            \"%\" PRId64 \" VS %\" PRId64 \"\",\n            max_prepared_decree(),\n            last_committed_decree());\n\n    _bulk_loader->clear_bulk_load_states_if_needed(old_status, config.status);\n\n    // Notice: there has five ways that primary can change its partition_status\n    //   1, primary change partition config, such as add/remove secondary\n    //   2, downgrage to secondary because of load balance\n    //   3, disnconnected with meta-server\n    //   4, connectied with meta-server\n    //   5, crash\n    // here, we just need to care about case １, 2, 3 and 4, ignore case 5\n    // the way that partition status 
change is:\n    //   case 1: primary -> ps_inactive & _inactive_is_transient = true -> primary\n    //   case 2: primary -> ps_inavtive & _inactive_is_transient = true -> secondary\n    //   case 3: primary -> ps_inactive & _inactive_is_transient = ture\n    //   case 4: ps_inactive & _inactive_is_transient = true -> primary or secondary\n    // the way we process whether primary stop uploading backup checkpoint is that case-1 continue\n    // uploading, others just stop uploading\n    switch (old_status) {\n    case partition_status::PS_PRIMARY:\n        cleanup_preparing_mutations(false);\n        switch (config.status) {\n        case partition_status::PS_PRIMARY:\n            replay_prepare_list();\n            break;\n        case partition_status::PS_INACTIVE:\n            _primary_states.cleanup(old_ballot != config.ballot);\n            // here we use wheather ballot changes and wheather disconnecting with meta to\n            // distinguish different case above mentioned\n            if (old_ballot == config.ballot && _stub->is_connected()) {\n                // case 1 and case 2, just continue uploading\n                //(when case2, we stop uploading when it change to secondary)\n            } else {\n                set_backup_context_cancel();\n                clear_cold_backup_state();\n            }\n            break;\n        case partition_status::PS_SECONDARY:\n        case partition_status::PS_ERROR:\n            _primary_states.cleanup(true);\n            // only load balance will occur primary -> secondary\n            // and we just stop upload and release the cold_backup_state, and let new primary to\n            // upload\n            set_backup_context_cancel();\n            clear_cold_backup_state();\n            break;\n        case partition_status::PS_POTENTIAL_SECONDARY:\n            dassert(false, \"invalid execution path\");\n            break;\n        default:\n            dassert(false, \"invalid execution path\");\n        }\n  
      break;\n    case partition_status::PS_SECONDARY:\n        cleanup_preparing_mutations(false);\n        if (config.status != partition_status::PS_SECONDARY) {\n            // if primary change the ballot, secondary will update ballot from A to\n            // A+1, we don't need clear cold backup context when this case\n            //\n            // if secondary upgrade to primary, we must cancel & clear cold_backup_state, because\n            // new-primary must check whether backup is already completed by previous-primary\n\n            set_backup_context_cancel();\n            clear_cold_backup_state();\n        }\n        switch (config.status) {\n        case partition_status::PS_PRIMARY:\n            init_group_check();\n            replay_prepare_list();\n            break;\n        case partition_status::PS_SECONDARY:\n            break;\n        case partition_status::PS_POTENTIAL_SECONDARY:\n            // prevent further 2pc\n            // wait next group check or explicit learn for real learning\n            _potential_secondary_states.learning_status = learner_status::LearningWithoutPrepare;\n            break;\n        case partition_status::PS_INACTIVE:\n            break;\n        case partition_status::PS_ERROR:\n            // _secondary_states.cleanup(true); => do it in close as it may block\n            break;\n        default:\n            dassert(false, \"invalid execution path\");\n        }\n        break;\n    case partition_status::PS_POTENTIAL_SECONDARY:\n        switch (config.status) {\n        case partition_status::PS_PRIMARY:\n            dassert(false, \"invalid execution path\");\n            break;\n        case partition_status::PS_SECONDARY:\n            _prepare_list->truncate(_app->last_committed_decree());\n\n            // using force cleanup now as all tasks must be done already\n            r = _potential_secondary_states.cleanup(true);\n            dassert(r, \"%s: potential secondary context cleanup failed\", 
name());\n\n            check_state_completeness();\n            break;\n        case partition_status::PS_POTENTIAL_SECONDARY:\n            break;\n        case partition_status::PS_INACTIVE:\n            break;\n        case partition_status::PS_ERROR:\n            _prepare_list->reset(_app->last_committed_decree());\n            _potential_secondary_states.cleanup(false);\n            // => do this in close as it may block\n            // r = _potential_secondary_states.cleanup(true);\n            // dassert(r, \"%s: potential secondary context cleanup failed\", name());\n            break;\n        default:\n            dassert(false, \"invalid execution path\");\n        }\n        break;\n    case partition_status::PS_PARTITION_SPLIT:\n        switch (config.status) {\n        case partition_status::PS_PRIMARY:\n            _split_states.cleanup(true);\n            init_group_check();\n            replay_prepare_list();\n            break;\n        case partition_status::PS_SECONDARY:\n            _split_states.cleanup(true);\n            break;\n        case partition_status::PS_POTENTIAL_SECONDARY:\n            dassert(false, \"invalid execution path\");\n            break;\n        case partition_status::PS_INACTIVE:\n            break;\n        case partition_status::PS_ERROR:\n            _split_states.cleanup(false);\n            break;\n        default:\n            dassert(false, \"invalid execution path\");\n        }\n        break;\n    case partition_status::PS_INACTIVE:\n        if (config.status != partition_status::PS_PRIMARY || !_inactive_is_transient) {\n            // except for case 1, we need stop uploading backup checkpoint\n            set_backup_context_cancel();\n            clear_cold_backup_state();\n        }\n        switch (config.status) {\n        case partition_status::PS_PRIMARY:\n            dassert(_inactive_is_transient, \"must be in transient state for being primary next\");\n            _inactive_is_transient = false;\n   
         init_group_check();\n            replay_prepare_list();\n            break;\n        case partition_status::PS_SECONDARY:\n            dassert(_inactive_is_transient, \"must be in transient state for being secondary next\");\n            _inactive_is_transient = false;\n            break;\n        case partition_status::PS_POTENTIAL_SECONDARY:\n            _inactive_is_transient = false;\n            break;\n        case partition_status::PS_INACTIVE:\n            break;\n        case partition_status::PS_ERROR:\n            // => do this in close as it may block\n            // if (_inactive_is_transient)\n            // {\n            //    _secondary_states.cleanup(true);\n            // }\n\n            if (_inactive_is_transient) {\n                _primary_states.cleanup(true);\n                _secondary_states.cleanup(false);\n            }\n            _inactive_is_transient = false;\n            break;\n        default:\n            dassert(false, \"invalid execution path\");\n        }\n        break;\n    case partition_status::PS_ERROR:\n        switch (config.status) {\n        case partition_status::PS_PRIMARY:\n            dassert(false, \"invalid execution path\");\n            break;\n        case partition_status::PS_SECONDARY:\n            dassert(false, \"invalid execution path\");\n            break;\n        case partition_status::PS_POTENTIAL_SECONDARY:\n            dassert(false, \"invalid execution path\");\n            break;\n        case partition_status::PS_INACTIVE:\n            dassert(false, \"invalid execution path\");\n            break;\n        case partition_status::PS_ERROR:\n            break;\n        default:\n            dassert(false, \"invalid execution path\");\n        }\n        break;\n    default:\n        dassert(false, \"invalid execution path\");\n    }\n\n    ddebug(\"%s: status change %s @ %\" PRId64 \" => %s @ %\" PRId64 \", pre(%\" PRId64 \", %\" PRId64\n           \"), app(%\" PRId64 \", %\" PRId64 
\"), duration = %\" PRIu64 \" ms, %s\",\n           name(),\n           enum_to_string(old_status),\n           old_ballot,\n           enum_to_string(status()),\n           get_ballot(),\n           _prepare_list->max_decree(),\n           _prepare_list->last_committed_decree(),\n           _app->last_committed_decree(),\n           _app->last_durable_decree(),\n           _last_config_change_time_ms - oldTs,\n           boost::lexical_cast<std::string>(_config).c_str());\n\n    if (status() != old_status) {\n        bool is_closing =\n            (status() == partition_status::PS_ERROR ||\n             (status() == partition_status::PS_INACTIVE && get_ballot() > old_ballot));\n        _stub->notify_replica_state_update(config, is_closing);\n\n        if (is_closing) {\n            ddebug(\"%s: being close ...\", name());\n            _stub->begin_close_replica(this);\n            return false;\n        }\n    } else {\n        _stub->notify_replica_state_update(config, false);\n    }\n\n    // start pending mutations if necessary\n    if (status() == partition_status::PS_PRIMARY) {\n        mutation_ptr next = _primary_states.write_queue.check_possible_work(\n            static_cast<int>(_prepare_list->max_decree() - last_committed_decree()));\n        if (next) {\n            init_prepare(next, false);\n        }\n\n        if (_primary_states.membership.secondaries.size() + 1 <\n            _options->mutation_2pc_min_replica_count) {\n            std::vector<mutation_ptr> queued;\n            _primary_states.write_queue.clear(queued);\n            for (auto &m : queued) {\n                for (auto &r : m->client_requests) {\n                    response_client_write(r, ERR_NOT_ENOUGH_MEMBER);\n                }\n            }\n        }\n    }\n\n    return true;\n}\n\nbool replica::update_local_configuration_with_no_ballot_change(partition_status::type s)\n{\n    if (status() == s)\n        return false;\n\n    auto config = _config;\n    config.status = s;\n 
   return update_local_configuration(config, true);\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica::on_config_sync(const app_info &info,\n                             const partition_configuration &config,\n                             split_status::type meta_split_status)\n{\n    dinfo_replica(\"configuration sync\");\n    // no outdated update\n    if (config.ballot < get_ballot())\n        return;\n\n    update_app_max_replica_count(info.max_replica_count);\n    update_app_envs(info.envs);\n    _is_duplication_master = info.duplicating;\n\n    if (status() == partition_status::PS_PRIMARY) {\n        if (nullptr != _primary_states.reconfiguration_task) {\n            // already under reconfiguration, skip configuration sync\n        } else if (info.partition_count != _app_info.partition_count) {\n            _split_mgr->trigger_primary_parent_split(info.partition_count, meta_split_status);\n        }\n    } else {\n        if (_is_initializing) {\n            // in initializing, when replica still primary, need to inc ballot\n            if (config.primary == _stub->_primary_address &&\n                status() == partition_status::PS_INACTIVE && _inactive_is_transient) {\n                update_configuration_on_meta_server(config_type::CT_PRIMARY_FORCE_UPDATE_BALLOT,\n                                                    config.primary,\n                                                    const_cast<partition_configuration &>(config));\n                return;\n            }\n            _is_initializing = false;\n        }\n\n        update_configuration(config);\n\n        if (status() == partition_status::PS_INACTIVE && !_inactive_is_transient) {\n            if (config.primary == _stub->_primary_address // dead primary\n                ||\n                config.primary.is_invalid() // primary is dead (otherwise let primary remove this)\n                ) {\n                ddebug(\"%s: downgrade myself as inactive is not transient, 
remote_config(%s)\",\n                       name(),\n                       boost::lexical_cast<std::string>(config).c_str());\n                _stub->remove_replica_on_meta_server(_app_info, config);\n            } else {\n                ddebug(\"%s: state is non-transient inactive, waiting primary to remove me\", name());\n            }\n        }\n    }\n}\n\nvoid replica::update_app_max_replica_count(int32_t max_replica_count)\n{\n    if (max_replica_count == _app_info.max_replica_count) {\n        return;\n    }\n\n    auto old_max_replica_count = _app_info.max_replica_count;\n    _app_info.max_replica_count = max_replica_count;\n\n    auto ec = store_app_info(_app_info);\n    dassert_replica(ec == ERR_OK,\n                    \"store_app_info for max_replica_count failed: error_code={}, app_name={}, \"\n                    \"app_id={}, old_max_replica_count={}, new_max_replica_count={}\",\n                    ec.to_string(),\n                    _app_info.app_name,\n                    _app_info.app_id,\n                    old_max_replica_count,\n                    _app_info.max_replica_count);\n}\n\nvoid replica::replay_prepare_list()\n{\n    decree start = last_committed_decree() + 1;\n    decree end = _prepare_list->max_decree();\n\n    ddebug(\"%s: replay prepare list from %\" PRId64 \" to %\" PRId64 \", ballot = %\" PRId64,\n           name(),\n           start,\n           end,\n           get_ballot());\n\n    for (decree decree = start; decree <= end; decree++) {\n        mutation_ptr old = _prepare_list->get_mutation_by_decree(decree);\n        mutation_ptr mu = new_mutation(decree);\n\n        if (old != nullptr) {\n            dinfo(\"copy mutation from mutation_tid=%\" PRIu64 \" to mutation_tid=%\" PRIu64,\n                  old->tid(),\n                  mu->tid());\n            mu->copy_from(old);\n        } else {\n            mu->add_client_request(RPC_REPLICATION_WRITE_EMPTY, nullptr);\n\n            ddebug(\"%s: emit empty mutation %s 
with mutation_tid=%\" PRIu64\n                   \" when replay prepare list\",\n                   name(),\n                   mu->name(),\n                   mu->tid());\n        }\n\n        init_prepare(mu, true);\n    }\n}\n\nerror_code replica::update_init_info_ballot_and_decree()\n{\n    return _app->update_init_info_ballot_and_decree(this);\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_context.cpp",
    "content": "﻿/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     context for replica with different roles\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/utils.h>\n\n#include \"replica_context.h\"\n#include \"replica.h\"\n#include \"replica_stub.h\"\n#include \"mutation.h\"\n#include \"mutation_log.h\"\n#include \"block_service/block_service_manager.h\"\n\nnamespace dsn {\nnamespace replication {\n\nvoid primary_context::cleanup(bool clean_pending_mutations)\n{\n    do_cleanup_pending_mutations(clean_pending_mutations);\n\n    // clean up group check\n    CLEANUP_TASK_ALWAYS(group_check_task)\n\n    for (auto it = 
group_check_pending_replies.begin(); it != group_check_pending_replies.end();\n         ++it) {\n        CLEANUP_TASK_ALWAYS(it->second)\n        // it->second->cancel(true);\n    }\n\n    group_check_pending_replies.clear();\n\n    // clean up reconfiguration\n    CLEANUP_TASK_ALWAYS(reconfiguration_task)\n\n    // clean up checkpoint\n    CLEANUP_TASK_ALWAYS(checkpoint_task)\n\n    // cleanup group bulk load\n    for (auto &kv : group_bulk_load_pending_replies) {\n        CLEANUP_TASK_ALWAYS(kv.second);\n    }\n    group_bulk_load_pending_replies.clear();\n\n    membership.ballot = 0;\n\n    cleanup_bulk_load_states();\n\n    cleanup_split_states();\n\n    secondary_disk_status.clear();\n}\n\nbool primary_context::is_cleaned()\n{\n    return nullptr == group_check_task && nullptr == reconfiguration_task &&\n           nullptr == checkpoint_task && group_check_pending_replies.empty() &&\n           nullptr == register_child_task && nullptr == query_child_task &&\n           group_bulk_load_pending_replies.empty();\n}\n\nvoid primary_context::do_cleanup_pending_mutations(bool clean_pending_mutations)\n{\n    if (clean_pending_mutations) {\n        write_queue.clear();\n    }\n}\n\nvoid primary_context::reset_membership(const partition_configuration &config, bool clear_learners)\n{\n    statuses.clear();\n    if (clear_learners) {\n        learners.clear();\n    }\n\n    if (config.ballot > membership.ballot)\n        next_learning_version = (((uint64_t)config.ballot) << 32) + 1;\n    else\n        ++next_learning_version;\n\n    membership = config;\n\n    if (membership.primary.is_invalid() == false) {\n        statuses[membership.primary] = partition_status::PS_PRIMARY;\n    }\n\n    for (auto it = config.secondaries.begin(); it != config.secondaries.end(); ++it) {\n        statuses[*it] = partition_status::PS_SECONDARY;\n        learners.erase(*it);\n    }\n\n    for (auto it = learners.begin(); it != learners.end(); ++it) {\n        statuses[it->first] = 
partition_status::PS_POTENTIAL_SECONDARY;\n    }\n}\n\nvoid primary_context::get_replica_config(partition_status::type st,\n                                         /*out*/ replica_configuration &config,\n                                         uint64_t learner_signature /*= invalid_signature*/)\n{\n    config.pid = membership.pid;\n    config.primary = membership.primary;\n    config.ballot = membership.ballot;\n    config.status = st;\n    config.learner_signature = learner_signature;\n}\n\nbool primary_context::check_exist(::dsn::rpc_address node, partition_status::type st)\n{\n    switch (st) {\n    case partition_status::PS_PRIMARY:\n        return membership.primary == node;\n    case partition_status::PS_SECONDARY:\n        return std::find(membership.secondaries.begin(), membership.secondaries.end(), node) !=\n               membership.secondaries.end();\n    case partition_status::PS_POTENTIAL_SECONDARY:\n        return learners.find(node) != learners.end();\n    default:\n        dassert(false, \"invalid partition_status, status = %s\", enum_to_string(st));\n        return false;\n    }\n}\n\nvoid primary_context::reset_node_bulk_load_states(const rpc_address &node)\n{\n    secondary_bulk_load_states[node].__set_download_progress(0);\n    secondary_bulk_load_states[node].__set_download_status(ERR_OK);\n    secondary_bulk_load_states[node].__set_ingest_status(ingestion_status::IS_INVALID);\n    secondary_bulk_load_states[node].__set_is_cleaned_up(false);\n    secondary_bulk_load_states[node].__set_is_paused(false);\n}\n\nvoid primary_context::cleanup_bulk_load_states()\n{\n    secondary_bulk_load_states.erase(secondary_bulk_load_states.begin(),\n                                     secondary_bulk_load_states.end());\n    ingestion_is_empty_prepare_sent = false;\n}\n\nvoid primary_context::cleanup_split_states()\n{\n    CLEANUP_TASK_ALWAYS(register_child_task)\n    CLEANUP_TASK_ALWAYS(query_child_task)\n\n    caught_up_children.clear();\n    
sync_send_write_request = false;\n    split_stopped_secondary.clear();\n}\n\nbool primary_context::secondary_disk_space_insufficient() const\n{\n    for (const auto &kv : secondary_disk_status) {\n        if (kv.second == disk_status::SPACE_INSUFFICIENT) {\n            ddebug_f(\"partition[{}] secondary[{}] disk space is insufficient\",\n                     membership.pid,\n                     kv.first.to_string());\n            return true;\n        }\n    }\n    return false;\n}\n\nbool secondary_context::cleanup(bool force)\n{\n    CLEANUP_TASK(checkpoint_task, force)\n\n    if (!force) {\n        CLEANUP_TASK_ALWAYS(checkpoint_completed_task);\n    } else {\n        CLEANUP_TASK(checkpoint_completed_task, force)\n    }\n\n    CLEANUP_TASK(catchup_with_private_log_task, force)\n\n    checkpoint_is_running = false;\n    return true;\n}\n\nbool secondary_context::is_cleaned() { return checkpoint_is_running == false; }\n\nbool potential_secondary_context::cleanup(bool force)\n{\n    task_ptr t = nullptr;\n\n    if (!force) {\n        CLEANUP_TASK_ALWAYS(delay_learning_task)\n\n        CLEANUP_TASK_ALWAYS(learning_task)\n\n        CLEANUP_TASK_ALWAYS(learn_remote_files_completed_task)\n\n        CLEANUP_TASK_ALWAYS(completion_notify_task)\n    } else {\n        CLEANUP_TASK(delay_learning_task, true)\n\n        CLEANUP_TASK(learning_task, true)\n\n        CLEANUP_TASK(learn_remote_files_completed_task, true)\n\n        CLEANUP_TASK(completion_notify_task, true)\n    }\n\n    CLEANUP_TASK(learn_remote_files_task, force)\n\n    CLEANUP_TASK(catchup_with_private_log_task, force)\n\n    learning_version = 0;\n    learning_start_ts_ns = 0;\n    learning_copy_file_count = 0;\n    learning_copy_file_size = 0;\n    learning_copy_buffer_size = 0;\n    learning_round_is_running = false;\n    if (learn_app_concurrent_count_increased) {\n        --owner_replica->get_replica_stub()->_learn_app_concurrent_count;\n        learn_app_concurrent_count_increased = false;\n    }\n    
learning_start_prepare_decree = invalid_decree;\n    first_learn_start_decree = invalid_decree;\n    learning_status = learner_status::LearningInvalid;\n    return true;\n}\n\nbool potential_secondary_context::is_cleaned()\n{\n    return nullptr == delay_learning_task && nullptr == learning_task &&\n           nullptr == learn_remote_files_task && nullptr == learn_remote_files_completed_task &&\n           nullptr == catchup_with_private_log_task && nullptr == completion_notify_task;\n}\n\nbool partition_split_context::cleanup(bool force)\n{\n    CLEANUP_TASK(async_learn_task, force)\n    if (!force) {\n        CLEANUP_TASK_ALWAYS(check_state_task)\n    } else {\n        CLEANUP_TASK(check_state_task, force)\n    }\n\n    splitting_start_ts_ns = 0;\n    splitting_start_async_learn_ts_ns = 0;\n    splitting_copy_file_count = 0;\n    splitting_copy_file_size = 0;\n    parent_gpid.set_app_id(0);\n    is_prepare_list_copied = false;\n    is_caught_up = false;\n    return true;\n}\n\nbool partition_split_context::is_cleaned() const\n{\n    return async_learn_task == nullptr && check_state_task == nullptr;\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_context.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/dist/block_service.h>\n#include <dsn/cpp/json_helper.h>\n\n#include \"mutation.h\"\n\nclass replication_service_test_app;\n\nnamespace dsn {\nnamespace replication {\n\nclass replica;\nclass replica_stub;\n\nstruct remote_learner_state\n{\n    int64_t signature;\n    ::dsn::task_ptr timeout_task;\n    decree prepare_start_decree;\n    std::string last_learn_log_file;\n};\n\ntypedef std::unordered_map<::dsn::rpc_address, remote_learner_state> learner_map;\n\n#define CLEANUP_TASK(task_, force)                                                                 \\\n    {                                                                                              \\\n        
task_ptr t = task_;                                                                        \\\n        if (t != nullptr) {                                                                        \\\n            bool finished;                                                                         \\\n            t->cancel(force, &finished);                                                           \\\n            if (!finished && !dsn_task_is_running_inside(task_.get()))                             \\\n                return false;                                                                      \\\n            task_ = nullptr;                                                                       \\\n        }                                                                                          \\\n    }\n\n#define CLEANUP_TASK_ALWAYS(task_)                                                                 \\\n    {                                                                                              \\\n        task_ptr t = task_;                                                                        \\\n        if (t != nullptr) {                                                                        \\\n            bool finished;                                                                         \\\n            t->cancel(false, &finished);                                                           \\\n            dassert(finished || dsn_task_is_running_inside(task_.get()),                           \\\n                    \"task must be finished at this point\");                                        \\\n            task_ = nullptr;                                                                       \\\n        }                                                                                          \\\n    }\n\nclass primary_context\n{\npublic:\n    primary_context(gpid gpid, int max_concurrent_2pc_count = 1, bool batch_write_disabled = 
false)\n        : next_learning_version(0),\n          write_queue(gpid, max_concurrent_2pc_count, batch_write_disabled),\n          last_prepare_decree_on_new_primary(0),\n          last_prepare_ts_ms(dsn_now_ms())\n    {\n    }\n\n    void cleanup(bool clean_pending_mutations = true);\n    bool is_cleaned();\n\n    void reset_membership(const partition_configuration &config, bool clear_learners);\n    void get_replica_config(partition_status::type status,\n                            /*out*/ replica_configuration &config,\n                            uint64_t learner_signature = invalid_signature);\n    bool check_exist(::dsn::rpc_address node, partition_status::type status);\n    partition_status::type get_node_status(::dsn::rpc_address addr) const;\n\n    void do_cleanup_pending_mutations(bool clean_pending_mutations = true);\n\n    // reset bulk load states in secondary_bulk_load_states by node address\n    void reset_node_bulk_load_states(const rpc_address &node);\n\n    void cleanup_bulk_load_states();\n\n    void cleanup_split_states();\n\n    bool secondary_disk_space_insufficient() const;\n\npublic:\n    // membership mgr, including learners\n    partition_configuration membership;\n    node_statuses statuses;\n    learner_map learners;\n    uint64_t next_learning_version;\n\n    // 2pc batching\n    mutation_queue write_queue;\n\n    // group check\n    dsn::task_ptr group_check_task; // the repeated group check task of LPC_GROUP_CHECK\n    // calls broadcast_group_check() to check all replicas separately\n    // created in replica::init_group_check()\n    // cancelled in cleanup() when status changed from PRIMARY to others\n    node_tasks group_check_pending_replies; // group check response tasks of RPC_GROUP_CHECK for\n                                            // each replica\n\n    // reconfiguration task of RPC_CM_UPDATE_PARTITION_CONFIGURATION\n    dsn::task_ptr reconfiguration_task;\n\n    // when read lastest update, all prepared decrees must be 
firstly committed\n    // (possibly true on old primary) before opening read service\n    decree last_prepare_decree_on_new_primary;\n\n    // copy checkpoint from secondaries ptr\n    dsn::task_ptr checkpoint_task;\n\n    uint64_t last_prepare_ts_ms;\n\n    // Used for partition split\n    // child addresses who has been caught up with its parent\n    std::unordered_set<dsn::rpc_address> caught_up_children;\n\n    // Used for partition split\n    // whether parent's write request should be sent to child synchronously\n    // if {sync_send_write_request} = true\n    // - parent should recevie prepare ack from child synchronously during 2pc\n    // if {sync_send_write_request} = false and replica is during partition split\n    // - parent should copy mutations to child asynchronously, child is during async-learn\n    // whether a replica is during partition split is determined by a variety named `_child_gpid` of\n    // replica class\n    // if app_id of `_child_gpid` is greater than zero, it means replica is during partition split,\n    // otherwise, not during partition split\n    bool sync_send_write_request{false};\n\n    // Used for partition split\n    // primary parent register child on meta_server task\n    dsn::task_ptr register_child_task;\n\n    // Used partition split\n    // secondary replica address who has paused or canceled split\n    std::unordered_set<rpc_address> split_stopped_secondary;\n\n    // Used for partition split\n    // primary parent query child on meta_server task\n    // Called by `trigger_primary_parent_split`\n    dsn::task_ptr query_child_task;\n\n    // Used for bulk load\n    // group bulk_load response tasks of RPC_GROUP_BULK_LOAD for each secondary replica\n    node_tasks group_bulk_load_pending_replies;\n    // bulk_load_state of secondary replicas\n    std::unordered_map<rpc_address, partition_bulk_load_state> secondary_bulk_load_states;\n    // if primary send an empty prepare after ingestion succeed to gurantee secondary 
commit its\n    // ingestion request\n    bool ingestion_is_empty_prepare_sent{false};\n\n    // secondary rpc_address -> secondary disk_status\n    std::unordered_map<rpc_address, disk_status::type> secondary_disk_status;\n};\n\nclass secondary_context\n{\npublic:\n    secondary_context() : checkpoint_is_running(false) {}\n    bool cleanup(bool force);\n    bool is_cleaned();\n\npublic:\n    bool checkpoint_is_running;\n    ::dsn::task_ptr checkpoint_task;\n    ::dsn::task_ptr checkpoint_completed_task;\n    ::dsn::task_ptr catchup_with_private_log_task;\n};\n\nclass potential_secondary_context\n{\npublic:\n    explicit potential_secondary_context(replica *r)\n        : owner_replica(r),\n          learning_version(0),\n          learning_start_ts_ns(0),\n          learning_copy_file_count(0),\n          learning_copy_file_size(0),\n          learning_copy_buffer_size(0),\n          learning_status(learner_status::LearningInvalid),\n          learning_round_is_running(false),\n          learn_app_concurrent_count_increased(false),\n          learning_start_prepare_decree(invalid_decree)\n    {\n    }\n\n    bool cleanup(bool force);\n    bool is_cleaned();\n    uint64_t duration_ms() const\n    {\n        return learning_start_ts_ns > 0 ? 
(dsn_now_ns() - learning_start_ts_ns) / 1000000 : 0;\n    }\n\npublic:\n    replica *owner_replica;\n    uint64_t learning_version;\n    uint64_t learning_start_ts_ns;\n    uint64_t learning_copy_file_count;\n    uint64_t learning_copy_file_size;\n    uint64_t learning_copy_buffer_size;\n    learner_status::type learning_status;\n    volatile bool learning_round_is_running;\n    volatile bool learn_app_concurrent_count_increased;\n    decree learning_start_prepare_decree;\n\n    // The start decree in the first round of learn.\n    // It indicates the minimum decree under `learn/` dir.\n    decree first_learn_start_decree{invalid_decree};\n\n    ::dsn::task_ptr delay_learning_task;\n    ::dsn::task_ptr learning_task;\n    ::dsn::task_ptr learn_remote_files_task;\n    ::dsn::task_ptr learn_remote_files_completed_task;\n    ::dsn::task_ptr catchup_with_private_log_task;\n    ::dsn::task_ptr completion_notify_task;\n};\n\nclass partition_split_context\n{\npublic:\n    bool cleanup(bool force);\n    bool is_cleaned() const;\n    uint64_t total_ms() const\n    {\n        return splitting_start_ts_ns > 0 ? (dsn_now_ns() - splitting_start_ts_ns) / 1000000 : 0;\n    }\n    uint64_t async_learn_ms() const\n    {\n        return splitting_start_async_learn_ts_ns > 0\n                   ? 
(dsn_now_ns() - splitting_start_async_learn_ts_ns) / 1000000\n                   : 0;\n    }\n\npublic:\n    gpid parent_gpid;\n    // whether child has copied parent prepare list\n    bool is_prepare_list_copied{false};\n    // whether child has catched up with parent during async-learn\n    bool is_caught_up{false};\n\n    // child replica async learn parent states\n    task_ptr async_learn_task;\n\n    // partition split states checker, start when initialize child replica\n    // see more in function `child_check_split_context` and `parent_check_states`\n    task_ptr check_state_task;\n\n    // Used for split related perf-counter\n    uint64_t splitting_start_ts_ns{0};\n    uint64_t splitting_start_async_learn_ts_ns{0};\n    uint64_t splitting_copy_file_count{0};\n    uint64_t splitting_copy_file_size{0};\n    uint64_t splitting_copy_mutation_count{0};\n};\n\n//---------------inline impl----------------------------------------------------------------\n\ninline partition_status::type primary_context::get_node_status(::dsn::rpc_address addr) const\n{\n    auto it = statuses.find(addr);\n    return it != statuses.end() ? it->second : partition_status::PS_INACTIVE;\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_disk_migrator.cpp",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#include \"replica/replica_stub.h\"\n#include \"replica_disk_migrator.h\"\n\n#include <boost/algorithm/string/replace.hpp>\n#include <dsn/utility/filesystem.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/utility/fail_point.h>\n\nnamespace dsn {\nnamespace replication {\n\nconst std::string replica_disk_migrator::kReplicaDirTempSuffix = \".disk.migrate.tmp\";\nconst std::string replica_disk_migrator::kReplicaDirOriginSuffix = \".disk.migrate.ori\";\nconst std::string replica_disk_migrator::kDataDirFolder = \"data/rdb/\";\n\nreplica_disk_migrator::replica_disk_migrator(replica *r) : replica_base(r), _replica(r) {}\n\nreplica_disk_migrator::~replica_disk_migrator() = default;\n\n// THREAD_POOL_DEFAULT\nvoid replica_disk_migrator::on_migrate_replica(replica_disk_migrate_rpc rpc)\n{\n    tasking::enqueue(\n        LPC_REPLICATION_COMMON,\n        _replica->tracker(),\n        [=]() {\n\n            if (!check_migration_args(rpc)) {\n                return;\n            }\n\n            _status = disk_migration_status::MOVING;\n            ddebug_replica(\n                
\"received replica disk migrate request(origin={}, target={}), update status \"\n                \"from {}=>{}\",\n                rpc.request().origin_disk,\n                rpc.request().target_disk,\n                enum_to_string(disk_migration_status::IDLE),\n                enum_to_string(status()));\n\n            const auto &request = rpc.request();\n            tasking::enqueue(LPC_REPLICATION_LONG_COMMON, _replica->tracker(), [=]() {\n                migrate_replica(request);\n            });\n        },\n        get_gpid().thread_hash());\n}\n\n// THREAD_POOL_REPLICATION\nbool replica_disk_migrator::check_migration_args(replica_disk_migrate_rpc rpc)\n{\n    _replica->_checker.only_one_thread_access();\n\n    const replica_disk_migrate_request &req = rpc.request();\n    replica_disk_migrate_response &resp = rpc.response();\n\n    // TODO(jiashuo1) may need manager control migration flow\n    if (status() != disk_migration_status::IDLE) {\n        std::string err_msg =\n            fmt::format(\"Existed migrate task({}) is running\", enum_to_string(status()));\n        derror_replica(\"received replica disk migrate request(origin={}, target={}), err = {}\",\n                       req.origin_disk,\n                       req.target_disk,\n                       err_msg);\n        resp.err = ERR_BUSY;\n        resp.__set_hint(err_msg);\n        return false;\n    }\n\n    if (_replica->status() != partition_status::type::PS_SECONDARY) {\n        std::string err_msg =\n            fmt::format(\"Invalid partition status({})\", enum_to_string(_replica->status()));\n        derror_replica(\"received replica disk migrate request(origin={}, target={}), err = {}\",\n                       req.origin_disk,\n                       req.target_disk,\n                       err_msg);\n        resp.err = ERR_INVALID_STATE;\n        resp.__set_hint(err_msg);\n        return false;\n    }\n\n    if (req.origin_disk == req.target_disk) {\n        std::string err_msg = 
fmt::format(\n            \"Invalid disk tag(origin({}) equal target({}))\", req.origin_disk, req.target_disk);\n        derror_replica(\"received replica disk migrate request(origin={}, target={}), err = {}\",\n                       req.origin_disk,\n                       req.target_disk,\n                       err_msg);\n        resp.err = ERR_INVALID_PARAMETERS;\n        resp.__set_hint(err_msg);\n        return false;\n    }\n\n    bool valid_origin_disk = false;\n    bool valid_target_disk = false;\n    // _dir_nodes: std::vector<std::shared_ptr<dir_node>>\n    for (const auto &dir_node : _replica->get_replica_stub()->_fs_manager._dir_nodes) {\n        if (dir_node->tag == req.origin_disk) {\n            valid_origin_disk = true;\n            if (!dir_node->has(req.pid)) {\n                std::string err_msg =\n                    fmt::format(\"Invalid replica(replica({}) doesn't exist on origin disk({}))\",\n                                req.pid,\n                                req.origin_disk);\n                derror_replica(\n                    \"received replica disk migrate request(origin={}, target={}), err = {}\",\n                    req.origin_disk,\n                    req.target_disk,\n                    err_msg);\n                resp.err = ERR_OBJECT_NOT_FOUND;\n                resp.__set_hint(err_msg);\n                return false;\n            }\n        }\n\n        if (dir_node->tag == req.target_disk) {\n            valid_target_disk = true;\n            if (dir_node->has(get_gpid())) {\n                std::string err_msg =\n                    fmt::format(\"Invalid replica(replica({}) has existed on target disk({}))\",\n                                req.pid,\n                                req.target_disk);\n                derror_replica(\n                    \"received replica disk migrate request(origin={}, target={}), err = {}\",\n                    req.origin_disk,\n                    req.target_disk,\n                  
  err_msg);\n                resp.err = ERR_PATH_ALREADY_EXIST;\n                resp.__set_hint(err_msg);\n                return false;\n            }\n        }\n    }\n\n    if (!valid_origin_disk || !valid_target_disk) {\n        std::string invalid_disk_tag = !valid_origin_disk ? req.origin_disk : req.target_disk;\n        std::string err_msg = fmt::format(\"Invalid disk tag({} doesn't exist)\", invalid_disk_tag);\n        derror_replica(\"received replica disk migrate request(origin={}, target={}), err = {}\",\n                       req.origin_disk,\n                       req.target_disk,\n                       err_msg);\n        resp.err = ERR_OBJECT_NOT_FOUND;\n        resp.__set_hint(err_msg);\n        return false;\n    }\n\n    resp.err = ERR_OK;\n    return true;\n}\n\n// THREAD_POOL_REPLICATION_LONG\nvoid replica_disk_migrator::migrate_replica(const replica_disk_migrate_request &req)\n{\n    dassert_replica(status() == disk_migration_status::MOVING,\n                    \"disk migration(origin={}, target={}), err = Invalid migration status({})\",\n                    req.origin_disk,\n                    req.target_disk,\n                    enum_to_string(status()));\n\n    if (init_target_dir(req) && migrate_replica_checkpoint(req) && migrate_replica_app_info(req)) {\n        _status = disk_migration_status::MOVED;\n        ddebug_replica(\"disk migration(origin={}, target={}) copy data complete, update status \"\n                       \"from {}=>{}, ready to \"\n                       \"close origin replica({})\",\n                       req.origin_disk,\n                       req.target_disk,\n                       enum_to_string(disk_migration_status::MOVING),\n                       enum_to_string(status()),\n                       _replica->dir());\n\n        close_current_replica(req);\n    }\n}\n\n// THREAD_POOL_REPLICATION_LONG\nbool replica_disk_migrator::init_target_dir(const replica_disk_migrate_request &req)\n{\n    
FAIL_POINT_INJECT_F(\"init_target_dir\", [this](string_view) -> bool {\n        reset_status();\n        return false;\n    });\n    // replica_dir: /root/origin_disk_tag/gpid.app_type\n    std::string replica_dir = _replica->dir();\n    // using origin dir to init new dir\n    boost::replace_first(replica_dir, req.origin_disk, req.target_disk);\n    if (utils::filesystem::directory_exists(replica_dir)) {\n        derror_replica(\"migration target replica dir({}) has existed\", replica_dir);\n        reset_status();\n        return false;\n    }\n\n    // _target_replica_dir = /root/target_disk_tag/gpid.app_type.disk.migrate.tmp, it will update to\n    // /root/target_disk_tag/gpid.app_type in replica_disk_migrator::update_replica_dir finally\n    _target_replica_dir = fmt::format(\"{}{}\", replica_dir, kReplicaDirTempSuffix);\n    if (utils::filesystem::directory_exists(_target_replica_dir)) {\n        dwarn_replica(\"disk migration(origin={}, target={}) target replica dir({}) has existed, \"\n                      \"delete it now\",\n                      req.origin_disk,\n                      req.target_disk,\n                      _target_replica_dir);\n        utils::filesystem::remove_path(_target_replica_dir);\n    }\n\n    //  _target_replica_data_dir = /root/gpid.app_type.disk.migrate.tmp/data/rdb, it will update to\n    //  /root/target/gpid.app_type/data/rdb in replica_disk_migrator::update_replica_dir finally\n    _target_data_dir = utils::filesystem::path_combine(_target_replica_dir, kDataDirFolder);\n    if (!utils::filesystem::create_directory(_target_data_dir)) {\n        derror_replica(\n            \"disk migration(origin={}, target={}) create target temp data dir({}) failed\",\n            req.origin_disk,\n            req.target_disk,\n            _target_data_dir);\n        reset_status();\n        return false;\n    }\n\n    return true;\n}\n\n// THREAD_POOL_REPLICATION_LONG\nbool replica_disk_migrator::migrate_replica_checkpoint(const 
replica_disk_migrate_request &req)\n{\n    FAIL_POINT_INJECT_F(\"migrate_replica_checkpoint\", [this](string_view) -> bool {\n        reset_status();\n        return false;\n    });\n\n    const auto &sync_checkpoint_err = _replica->get_app()->sync_checkpoint();\n    if (sync_checkpoint_err != ERR_OK) {\n        derror_replica(\"disk migration(origin={}, target={}) sync_checkpoint failed({})\",\n                       req.origin_disk,\n                       req.target_disk,\n                       sync_checkpoint_err.to_string());\n        reset_status();\n        return false;\n    }\n\n    const auto &copy_checkpoint_err =\n        _replica->get_app()->copy_checkpoint_to_dir(_target_data_dir.c_str(), 0 /*last_decree*/);\n    if (copy_checkpoint_err != ERR_OK) {\n        derror_replica(\"disk migration(origin={}, target={}) copy checkpoint to dir({}) \"\n                       \"failed(error={}), the dir({}) will be deleted\",\n                       req.origin_disk,\n                       req.target_disk,\n                       _target_data_dir,\n                       copy_checkpoint_err.to_string(),\n                       _target_replica_dir);\n        reset_status();\n        utils::filesystem::remove_path(_target_replica_dir);\n        return false;\n    }\n\n    return true;\n}\n\n// THREAD_POOL_REPLICATION_LONG\nbool replica_disk_migrator::migrate_replica_app_info(const replica_disk_migrate_request &req)\n{\n    FAIL_POINT_INJECT_F(\"migrate_replica_app_info\", [this](string_view) -> bool {\n        reset_status();\n        return false;\n    });\n    replica_init_info init_info = _replica->get_app()->init_info();\n    const auto &store_init_info_err = init_info.store(_target_replica_dir);\n    if (store_init_info_err != ERR_OK) {\n        derror_replica(\"disk migration(origin={}, target={}) stores app init info failed({})\",\n                       req.origin_disk,\n                       req.target_disk,\n                       
store_init_info_err.to_string());\n        reset_status();\n        return false;\n    }\n\n    const auto &store_info_err = _replica->store_app_info(\n        _replica->_app_info,\n        utils::filesystem::path_combine(_target_replica_dir, replica::kAppInfo));\n    if (store_info_err != ERR_OK) {\n        derror_replica(\"disk migration(origin={}, target={}) stores app info failed({})\",\n                       req.origin_disk,\n                       req.target_disk,\n                       store_info_err.to_string());\n        reset_status();\n        return false;\n    }\n\n    return true;\n}\n\n// THREAD_POOL_REPLICATION_LONG\ndsn::task_ptr replica_disk_migrator::close_current_replica(const replica_disk_migrate_request &req)\n{\n    if (_replica->status() != partition_status::type::PS_SECONDARY) {\n        derror_replica(\"migrate request(origin={}, target={}), err = Invalid partition status({})\",\n                       req.origin_disk,\n                       req.target_disk,\n                       enum_to_string(_replica->status()));\n        reset_status();\n        utils::filesystem::remove_path(_target_replica_dir);\n        return nullptr;\n    }\n\n    return _replica->_stub->begin_close_replica(_replica);\n}\n\n// run in replica->close_replica() of THREAD_POOL_REPLICATION_LONG\nvoid replica_disk_migrator::update_replica_dir()\n{\n    // origin_tmp_dir: /root/origin/gpid.app_type.disk.migrate.ori\n    std::string origin_temp_dir = fmt::format(\"{}{}\", _replica->dir(), kReplicaDirOriginSuffix);\n    if (!dsn::utils::filesystem::rename_path(_replica->dir(), origin_temp_dir)) {\n        reset_status();\n        utils::filesystem::remove_path(_target_replica_dir);\n        return;\n    }\n\n    std::string target_temp_dir = _target_replica_dir;\n    // update _target_replica_dir /root/gpid.app_type.disk.migrate.tmp/ to\n    // /root/target/gpid.app_type/\n    boost::replace_first(_target_replica_dir, kReplicaDirTempSuffix, \"\");\n    if 
(!dsn::utils::filesystem::rename_path(target_temp_dir, _target_replica_dir)) {\n        reset_status();\n        // rename failed, delete tmp dir and revert origin dir\n        utils::filesystem::remove_path(target_temp_dir);\n        dsn::utils::filesystem::rename_path(origin_temp_dir, _replica->dir());\n        return;\n    }\n\n    _replica->get_replica_stub()->_fs_manager.remove_replica(get_gpid());\n    _replica->get_replica_stub()->_fs_manager.add_replica(get_gpid(), _target_replica_dir);\n    _replica->get_replica_stub()->on_disk_stat();\n\n    _status = disk_migration_status::CLOSED;\n    ddebug_replica(\"disk replica migration move data from origin dir({}) to new dir({}) \"\n                   \"succeed, update status from {}=>{}\",\n                   _replica->dir(),\n                   _target_replica_dir,\n                   enum_to_string(disk_migration_status::MOVED),\n                   enum_to_string(status()));\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_disk_migrator.h",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#pragma once\n\nnamespace dsn {\nnamespace replication {\nclass replica;\n\nclass replica_disk_migrator : replica_base\n{\npublic:\n    explicit replica_disk_migrator(replica *r);\n    ~replica_disk_migrator();\n\n    void on_migrate_replica(replica_disk_migrate_rpc rpc);\n\n    disk_migration_status::type status() const { return _status; }\n\n    void set_status(const disk_migration_status::type &status) { _status = status; }\n\nprivate:\n    bool check_migration_args(replica_disk_migrate_rpc rpc);\n\n    void migrate_replica(const replica_disk_migrate_request &req);\n\n    bool init_target_dir(const replica_disk_migrate_request &req);\n    bool migrate_replica_checkpoint(const replica_disk_migrate_request &req);\n    bool migrate_replica_app_info(const replica_disk_migrate_request &req);\n    /// return nullptr if close failed. 
The returned value is only used in unit-tests.\n    dsn::task_ptr close_current_replica(const replica_disk_migrate_request &req);\n    void update_replica_dir();\n\n    void reset_status() { _status = disk_migration_status::IDLE; }\n\nprivate:\n    const static std::string kReplicaDirTempSuffix;\n    const static std::string kReplicaDirOriginSuffix;\n    const static std::string kDataDirFolder;\n\n    replica *_replica;\n\n    std::string _target_replica_dir; // /root/ssd_tag/gpid.pegasus/\n    std::string _target_data_dir;    // /root/ssd_tag/gpid.pegasus/data/rdb\n    disk_migration_status::type _status{disk_migration_status::IDLE};\n\n    friend class replica;\n    friend class replica_stub;\n    friend class replica_disk_migrate_test;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_failover.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     failure handling in replica\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"replica.h\"\n#include \"mutation.h\"\n#include \"mutation_log.h\"\n#include \"replica_stub.h\"\n\nnamespace dsn {\nnamespace replication {\n\nvoid replica::handle_local_failure(error_code error)\n{\n    ddebug(\"%s: handle local failure error %s, status = %s\",\n           name(),\n           error.to_string(),\n           enum_to_string(status()));\n\n    if (status() == partition_status::PS_PRIMARY) {\n        _stub->remove_replica_on_meta_server(_app_info, _primary_states.membership);\n    }\n\n    
update_local_configuration_with_no_ballot_change(partition_status::PS_ERROR);\n}\n\nvoid replica::handle_remote_failure(partition_status::type st,\n                                    ::dsn::rpc_address node,\n                                    error_code error,\n                                    const std::string &caused_by)\n{\n    derror(\"%s: handle remote failure caused by %s, error = %s, status = %s, node = %s\",\n           name(),\n           caused_by.c_str(),\n           error.to_string(),\n           enum_to_string(st),\n           node.to_string());\n\n    dassert(status() == partition_status::PS_PRIMARY,\n            \"invalid partition_status, status = %s\",\n            enum_to_string(status()));\n    dassert(\n        node != _stub->_primary_address, \"%s VS %s\", node.to_string(), _stub->_primary_address_str);\n\n    switch (st) {\n    case partition_status::PS_SECONDARY:\n        dassert(_primary_states.check_exist(node, partition_status::PS_SECONDARY),\n                \"invalid node address, address = %s, status = %s\",\n                node.to_string(),\n                enum_to_string(st));\n        {\n            configuration_update_request request;\n            request.node = node;\n            request.type = config_type::CT_DOWNGRADE_TO_INACTIVE;\n            request.config = _primary_states.membership;\n            downgrade_to_inactive_on_primary(request);\n        }\n        break;\n    case partition_status::PS_POTENTIAL_SECONDARY: {\n        ddebug(\"%s: remove learner %s for remote failure\", name(), node.to_string());\n        // potential secondary failure does not lead to ballot change\n        // therefore, it is possible to have multiple exec here\n        _primary_states.learners.erase(node);\n        _primary_states.statuses.erase(node);\n    } break;\n    case partition_status::PS_INACTIVE:\n    case partition_status::PS_ERROR:\n        break;\n    default:\n        dassert(false, \"invalid partition_status, status = %s\", 
enum_to_string(st));\n        break;\n    }\n}\n\nvoid replica::on_meta_server_disconnected()\n{\n    ddebug(\"%s: meta server disconnected\", name());\n\n    auto old_status = status();\n    update_local_configuration_with_no_ballot_change(partition_status::PS_INACTIVE);\n\n    // make sure they can be back directly\n    if (old_status == partition_status::PS_PRIMARY ||\n        old_status == partition_status::PS_SECONDARY) {\n        set_inactive_state_transient(true);\n    }\n}\n}\n} // namespace\n"
  },
  {
    "path": "src/replica/replica_http_service.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <nlohmann/json.hpp>\n#include <fmt/format.h>\n#include <dsn/utility/output_utils.h>\n#include \"replica_http_service.h\"\n#include \"duplication/duplication_sync_timer.h\"\n\nnamespace dsn {\nnamespace replication {\n\nvoid replica_http_service::query_duplication_handler(const http_request &req, http_response &resp)\n{\n    if (!_stub->_duplication_sync_timer) {\n        resp.body = \"duplication is not enabled [duplication_enabled=false]\";\n        resp.status_code = http_status_code::not_found;\n        return;\n    }\n    auto it = req.query_args.find(\"appid\");\n    if (it == req.query_args.end()) {\n        resp.body = \"appid should not be empty\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n    int32_t appid = -1;\n    if (!buf2int32(it->second, appid) || appid < 0) {\n        resp.status_code = http_status_code::bad_request;\n        resp.body = fmt::format(\"invalid appid={}\", it->second);\n        return;\n    }\n    bool app_found = false;\n    auto states = _stub->_duplication_sync_timer->get_dup_states(appid, &app_found);\n    if (!app_found) {\n        resp.status_code = 
http_status_code::not_found;\n        resp.body = fmt::format(\"no primary for app [appid={}]\", appid);\n        return;\n    }\n    if (states.empty()) {\n        resp.status_code = http_status_code::not_found;\n        resp.body = fmt::format(\"no duplication assigned for app [appid={}]\", appid);\n        return;\n    }\n\n    nlohmann::json json;\n    for (const auto &s : states) {\n        json[std::to_string(s.first)][s.second.id.to_string()] = nlohmann::json{\n            {\"duplicating\", s.second.duplicating},\n            {\"not_confirmed_mutations_num\", s.second.not_confirmed},\n            {\"not_duplicated_mutations_num\", s.second.not_duplicated},\n            {\"fail_mode\", duplication_fail_mode_to_string(s.second.fail_mode)},\n        };\n    }\n    resp.status_code = http_status_code::ok;\n    resp.body = json.dump();\n}\n\nvoid replica_http_service::query_app_data_version_handler(const http_request &req,\n                                                          http_response &resp)\n{\n    auto it = req.query_args.find(\"app_id\");\n    if (it == req.query_args.end()) {\n        resp.body = \"app_id should not be empty\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n\n    int32_t app_id = -1;\n    if (!buf2int32(it->second, app_id) || app_id < 0) {\n        resp.body = fmt::format(\"invalid app_id={}\", it->second);\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n\n    // partition_index -> data_version\n    std::unordered_map<int32_t, uint32_t> version_map;\n    _stub->query_app_data_version(app_id, version_map);\n\n    if (version_map.size() == 0) {\n        resp.body = fmt::format(\"app_id={} not found\", it->second);\n        resp.status_code = http_status_code::not_found;\n        return;\n    }\n\n    nlohmann::json json;\n    for (const auto &kv : version_map) {\n        json[std::to_string(kv.first)] = nlohmann::json{\n            {\"data_version\", 
std::to_string(kv.second)},\n        };\n    }\n    resp.status_code = http_status_code::ok;\n    resp.body = json.dump();\n}\n\nvoid replica_http_service::query_manual_compaction_handler(const http_request &req,\n                                                           http_response &resp)\n{\n    auto it = req.query_args.find(\"app_id\");\n    if (it == req.query_args.end()) {\n        resp.body = \"app_id should not be empty\";\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n\n    int32_t app_id = -1;\n    if (!buf2int32(it->second, app_id) || app_id < 0) {\n        resp.body = fmt::format(\"invalid app_id={}\", it->second);\n        resp.status_code = http_status_code::bad_request;\n        return;\n    }\n\n    std::unordered_map<gpid, manual_compaction_status::type> partition_compaction_status;\n    _stub->query_app_manual_compact_status(app_id, partition_compaction_status);\n\n    int32_t idle_count = 0;\n    int32_t running_count = 0;\n    int32_t queuing_count = 0;\n    int32_t finished_count = 0;\n    for (const auto &kv : partition_compaction_status) {\n        if (kv.second == manual_compaction_status::RUNNING) {\n            running_count++;\n        } else if (kv.second == manual_compaction_status::QUEUING) {\n            queuing_count++;\n        } else if (kv.second == manual_compaction_status::FINISHED) {\n            finished_count++;\n        } else if (kv.second == manual_compaction_status::IDLE) {\n            idle_count++;\n        }\n    }\n\n    nlohmann::json json;\n    json[\"status\"] = nlohmann::json{\n        {manual_compaction_status_to_string(manual_compaction_status::IDLE), idle_count},\n        {manual_compaction_status_to_string(manual_compaction_status::RUNNING), running_count},\n        {manual_compaction_status_to_string(manual_compaction_status::QUEUING), queuing_count},\n        {manual_compaction_status_to_string(manual_compaction_status::FINISHED), finished_count}};\n    resp.status_code 
= http_status_code::ok;\n    resp.body = json.dump();\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_http_service.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/http/http_server.h>\n\n#include \"replica_stub.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_http_service : public http_service\n{\npublic:\n    explicit replica_http_service(replica_stub *stub) : _stub(stub)\n    {\n        register_handler(\"duplication\",\n                         std::bind(&replica_http_service::query_duplication_handler,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/replica/duplication?appid=<appid>\");\n        register_handler(\"data_version\",\n                         std::bind(&replica_http_service::query_app_data_version_handler,\n                                   this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/replica/data_version?app_id=<app_id>\");\n        register_handler(\"manual_compaction\",\n                         std::bind(&replica_http_service::query_manual_compaction_handler,\n                                 
  this,\n                                   std::placeholders::_1,\n                                   std::placeholders::_2),\n                         \"ip:port/replica/maual_compaction?app_id=<app_id>\");\n    }\n\n    std::string path() const override { return \"replica\"; }\n\n    void query_duplication_handler(const http_request &req, http_response &resp);\n    void query_app_data_version_handler(const http_request &req, http_response &resp);\n    void query_manual_compaction_handler(const http_request &req, http_response &resp);\n\n    inline const char *manual_compaction_status_to_string(manual_compaction_status::type status)\n    {\n        switch (status) {\n        case manual_compaction_status::IDLE:\n            return \"idle\";\n        case manual_compaction_status::QUEUING:\n            return \"queuing\";\n        case manual_compaction_status::RUNNING:\n            return \"running\";\n        case manual_compaction_status::FINISHED:\n            return \"finished\";\n        default:\n            dassert(false, \"invalid status({})\", status);\n            __builtin_unreachable();\n        }\n    }\n\nprivate:\n    replica_stub *_stub;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_init.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"replica.h\"\n#include \"mutation.h\"\n#include \"mutation_log.h\"\n#include \"replica_stub.h\"\n#include \"backup/replica_backup_manager.h\"\n#include \"duplication/replica_follower.h\"\n#include <dsn/utility/factory_store.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/fail_point.h>\n\nnamespace dsn {\nnamespace replication {\n\nerror_code replica::initialize_on_new()\n{\n    // if (dsn::utils::filesystem::directory_exists(_dir) &&\n    //    !dsn::utils::filesystem::remove_path(_dir))\n    //{\n    //    derror(\"cannot allocate new replica @ %s, as the dir is already exists\", _dir.c_str());\n    //    return 
ERR_PATH_ALREADY_EXIST;\n    //}\n    //\n    // TODO: check if _dir contain other file or directory except for\n    // \"restore.policy_name.backup_id\"\n    // which is applied to restore from cold backup\n    if (!dsn::utils::filesystem::directory_exists(_dir) &&\n        !dsn::utils::filesystem::create_directory(_dir)) {\n        derror(\"cannot allocate new replica @ %s, because create dir failed\", _dir.c_str());\n        return ERR_FILE_OPERATION_FAILED;\n    }\n\n    auto err = store_app_info(_app_info);\n    if (err != ERR_OK) {\n        dsn::utils::filesystem::remove_path(_dir);\n        return err;\n    }\n\n    return init_app_and_prepare_list(true);\n}\n\n/*static*/ replica *replica::newr(replica_stub *stub,\n                                  gpid gpid,\n                                  const app_info &app,\n                                  bool restore_if_necessary,\n                                  bool is_duplication_follower,\n                                  const std::string &parent_dir)\n{\n    std::string dir;\n    if (parent_dir.empty()) {\n        dir = stub->get_replica_dir(app.app_type.c_str(), gpid);\n    } else {\n        dir = stub->get_child_dir(app.app_type.c_str(), gpid, parent_dir);\n    }\n    replica *rep =\n        new replica(stub, gpid, app, dir.c_str(), restore_if_necessary, is_duplication_follower);\n    error_code err;\n    if (restore_if_necessary && (err = rep->restore_checkpoint()) != dsn::ERR_OK) {\n        derror_f(\"{}: try to restore replica failed, error({})\", rep->name(), err.to_string());\n        return clear_on_failure(stub, rep, dir, gpid);\n    }\n\n    if (is_duplication_follower &&\n        (err = rep->get_replica_follower()->duplicate_checkpoint()) != dsn::ERR_OK) {\n        derror_f(\"{}: try to duplicate replica checkpoint failed, error({}) and please check \"\n                 \"previous detail error log\",\n                 rep->name(),\n                 err.to_string());\n        return 
clear_on_failure(stub, rep, dir, gpid);\n    }\n\n    err = rep->initialize_on_new();\n    if (err == ERR_OK) {\n        dinfo_f(\"{}: new replica succeed\", rep->name());\n        return rep;\n    } else {\n        derror_f(\"{}: new replica failed, err = {}\", rep->name(), err.to_string());\n        return clear_on_failure(stub, rep, dir, gpid);\n    }\n}\n\n/* static */ replica *replica::clear_on_failure(replica_stub *stub,\n                                                replica *rep,\n                                                const std::string &path,\n                                                const gpid &pid)\n{\n    rep->close();\n    delete rep;\n    rep = nullptr;\n\n    // clear work on failure\n    utils::filesystem::remove_path(path);\n    stub->_fs_manager.remove_replica(pid);\n    return nullptr;\n}\n\nerror_code replica::initialize_on_load()\n{\n    ddebug(\"%s: initialize replica on load, dir = %s\", name(), _dir.c_str());\n\n    if (!dsn::utils::filesystem::directory_exists(_dir)) {\n        derror(\"%s: cannot load replica, because dir %s is not exist\", name(), _dir.c_str());\n        return ERR_PATH_NOT_FOUND;\n    }\n\n    return init_app_and_prepare_list(false);\n}\n\n/*static*/ replica *replica::load(replica_stub *stub, const char *dir)\n{\n    FAIL_POINT_INJECT_F(\"mock_replica_load\", [&](string_view) -> replica * { return nullptr; });\n\n    char splitters[] = {'\\\\', '/', 0};\n    std::string name = utils::get_last_component(std::string(dir), splitters);\n    if (name == \"\") {\n        derror(\"invalid replica dir %s\", dir);\n        return nullptr;\n    }\n\n    char app_type[128];\n    int32_t app_id, pidx;\n    if (3 != sscanf(name.c_str(), \"%d.%d.%s\", &app_id, &pidx, app_type)) {\n        derror(\"invalid replica dir %s\", dir);\n        return nullptr;\n    }\n\n    gpid pid(app_id, pidx);\n    if (!utils::filesystem::directory_exists(dir)) {\n        derror(\"replica dir %s not exist\", dir);\n        return 
nullptr;\n    }\n\n    dsn::app_info info;\n    replica_app_info info2(&info);\n    std::string path = utils::filesystem::path_combine(dir, kAppInfo);\n    auto err = info2.load(path);\n    if (ERR_OK != err) {\n        derror(\"load app-info from %s failed, err = %s\", path.c_str(), err.to_string());\n        return nullptr;\n    }\n\n    if (info.app_type != app_type) {\n        derror(\"unmatched app type %s for %s\", info.app_type.c_str(), path.c_str());\n        return nullptr;\n    }\n\n    if (info.partition_count < pidx) {\n        derror_f(\"partition[{}], count={}, this replica may be partition split garbage partition, \"\n                 \"ignore it\",\n                 pid,\n                 info.partition_count);\n        return nullptr;\n    }\n\n    replica *rep = new replica(stub, pid, info, dir, false);\n\n    err = rep->initialize_on_load();\n    if (err == ERR_OK) {\n        ddebug(\"%s: load replica succeed\", rep->name());\n        return rep;\n    } else {\n        derror(\"%s: load replica failed, err = %s\", rep->name(), err.to_string());\n        rep->close();\n        delete rep;\n        rep = nullptr;\n\n        // clear work on failure\n        if (dsn::utils::filesystem::directory_exists(dir)) {\n            char rename_dir[1024];\n            sprintf(rename_dir, \"%s.%\" PRIu64 \".err\", dir, dsn_now_us());\n            bool ret = dsn::utils::filesystem::rename_path(dir, rename_dir);\n            dassert(ret, \"load_replica: failed to move directory '%s' to '%s'\", dir, rename_dir);\n            dwarn(\"load_replica: {replica_dir_op} succeed to move directory '%s' to '%s'\",\n                  dir,\n                  rename_dir);\n            stub->_counter_replicas_recent_replica_move_error_count->increment();\n            stub->_fs_manager.remove_replica(pid);\n        }\n\n        return nullptr;\n    }\n}\n\ndecree replica::get_replay_start_decree()\n{\n    decree replay_start_decree = _app->last_committed_decree();\n    
ddebug_replica(\"start to replay private log [replay_start_decree: {}]\", replay_start_decree);\n    return replay_start_decree;\n}\n\nerror_code replica::init_app_and_prepare_list(bool create_new)\n{\n    dassert(nullptr == _app, \"\");\n    error_code err;\n    std::string log_dir = utils::filesystem::path_combine(dir(), \"plog\");\n\n    _app.reset(replication_app_base::new_storage_instance(_app_info.app_type, this));\n    dassert(nullptr == _private_log, \"private log must not be initialized yet\");\n\n    if (create_new) {\n        err = _app->open_new_internal(this, _stub->_log->on_partition_reset(get_gpid(), 0), 0);\n        // two case:\n        //      1, just open a new app, in this case, the last_committed_decree and\n        //      last_durable_decree\n        //         and committed_decree of prepare_list are all equal, and is 0\n        //      2, open app with some data, but don't have slog and plog and also don't have\n        //      app_info;\n        //         in this case, last_committed_decree = last_durable_decree >= 0, but\n        //         last_committed_decree\n        //         in prepare_list is 0, so should make it equal to last_committed_decree in app\n        _prepare_list->reset(_app->last_committed_decree());\n    } else {\n        err = _app->open_internal(this);\n        if (err == ERR_OK) {\n            dassert(_app->last_committed_decree() == _app->last_durable_decree(),\n                    \"invalid app state, %\" PRId64 \" VS %\" PRId64 \"\",\n                    _app->last_committed_decree(),\n                    _app->last_durable_decree());\n            _config.ballot = _app->init_info().init_ballot;\n            _prepare_list->reset(_app->last_committed_decree());\n\n            _private_log = new mutation_log_private(\n                log_dir, _options->log_private_file_size_mb, get_gpid(), this);\n            ddebug(\"%s: plog_dir = %s\", name(), log_dir.c_str());\n\n            // sync valid_start_offset between 
app and logs\n            _stub->_log->set_valid_start_offset_on_open(\n                get_gpid(), _app->init_info().init_offset_in_shared_log);\n            _private_log->set_valid_start_offset_on_open(\n                get_gpid(), _app->init_info().init_offset_in_private_log);\n\n            // replay the logs\n            {\n                std::map<gpid, decree> replay_condition;\n                replay_condition[_config.pid] = get_replay_start_decree();\n\n                uint64_t start_time = dsn_now_ms();\n                err = _private_log->open(\n                    [this](int log_length, mutation_ptr &mu) { return replay_mutation(mu, true); },\n                    [this](error_code err) {\n                        tasking::enqueue(LPC_REPLICATION_ERROR,\n                                         &_tracker,\n                                         [this, err]() { handle_local_failure(err); },\n                                         get_gpid().thread_hash());\n                    },\n                    replay_condition);\n\n                uint64_t finish_time = dsn_now_ms();\n\n                if (err == ERR_OK) {\n                    ddebug(\"%s: replay private log succeed, durable = %\" PRId64\n                           \", committed = %\" PRId64 \", \"\n                           \"max_prepared = %\" PRId64 \", ballot = %\" PRId64\n                           \", valid_offset_in_plog = %\" PRId64 \", \"\n                           \"max_decree_in_plog = %\" PRId64 \", max_commit_on_disk_in_plog = %\" PRId64\n                           \", \"\n                           \"time_used = %\" PRIu64 \" ms\",\n                           name(),\n                           _app->last_durable_decree(),\n                           _app->last_committed_decree(),\n                           max_prepared_decree(),\n                           get_ballot(),\n                           _app->init_info().init_offset_in_private_log,\n                           
_private_log->max_decree(get_gpid()),\n                           _private_log->max_commit_on_disk(),\n                           finish_time - start_time);\n\n                    _private_log->check_valid_start_offset(\n                        get_gpid(), _app->init_info().init_offset_in_private_log);\n\n                    set_inactive_state_transient(true);\n                }\n                /* in the beginning the prepare_list is reset to the durable_decree */\n                else {\n                    derror(\"%s: replay private log failed, err = %s, durable = %\" PRId64\n                           \", committed = %\" PRId64 \", \"\n                           \"maxpd = %\" PRId64 \", ballot = %\" PRId64\n                           \", valid_offset_in_plog = %\" PRId64 \", \"\n                           \"time_used = %\" PRIu64 \" ms\",\n                           name(),\n                           err.to_string(),\n                           _app->last_durable_decree(),\n                           _app->last_committed_decree(),\n                           max_prepared_decree(),\n                           get_ballot(),\n                           _app->init_info().init_offset_in_private_log,\n                           finish_time - start_time);\n\n                    _private_log->close();\n                    _private_log = nullptr;\n\n                    _stub->_log->on_partition_removed(get_gpid());\n                }\n            }\n        }\n    }\n\n    if (err != ERR_OK) {\n        derror(\"%s: open replica failed, err = %s\", name(), err.to_string());\n        _app->close(false);\n        _app = nullptr;\n    } else {\n        _is_initializing = true;\n        _app->set_partition_version(_app_info.partition_count - 1);\n\n        if (nullptr == _private_log) {\n            ddebug(\"%s: clear private log, dir = %s\", name(), log_dir.c_str());\n            if (!dsn::utils::filesystem::remove_path(log_dir)) {\n                dassert(false, \"Fail 
to delete directory %s.\", log_dir.c_str());\n            }\n            if (!::dsn::utils::filesystem::create_directory(log_dir)) {\n                dassert(false, \"Fail to create directory %s.\", log_dir.c_str());\n            }\n\n            _private_log = new mutation_log_private(\n                log_dir, _options->log_private_file_size_mb, get_gpid(), this);\n            ddebug(\"%s: plog_dir = %s\", name(), log_dir.c_str());\n\n            err = _private_log->open(nullptr, [this](error_code err) {\n                tasking::enqueue(LPC_REPLICATION_ERROR,\n                                 &_tracker,\n                                 [this, err]() { handle_local_failure(err); },\n                                 get_gpid().thread_hash());\n            });\n        }\n\n        if (err == ERR_OK) {\n            if (_checkpoint_timer == nullptr && !_options->checkpoint_disabled) {\n                _checkpoint_timer = tasking::enqueue_timer(\n                    LPC_PER_REPLICA_CHECKPOINT_TIMER,\n                    &_tracker,\n                    [this] { on_checkpoint_timer(); },\n                    std::chrono::seconds(_options->checkpoint_interval_seconds),\n                    get_gpid().thread_hash());\n            }\n\n            _backup_mgr->start_collect_backup_info();\n        }\n    }\n\n    return err;\n}\n\n// return false only when the log is invalid:\n// - for private log, return false if offset < init_offset_in_private_log\n// - for shared log, return false if offset < init_offset_in_shared_log\nbool replica::replay_mutation(mutation_ptr &mu, bool is_private)\n{\n    auto d = mu->data.header.decree;\n    auto offset = mu->data.header.log_offset;\n\n    // it's very import to keep the ballot.\n    // for example, the recovery need it to select a proper primary\n    if (mu->data.header.ballot > get_ballot()) {\n        _config.ballot = mu->data.header.ballot;\n        bool ret = update_local_configuration(_config, true);\n        dassert(ret, 
\"\");\n    }\n\n    if (is_private && offset < _app->init_info().init_offset_in_private_log) {\n        dinfo(\"%s: replay mutation skipped1 as offset is invalid in private log, ballot = %\" PRId64\n              \", decree = %\" PRId64 \", last_committed_decree = %\" PRId64 \", offset = %\" PRId64,\n              name(),\n              mu->data.header.ballot,\n              d,\n              mu->data.header.last_committed_decree,\n              offset);\n        return false;\n    }\n\n    if (!is_private && offset < _app->init_info().init_offset_in_shared_log) {\n        dinfo(\"%s: replay mutation skipped2 as offset is invalid in shared log, ballot = %\" PRId64\n              \", decree = %\" PRId64 \", last_committed_decree = %\" PRId64 \", offset = %\" PRId64,\n              name(),\n              mu->data.header.ballot,\n              d,\n              mu->data.header.last_committed_decree,\n              offset);\n        return false;\n    }\n\n    // fix private log completeness when it is from shared\n    if (!is_private && d > _private_log->max_commit_on_disk()) {\n        _private_log->append(mu, LPC_WRITE_REPLICATION_LOG_COMMON, &_tracker, nullptr);\n    }\n\n    if (d <= last_committed_decree()) {\n        dinfo(\"%s: replay mutation skipped3 as decree is outdated, ballot = %\" PRId64\n              \", decree = %\" PRId64 \"(vs app %\" PRId64 \"), last_committed_decree = %\" PRId64\n              \", offset = %\" PRId64,\n              name(),\n              mu->data.header.ballot,\n              d,\n              last_committed_decree(),\n              mu->data.header.last_committed_decree,\n              offset);\n        return true;\n    }\n\n    auto old = _prepare_list->get_mutation_by_decree(d);\n    if (old != nullptr && old->data.header.ballot >= mu->data.header.ballot) {\n        dinfo(\"%s: replay mutation skipped4 as ballot is outdated, ballot = %\" PRId64\n              \" (vs local-ballot=%\" PRId64 \"), decree = %\" PRId64\n           
   \", last_committed_decree = %\" PRId64 \", offset = %\" PRId64,\n              name(),\n              mu->data.header.ballot,\n              old->data.header.ballot,\n              d,\n              mu->data.header.last_committed_decree,\n              offset);\n\n        return true;\n    }\n\n    dinfo(\"%s: replay mutation ballot = %\" PRId64 \", decree = %\" PRId64\n          \", last_committed_decree = %\" PRId64,\n          name(),\n          mu->data.header.ballot,\n          d,\n          mu->data.header.last_committed_decree);\n\n    // prepare\n    _uniq_timestamp_us.try_update(mu->data.header.timestamp);\n    error_code err = _prepare_list->prepare(mu, partition_status::PS_INACTIVE);\n    dcheck_eq_replica(err, ERR_OK);\n\n    return true;\n}\n\nvoid replica::set_inactive_state_transient(bool t)\n{\n    if (status() == partition_status::PS_INACTIVE) {\n        ddebug(\"%s: set inactive_is_transient from %s to %s\",\n               name(),\n               _inactive_is_transient ? \"true\" : \"false\",\n               t ? \"true\" : \"false\");\n        _inactive_is_transient = t;\n    }\n}\n\nvoid replica::reset_prepare_list_after_replay()\n{\n    // commit prepare list if possible\n    _prepare_list->commit(_app->last_committed_decree(), COMMIT_TO_DECREE_SOFT);\n\n    // align the prepare list and the app\n    _prepare_list->truncate(_app->last_committed_decree());\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_learn.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     replication learning process\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"replica.h\"\n#include \"mutation.h\"\n#include \"mutation_log.h\"\n#include \"replica_stub.h\"\n#include \"replica/duplication/replica_duplicator_manager.h\"\n\n#include <dsn/utility/filesystem.h>\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/dist/fmt_logging.h>\n\nnamespace dsn {\nnamespace replication {\n\nvoid replica::init_learn(uint64_t signature)\n{\n    _checker.only_one_thread_access();\n\n    if (status() != partition_status::PS_POTENTIAL_SECONDARY) {\n        dwarn(\n            \"%s: state is not 
potential secondary but %s, skip learning with signature[%016\" PRIx64\n            \"]\",\n            name(),\n            enum_to_string(status()),\n            signature);\n        return;\n    }\n\n    if (signature == invalid_signature) {\n        dwarn(\"%s: invalid learning signature, skip\", name());\n        return;\n    }\n\n    // at most one learning task running\n    if (_potential_secondary_states.learning_round_is_running) {\n        dwarn(\"%s: previous learning is still running, skip learning with signature [%016\" PRIx64\n              \"]\",\n              name(),\n              signature);\n        return;\n    }\n\n    if (signature < _potential_secondary_states.learning_version) {\n        dwarn(\"%s: learning request is out-dated, therefore skipped: [%016\" PRIx64\n              \"] vs [%016\" PRIx64 \"]\",\n              name(),\n              signature,\n              _potential_secondary_states.learning_version);\n        return;\n    }\n\n    // learn timeout or primary change, the (new) primary starts another round of learning process\n    // be cautious: primary should not issue signatures frequently to avoid learning abort\n    if (signature != _potential_secondary_states.learning_version) {\n        if (!_potential_secondary_states.cleanup(false)) {\n            dwarn(\"%s: previous learning with signature[%016\" PRIx64\n                  \"] is still in-process, skip init new learning with signature [%016\" PRIx64 \"]\",\n                  name(),\n                  _potential_secondary_states.learning_version,\n                  signature);\n            return;\n        }\n\n        _stub->_counter_replicas_learning_recent_start_count->increment();\n\n        _potential_secondary_states.learning_version = signature;\n        _potential_secondary_states.learning_start_ts_ns = dsn_now_ns();\n        _potential_secondary_states.learning_status = learner_status::LearningWithoutPrepare;\n        
_prepare_list->truncate(_app->last_committed_decree());\n    } else {\n        switch (_potential_secondary_states.learning_status) {\n        // any failues in the process\n        case learner_status::LearningFailed:\n            break;\n\n        // learned state (app state) completed\n        case learner_status::LearningWithPrepare:\n            dassert(_app->last_durable_decree() + 1 >=\n                        _potential_secondary_states.learning_start_prepare_decree,\n                    \"learned state is incomplete\");\n            {\n                // check missing state due to _app->flush to checkpoint the learned state\n                auto ac = _app->last_committed_decree();\n                auto pc = _prepare_list->last_committed_decree();\n\n                // TODO(qinzuoyan): to test the following lines\n                // missing commits\n                if (pc > ac) {\n                    // missed ones are covered by prepare list\n                    if (_prepare_list->count() > 0 && ac + 1 >= _prepare_list->min_decree()) {\n                        for (auto d = ac + 1; d <= pc; d++) {\n                            auto mu = _prepare_list->get_mutation_by_decree(d);\n                            dassert(nullptr != mu,\n                                    \"mutation must not be nullptr, decree = %\" PRId64 \"\",\n                                    d);\n                            auto err = _app->apply_mutation(mu);\n                            if (ERR_OK != err) {\n                                handle_learning_error(err, true);\n                                return;\n                            }\n                        }\n                    }\n\n                    // missed ones need to be loaded via private logs\n                    else {\n                        _stub->_counter_replicas_learning_recent_round_start_count->increment();\n                        _potential_secondary_states.learning_round_is_running = true;\n               
         _potential_secondary_states.catchup_with_private_log_task =\n                            tasking::create_task(LPC_CATCHUP_WITH_PRIVATE_LOGS,\n                                                 &_tracker,\n                                                 [this]() {\n                                                     this->catch_up_with_private_logs(\n                                                         partition_status::PS_POTENTIAL_SECONDARY);\n                                                 },\n                                                 get_gpid().thread_hash());\n                        _potential_secondary_states.catchup_with_private_log_task->enqueue();\n\n                        return; // incomplete\n                    }\n                }\n\n                // no missing commits\n                else {\n                }\n\n                // convert to success if app state and prepare list is connected\n                _potential_secondary_states.learning_status = learner_status::LearningSucceeded;\n                // fall through to success\n            }\n\n        // app state and prepare list is connected\n        case learner_status::LearningSucceeded: {\n            check_state_completeness();\n            notify_learn_completion();\n            return;\n        } break;\n        case learner_status::LearningWithoutPrepare:\n            break;\n        default:\n            dassert(false,\n                    \"invalid learner_status, status = %s\",\n                    enum_to_string(_potential_secondary_states.learning_status));\n        }\n    }\n\n    if (_app->last_committed_decree() == 0 &&\n        _stub->_learn_app_concurrent_count.load() >= _options->learn_app_max_concurrent_count) {\n        dwarn(\"%s: init_learn[%016\" PRIx64 \"]: learnee = %s, learn_duration = %\" PRIu64\n              \"ms, need to learn app because app_committed_decree = 0, but \"\n              \"learn_app_concurrent_count(%d) >= 
learn_app_max_concurrent_count(%d), skip\",\n              name(),\n              _potential_secondary_states.learning_version,\n              _config.primary.to_string(),\n              _potential_secondary_states.duration_ms(),\n              _stub->_learn_app_concurrent_count.load(),\n              _options->learn_app_max_concurrent_count);\n        return;\n    }\n\n    _stub->_counter_replicas_learning_recent_round_start_count->increment();\n    _potential_secondary_states.learning_round_is_running = true;\n\n    learn_request request;\n    request.pid = get_gpid();\n    request.__set_max_gced_decree(get_max_gced_decree_for_learn());\n    request.last_committed_decree_in_app = _app->last_committed_decree();\n    request.last_committed_decree_in_prepare_list = _prepare_list->last_committed_decree();\n    request.learner = _stub->_primary_address;\n    request.signature = _potential_secondary_states.learning_version;\n    _app->prepare_get_checkpoint(request.app_specific_learn_request);\n\n    ddebug(\"%s: init_learn[%016\" PRIx64 \"]: learnee = %s, learn_duration = %\" PRIu64\n           \" ms, max_gced_decree = %\" PRId64 \", local_committed_decree = %\" PRId64 \", \"\n           \"app_committed_decree = %\" PRId64 \", app_durable_decree = %\" PRId64\n           \", current_learning_status = %s, total_copy_file_count = %\" PRIu64\n           \", total_copy_file_size = %\" PRIu64 \", total_copy_buffer_size = %\" PRIu64,\n           name(),\n           request.signature,\n           _config.primary.to_string(),\n           _potential_secondary_states.duration_ms(),\n           request.max_gced_decree,\n           last_committed_decree(),\n           _app->last_committed_decree(),\n           _app->last_durable_decree(),\n           enum_to_string(_potential_secondary_states.learning_status),\n           _potential_secondary_states.learning_copy_file_count,\n           _potential_secondary_states.learning_copy_file_size,\n           
_potential_secondary_states.learning_copy_buffer_size);\n\n    dsn::message_ex *msg = dsn::message_ex::create_request(RPC_LEARN, 0, get_gpid().thread_hash());\n    dsn::marshall(msg, request);\n    _potential_secondary_states.learning_task = rpc::call(\n        _config.primary,\n        msg,\n        &_tracker,\n        [ this, req_cap = std::move(request) ](error_code err, learn_response && resp) mutable {\n            on_learn_reply(err, std::move(req_cap), std::move(resp));\n        });\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\ndecree replica::get_max_gced_decree_for_learn() const // on learner\n{\n    decree max_gced_decree_for_learn;\n\n    decree plog_max_gced_decree = max_gced_decree_no_lock();\n    decree first_learn_start = _potential_secondary_states.first_learn_start_decree;\n    if (first_learn_start == invalid_decree) {\n        // this is the first round of learn\n        max_gced_decree_for_learn = plog_max_gced_decree;\n    } else {\n        if (plog_max_gced_decree < 0) {\n            // The previously learned logs may still reside in learn_dir, and\n            // the actual plog dir is empty. In this condition the logs in learn_dir\n            // are taken as not-GCed.\n            max_gced_decree_for_learn = first_learn_start - 1;\n        } else {\n            // The actual plog dir is not empty. 
Use the minimum.\n            max_gced_decree_for_learn = std::min(plog_max_gced_decree, first_learn_start - 1);\n        }\n    }\n\n    return max_gced_decree_for_learn;\n}\n\n/*virtual*/ decree replica::max_gced_decree_no_lock() const\n{\n    return _private_log->max_gced_decree_no_lock(get_gpid());\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\ndecree replica::get_learn_start_decree(const learn_request &request) // on primary\n{\n    decree local_committed_decree = last_committed_decree();\n    dcheck_le_replica(request.last_committed_decree_in_app, local_committed_decree);\n\n    decree learn_start_decree_no_dup = request.last_committed_decree_in_app + 1;\n    if (!is_duplication_master()) {\n        // fast path for no duplication case: only learn those that the learner is not having.\n        return learn_start_decree_no_dup;\n    }\n\n    decree min_confirmed_decree = _duplication_mgr->min_confirmed_decree();\n\n    // Learner should include the mutations not confirmed by meta server\n    // so as to prevent data loss during duplication. 
For example, when\n    // the confirmed+1 decree has been missing from plog, the learner\n    // needs to learn back from it.\n    //\n    //                confirmed but missing\n    //                    |\n    // learner's plog: ======[--------------]\n    //                       |              |\n    //                      gced           committed\n    //\n    // In the above case, primary should return logs started from confirmed+1.\n\n    decree learn_start_decree_for_dup = learn_start_decree_no_dup;\n    if (min_confirmed_decree >= 0) {\n        learn_start_decree_for_dup = min_confirmed_decree + 1;\n    } else {\n        // if the confirmed_decree is unsure, copy all the logs\n        // TODO(wutao1): can we reduce the copy size?\n        decree local_gced = max_gced_decree_no_lock();\n        if (local_gced == invalid_decree) {\n            // abnormal case\n            dwarn_replica(\"no plog to be learned for duplication, continue as normal\");\n        } else {\n            learn_start_decree_for_dup = local_gced + 1;\n        }\n    }\n\n    decree learn_start_decree = learn_start_decree_no_dup;\n    if (learn_start_decree_for_dup <= request.max_gced_decree ||\n        request.max_gced_decree == invalid_decree) {\n        // `request.max_gced_decree == invalid_decree` indicates the learner has no log,\n        // see replica::get_max_gced_decree_for_learn for details.\n        if (learn_start_decree_for_dup < learn_start_decree_no_dup) {\n            learn_start_decree = learn_start_decree_for_dup;\n            ddebug_replica(\"learn_start_decree steps back to {} to ensure learner having enough \"\n                           \"logs for duplication [confirmed_decree={}, learner_gced_decree={}]\",\n                           learn_start_decree,\n                           min_confirmed_decree,\n                           request.max_gced_decree);\n        }\n    }\n    dcheck_le_replica(learn_start_decree, local_committed_decree + 1);\n    
dcheck_gt_replica(learn_start_decree, 0); // learn_start_decree can never be invalid_decree\n    return learn_start_decree;\n}\n\nvoid replica::on_learn(dsn::message_ex *msg, const learn_request &request)\n{\n    _checker.only_one_thread_access();\n\n    learn_response response;\n    if (partition_status::PS_PRIMARY != status()) {\n        response.err = (partition_status::PS_INACTIVE == status() && _inactive_is_transient)\n                           ? ERR_INACTIVE_STATE\n                           : ERR_INVALID_STATE;\n        reply(msg, response);\n        return;\n    }\n\n    // but just set state to partition_status::PS_POTENTIAL_SECONDARY\n    _primary_states.get_replica_config(partition_status::PS_POTENTIAL_SECONDARY, response.config);\n\n    auto it = _primary_states.learners.find(request.learner);\n    if (it == _primary_states.learners.end()) {\n        response.config.status = partition_status::PS_INACTIVE;\n        response.err = ERR_OBJECT_NOT_FOUND;\n        reply(msg, response);\n        return;\n    }\n\n    remote_learner_state &learner_state = it->second;\n    if (learner_state.signature != request.signature) {\n        response.config.learner_signature = learner_state.signature;\n        response.err = ERR_WRONG_CHECKSUM; // means invalid signature\n        reply(msg, response);\n        return;\n    }\n\n    // prepare learn_start_decree\n    decree local_committed_decree = last_committed_decree();\n\n    // TODO: learner machine has been down for a long time, and DDD MUST happened before\n    // which leads to state lost. 
Now the lost state is back, what shall we do?\n    if (request.last_committed_decree_in_app > last_prepared_decree()) {\n        derror(\"%s: on_learn[%016\" PRIx64 \"]: learner = %s, learner state is newer than learnee, \"\n               \"learner_app_committed_decree = %\" PRId64 \", local_committed_decree = %\" PRId64\n               \", learn from scratch\",\n               name(),\n               request.signature,\n               request.learner.to_string(),\n               request.last_committed_decree_in_app,\n               local_committed_decree);\n\n        *(decree *)&request.last_committed_decree_in_app = 0;\n    }\n\n    // mutations are previously committed already on learner (old primary)\n    // this happens when the new primary does not commit the previously prepared mutations\n    // yet, which it should do, so let's help it now.\n    else if (request.last_committed_decree_in_app > local_committed_decree) {\n        derror(\"%s: on_learn[%016\" PRIx64\n               \"]: learner = %s, learner's last_committed_decree_in_app is newer than learnee, \"\n               \"learner_app_committed_decree = %\" PRId64 \", local_committed_decree = %\" PRId64\n               \", commit local soft\",\n               name(),\n               request.signature,\n               request.learner.to_string(),\n               request.last_committed_decree_in_app,\n               local_committed_decree);\n\n        // we shouldn't commit mutations hard coz these mutations may preparing on another learner\n        _prepare_list->commit(request.last_committed_decree_in_app, COMMIT_TO_DECREE_SOFT);\n        local_committed_decree = last_committed_decree();\n\n        if (request.last_committed_decree_in_app > local_committed_decree) {\n            derror(\"%s: on_learn[%016\" PRIx64 \"]: try to commit primary to %\" PRId64\n                   \", still less than learner(%s)'s committed decree(%\" PRId64\n                   \"), wait mutations to be commitable\",\n       
            name(),\n                   request.signature,\n                   local_committed_decree,\n                   request.learner.to_string(),\n                   request.last_committed_decree_in_app);\n            response.err = ERR_INCONSISTENT_STATE;\n            reply(msg, response);\n            return;\n        }\n    }\n\n    dassert(request.last_committed_decree_in_app <= local_committed_decree,\n            \"%\" PRId64 \" VS %\" PRId64 \"\",\n            request.last_committed_decree_in_app,\n            local_committed_decree);\n\n    const decree learn_start_decree = get_learn_start_decree(request);\n    response.state.__set_learn_start_decree(learn_start_decree);\n    bool delayed_replay_prepare_list = false;\n\n    ddebug(\"%s: on_learn[%016\" PRIx64 \"]: learner = %s, remote_committed_decree = %\" PRId64 \", \"\n           \"remote_app_committed_decree = %\" PRId64 \", local_committed_decree = %\" PRId64 \", \"\n           \"app_committed_decree = %\" PRId64 \", app_durable_decree = %\" PRId64 \", \"\n           \"prepare_min_decree = %\" PRId64\n           \", prepare_list_count = %d, learn_start_decree = %\" PRId64,\n           name(),\n           request.signature,\n           request.learner.to_string(),\n           request.last_committed_decree_in_prepare_list,\n           request.last_committed_decree_in_app,\n           local_committed_decree,\n           _app->last_committed_decree(),\n           _app->last_durable_decree(),\n           _prepare_list->min_decree(),\n           _prepare_list->count(),\n           learn_start_decree);\n\n    response.address = _stub->_primary_address;\n    response.prepare_start_decree = invalid_decree;\n    response.last_committed_decree = local_committed_decree;\n    response.err = ERR_OK;\n\n    // learn delta state or checkpoint\n    bool should_learn_cache = prepare_cached_learn_state(request,\n                                                         learn_start_decree,\n                           
                              local_committed_decree,\n                                                         learner_state,\n                                                         response,\n                                                         delayed_replay_prepare_list);\n    if (!should_learn_cache) {\n        if (learn_start_decree > _app->last_durable_decree()) {\n            ddebug(\"%s: on_learn[%016\" PRIx64 \"]: learner = %s, choose to learn private logs, \"\n                   \"because learn_start_decree(%\" PRId64 \") > _app->last_durable_decree(%\" PRId64\n                   \")\",\n                   name(),\n                   request.signature,\n                   request.learner.to_string(),\n                   learn_start_decree,\n                   _app->last_durable_decree());\n            _private_log->get_learn_state(get_gpid(), learn_start_decree, response.state);\n            response.type = learn_type::LT_LOG;\n        } else if (_private_log->get_learn_state(get_gpid(), learn_start_decree, response.state)) {\n            ddebug(\"%s: on_learn[%016\" PRIx64 \"]: learner = %s, choose to learn private logs, \"\n                   \"because mutation_log::get_learn_state() returns true\",\n                   name(),\n                   request.signature,\n                   request.learner.to_string());\n            response.type = learn_type::LT_LOG;\n        } else if (learn_start_decree < request.last_committed_decree_in_app + 1) {\n            ddebug(\"%s: on_learn[%016\" PRIx64 \"]: learner = %s, choose to learn private logs, \"\n                   \"because learn_start_decree steps back for duplication\",\n                   name(),\n                   request.signature,\n                   request.learner.to_string());\n            response.type = learn_type::LT_LOG;\n        } else {\n            ddebug(\"%s: on_learn[%016\" PRIx64 \"]: learner = %s, choose to learn app, \"\n                   \"beacuse learn_start_decree(%\" 
PRId64 \") <= _app->last_durable_decree(%\" PRId64\n                   \"), \"\n                   \"and mutation_log::get_learn_state() returns false\",\n                   name(),\n                   request.signature,\n                   request.learner.to_string(),\n                   learn_start_decree,\n                   _app->last_durable_decree());\n            response.type = learn_type::LT_APP;\n            response.state = learn_state();\n        }\n\n        if (response.type == learn_type::LT_LOG) {\n            response.base_local_dir = _private_log->dir();\n            if (response.state.files.size() > 0) {\n                auto &last_file = response.state.files.back();\n                if (last_file == learner_state.last_learn_log_file) {\n                    ddebug(\n                        \"%s: on_learn[%016\" PRIx64\n                        \"]: learner = %s, learn the same file %s repeatedly, hint to switch file\",\n                        name(),\n                        request.signature,\n                        request.learner.to_string(),\n                        last_file.c_str());\n                    _private_log->hint_switch_file();\n                } else {\n                    learner_state.last_learn_log_file = last_file;\n                }\n            }\n            // it is safe to commit to last_committed_decree() now\n            response.state.to_decree_included = last_committed_decree();\n            ddebug(\"%s: on_learn[%016\" PRIx64 \"]: learner = %s, learn private logs succeed, \"\n                   \"learned_meta_size = %u, learned_file_count = %u, \"\n                   \"to_decree_included = %\" PRId64,\n                   name(),\n                   request.signature,\n                   request.learner.to_string(),\n                   response.state.meta.length(),\n                   static_cast<uint32_t>(response.state.files.size()),\n                   response.state.to_decree_included);\n        } else {\n       
     ::dsn::error_code err = _app->get_checkpoint(\n                learn_start_decree, request.app_specific_learn_request, response.state);\n\n            if (err != ERR_OK) {\n                response.err = ERR_GET_LEARN_STATE_FAILED;\n                derror(\"%s: on_learn[%016\" PRIx64\n                       \"]: learner = %s, get app checkpoint failed, error = %s\",\n                       name(),\n                       request.signature,\n                       request.learner.to_string(),\n                       err.to_string());\n            } else {\n                response.base_local_dir = _app->data_dir();\n                response.__set_replica_disk_tag(get_replica_disk_tag());\n                ddebug(\n                    \"%s: on_learn[%016\" PRIx64 \"]: learner = %s, get app learn state succeed, \"\n                    \"learned_meta_size = %u, learned_file_count = %u, learned_to_decree = %\" PRId64,\n                    name(),\n                    request.signature,\n                    request.learner.to_string(),\n                    response.state.meta.length(),\n                    static_cast<uint32_t>(response.state.files.size()),\n                    response.state.to_decree_included);\n            }\n        }\n    }\n\n    for (auto &file : response.state.files) {\n        file = file.substr(response.base_local_dir.length() + 1);\n    }\n\n    reply(msg, response);\n\n    // the replayed prepare msg needs to be AFTER the learning response msg\n    if (delayed_replay_prepare_list) {\n        replay_prepare_list();\n    }\n}\n\nvoid replica::on_learn_reply(error_code err, learn_request &&req, learn_response &&resp)\n{\n    _checker.only_one_thread_access();\n\n    dassert(partition_status::PS_POTENTIAL_SECONDARY == status(),\n            \"invalid partition status, status = %s\",\n            enum_to_string(status()));\n    dassert(req.signature == (int64_t)_potential_secondary_states.learning_version,\n            \"invalid learn 
signature, %\" PRId64 \" VS %\" PRId64 \"\",\n            req.signature,\n            (int64_t)_potential_secondary_states.learning_version);\n\n    if (err != ERR_OK) {\n        handle_learning_error(err, false);\n        return;\n    }\n\n    ddebug_replica(\n        \"on_learn_reply_start[{}]: learnee = {}, learn_duration ={} ms, response_err = \"\n        \"{}, remote_committed_decree = {}, prepare_start_decree = {}, learn_type = {} \"\n        \"learned_buffer_size = {}, learned_file_count = {},to_decree_included = \"\n        \"{}, learn_start_decree = {}, last_commit_decree = {}, current_learning_status = \"\n        \"{} \",\n        req.signature,\n        resp.config.primary.to_string(),\n        _potential_secondary_states.duration_ms(),\n        resp.err.to_string(),\n        resp.last_committed_decree,\n        resp.prepare_start_decree,\n        enum_to_string(resp.type),\n        resp.state.meta.length(),\n        static_cast<uint32_t>(resp.state.files.size()),\n        resp.state.to_decree_included,\n        resp.state.learn_start_decree,\n        _app->last_committed_decree(),\n        enum_to_string(_potential_secondary_states.learning_status));\n\n    _potential_secondary_states.learning_copy_buffer_size += resp.state.meta.length();\n    _stub->_counter_replicas_learning_recent_copy_buffer_size->add(resp.state.meta.length());\n\n    if (resp.err != ERR_OK) {\n        if (resp.err == ERR_INACTIVE_STATE || resp.err == ERR_INCONSISTENT_STATE) {\n            dwarn(\"%s: on_learn_reply[%016\" PRIx64\n                  \"]: learnee = %s, learnee is updating ballot(inactive state) or \"\n                  \"reconciliation(inconsistent state), delay to start another round of learning\",\n                  name(),\n                  req.signature,\n                  resp.config.primary.to_string());\n            _potential_secondary_states.learning_round_is_running = false;\n            _potential_secondary_states.delay_learning_task =\n                
tasking::create_task(LPC_DELAY_LEARN,\n                                     &_tracker,\n                                     std::bind(&replica::init_learn, this, req.signature),\n                                     get_gpid().thread_hash());\n            _potential_secondary_states.delay_learning_task->enqueue(std::chrono::seconds(1));\n        } else {\n            handle_learning_error(resp.err, false);\n        }\n        return;\n    }\n\n    if (resp.config.ballot > get_ballot()) {\n        ddebug(\"%s: on_learn_reply[%016\" PRIx64\n               \"]: learnee = %s, update configuration because ballot have changed\",\n               name(),\n               req.signature,\n               resp.config.primary.to_string());\n        bool ret = update_local_configuration(resp.config);\n        dassert(ret, \"\");\n    }\n\n    if (status() != partition_status::PS_POTENTIAL_SECONDARY) {\n        derror(\"%s: on_learn_reply[%016\" PRIx64\n               \"]: learnee = %s, current_status = %s, stop learning\",\n               name(),\n               req.signature,\n               resp.config.primary.to_string(),\n               enum_to_string(status()));\n        return;\n    }\n\n    // local state is newer than learnee\n    if (resp.last_committed_decree < _app->last_committed_decree()) {\n        dwarn(\"%s: on_learn_reply[%016\" PRIx64\n              \"]: learnee = %s, learner state is newer than learnee (primary): %\" PRId64\n              \" vs %\" PRId64 \", create new app\",\n              name(),\n              req.signature,\n              resp.config.primary.to_string(),\n              _app->last_committed_decree(),\n              resp.last_committed_decree);\n\n        _stub->_counter_replicas_learning_recent_learn_reset_count->increment();\n\n        // close app\n        auto err = _app->close(true);\n        if (err != ERR_OK) {\n            derror(\"%s: on_learn_reply[%016\" PRIx64\n                   \"]: learnee = %s, close app (with 
clear_state=true) failed, err = %s\",\n                   name(),\n                   req.signature,\n                   resp.config.primary.to_string(),\n                   err.to_string());\n        }\n\n        // backup old data dir\n        if (err == ERR_OK) {\n            std::string old_dir = _app->data_dir();\n            if (dsn::utils::filesystem::directory_exists(old_dir)) {\n                char rename_dir[1024];\n                sprintf(rename_dir, \"%s.%\" PRIu64 \".discarded\", old_dir.c_str(), dsn_now_us());\n                if (dsn::utils::filesystem::rename_path(old_dir, rename_dir)) {\n                    dwarn(\"%s: {replica_dir_op} succeed to move directory from '%s' to '%s'\",\n                          name(),\n                          old_dir.c_str(),\n                          rename_dir);\n                } else {\n                    dassert(false,\n                            \"%s: failed to move directory from '%s' to '%s'\",\n                            name(),\n                            old_dir.c_str(),\n                            rename_dir);\n                }\n            }\n        }\n\n        if (err == ERR_OK) {\n            err = _app->open_new_internal(this,\n                                          _stub->_log->on_partition_reset(get_gpid(), 0),\n                                          _private_log->on_partition_reset(get_gpid(), 0));\n\n            if (err != ERR_OK) {\n                derror(\"%s: on_learn_reply[%016\" PRIx64\n                       \"]: learnee = %s, open app (with create_new=true) failed, err = %s\",\n                       name(),\n                       req.signature,\n                       resp.config.primary.to_string(),\n                       err.to_string());\n            }\n        }\n\n        if (err == ERR_OK) {\n            dassert(_app->last_committed_decree() == 0, \"must be zero after app::open(true)\");\n            dassert(_app->last_durable_decree() == 0, \"must be zero after 
app::open(true)\");\n\n            // reset prepare list\n            _prepare_list->reset(0);\n        }\n\n        if (err != ERR_OK) {\n            _potential_secondary_states.learn_remote_files_task =\n                tasking::create_task(LPC_LEARN_REMOTE_DELTA_FILES, &_tracker, [\n                    this,\n                    err,\n                    copy_start = _potential_secondary_states.duration_ms(),\n                    req_cap = std::move(req),\n                    resp_cap = std::move(resp)\n                ]() mutable {\n                    on_copy_remote_state_completed(\n                        err, 0, copy_start, std::move(req_cap), std::move(resp_cap));\n                });\n            _potential_secondary_states.learn_remote_files_task->enqueue();\n            return;\n        }\n    }\n\n    if (resp.type == learn_type::LT_APP) {\n        if (++_stub->_learn_app_concurrent_count > _options->learn_app_max_concurrent_count) {\n            --_stub->_learn_app_concurrent_count;\n            dwarn(\"%s: on_learn_reply[%016\" PRIx64\n                  \"]: learnee = %s, learn_app_concurrent_count(%d) >= \"\n                  \"learn_app_max_concurrent_count(%d), skip this round\",\n                  name(),\n                  _potential_secondary_states.learning_version,\n                  _config.primary.to_string(),\n                  _stub->_learn_app_concurrent_count.load(),\n                  _options->learn_app_max_concurrent_count);\n            _potential_secondary_states.learning_round_is_running = false;\n            return;\n        } else {\n            _potential_secondary_states.learn_app_concurrent_count_increased = true;\n            ddebug(\"%s: on_learn_reply[%016\" PRIx64\n                   \"]: learnee = %s, ++learn_app_concurrent_count = %d\",\n                   name(),\n                   _potential_secondary_states.learning_version,\n                   _config.primary.to_string(),\n                   
_stub->_learn_app_concurrent_count.load());\n        }\n    }\n\n    switch (resp.type) {\n    case learn_type::LT_CACHE:\n        _stub->_counter_replicas_learning_recent_learn_cache_count->increment();\n        break;\n    case learn_type::LT_APP:\n        _stub->_counter_replicas_learning_recent_learn_app_count->increment();\n        break;\n    case learn_type::LT_LOG:\n        _stub->_counter_replicas_learning_recent_learn_log_count->increment();\n        break;\n    default:\n        // do nothing\n        break;\n    }\n\n    if (resp.prepare_start_decree != invalid_decree) {\n        dassert(resp.type == learn_type::LT_CACHE,\n                \"invalid learn_type, type = %s\",\n                enum_to_string(resp.type));\n        dassert(resp.state.files.size() == 0, \"\");\n        dassert(_potential_secondary_states.learning_status ==\n                    learner_status::LearningWithoutPrepare,\n                \"invalid learning_status, status = %s\",\n                enum_to_string(_potential_secondary_states.learning_status));\n        _potential_secondary_states.learning_status = learner_status::LearningWithPrepareTransient;\n\n        // reset log positions for later mutations\n        // WARNING: it still requires checkpoint operation in later\n        // on_copy_remote_state_completed to ensure the state is completed\n        // if there is a failure in between, our checking\n        // during app::open_internal will invalidate the logs\n        // appended by the mutations AFTER current position\n        err = _app->update_init_info(\n            this,\n            _stub->_log->on_partition_reset(get_gpid(), _app->last_committed_decree()),\n            _private_log->on_partition_reset(get_gpid(), _app->last_committed_decree()),\n            _app->last_committed_decree());\n\n        // switch private log to make learning easier\n        _private_log->demand_switch_file();\n\n        // reset preparelist\n        
_potential_secondary_states.learning_start_prepare_decree = resp.prepare_start_decree;\n        _prepare_list->truncate(_app->last_committed_decree());\n        ddebug(\"%s: on_learn_reply[%016\" PRIx64\n               \"]: learnee = %s, truncate prepare list, local_committed_decree = %\" PRId64\n               \", current_learning_status = %s\",\n               name(),\n               req.signature,\n               resp.config.primary.to_string(),\n               _app->last_committed_decree(),\n               enum_to_string(_potential_secondary_states.learning_status));\n\n        // persist incoming mutations into private log and apply them to prepare-list\n        std::pair<decree, decree> cache_range;\n        binary_reader reader(resp.state.meta);\n        while (!reader.is_eof()) {\n            auto mu = mutation::read_from(reader, nullptr);\n            if (mu->data.header.decree > last_committed_decree()) {\n                dinfo(\"%s: on_learn_reply[%016\" PRIx64 \"]: apply learned mutation %s\",\n                      name(),\n                      req.signature,\n                      mu->name());\n\n                // write to private log with no callback, the later 2pc ensures that logs\n                // are written to the disk\n                _private_log->append(mu, LPC_WRITE_REPLICATION_LOG_COMMON, &_tracker, nullptr);\n\n                // because private log are written without callback, need to manully set flag\n                mu->set_logged();\n\n                // then we prepare, it is possible that a committed mutation exists in learner's\n                // prepare log,\n                // but with DIFFERENT ballot. 
Reference https://github.com/imzhenyu/rDSN/issues/496\n                mutation_ptr existing_mutation =\n                    _prepare_list->get_mutation_by_decree(mu->data.header.decree);\n                if (existing_mutation != nullptr &&\n                    existing_mutation->data.header.ballot > mu->data.header.ballot) {\n                    ddebug(\"%s: on_learn_reply[%016\" PRIx64 \"]: learnee = %s, \"\n                           \"mutation(%s) exist on the learner with larger ballot %\" PRId64 \"\",\n                           name(),\n                           req.signature,\n                           resp.config.primary.to_string(),\n                           mu->name(),\n                           existing_mutation->data.header.ballot);\n                } else {\n                    _prepare_list->prepare(mu, partition_status::PS_POTENTIAL_SECONDARY);\n                }\n\n                if (cache_range.first == 0 || mu->data.header.decree < cache_range.first)\n                    cache_range.first = mu->data.header.decree;\n                if (cache_range.second == 0 || mu->data.header.decree > cache_range.second)\n                    cache_range.second = mu->data.header.decree;\n            }\n        }\n\n        ddebug(\"%s: on_learn_reply[%016\" PRIx64 \"]: learnee = %s, learn_duration = %\" PRIu64 \" ms, \"\n               \"apply cache done, prepare_cache_range = <%\" PRId64 \", %\" PRId64 \">, \"\n               \"local_committed_decree = %\" PRId64 \", app_committed_decree = %\" PRId64\n               \", current_learning_status = %s\",\n               name(),\n               req.signature,\n               resp.config.primary.to_string(),\n               _potential_secondary_states.duration_ms(),\n               cache_range.first,\n               cache_range.second,\n               last_committed_decree(),\n               _app->last_committed_decree(),\n               enum_to_string(_potential_secondary_states.learning_status));\n\n        
// further states are synced using 2pc, and we must commit now as those later 2pc messages\n        // thinks they should\n        _prepare_list->commit(resp.prepare_start_decree - 1, COMMIT_TO_DECREE_HARD);\n        dassert(_prepare_list->last_committed_decree() == _app->last_committed_decree(),\n                \"last_committed_decree of prepare_list and app isn't equal, %\" PRId64 \" VS %\" PRId64\n                \"\",\n                _prepare_list->last_committed_decree(),\n                _app->last_committed_decree());\n        dassert(resp.state.files.size() == 0, \"\");\n\n        // all state is complete\n        dassert(_app->last_committed_decree() + 1 >=\n                    _potential_secondary_states.learning_start_prepare_decree,\n                \"state is incomplete\");\n\n        // go to next stage\n        _potential_secondary_states.learning_status = learner_status::LearningWithPrepare;\n        _potential_secondary_states.learn_remote_files_task =\n            tasking::create_task(LPC_LEARN_REMOTE_DELTA_FILES, &_tracker, [\n                this,\n                err,\n                copy_start = _potential_secondary_states.duration_ms(),\n                req_cap = std::move(req),\n                resp_cap = std::move(resp)\n            ]() mutable {\n                on_copy_remote_state_completed(\n                    err, 0, copy_start, std::move(req_cap), std::move(resp_cap));\n            });\n        _potential_secondary_states.learn_remote_files_task->enqueue();\n    }\n\n    else if (resp.state.files.size() > 0) {\n        auto learn_dir = _app->learn_dir();\n        utils::filesystem::remove_path(learn_dir);\n        utils::filesystem::create_directory(learn_dir);\n\n        if (!dsn::utils::filesystem::directory_exists(learn_dir)) {\n            derror(\"%s: on_learn_reply[%016\" PRIx64\n                   \"]: learnee = %s, create replica learn dir %s failed\",\n                   name(),\n                   req.signature,\n        
           resp.config.primary.to_string(),\n                   learn_dir.c_str());\n\n            _potential_secondary_states.learn_remote_files_task =\n                tasking::create_task(LPC_LEARN_REMOTE_DELTA_FILES, &_tracker, [\n                    this,\n                    copy_start = _potential_secondary_states.duration_ms(),\n                    req_cap = std::move(req),\n                    resp_cap = std::move(resp)\n                ]() mutable {\n                    on_copy_remote_state_completed(ERR_FILE_OPERATION_FAILED,\n                                                   0,\n                                                   copy_start,\n                                                   std::move(req_cap),\n                                                   std::move(resp_cap));\n                });\n            _potential_secondary_states.learn_remote_files_task->enqueue();\n            return;\n        }\n\n        bool high_priority = (resp.type == learn_type::LT_APP ? false : true);\n        ddebug(\"%s: on_learn_reply[%016\" PRIx64 \"]: learnee = %s, learn_duration = %\" PRIu64\n               \" ms, start to copy remote files, copy_file_count = %d, priority = %s\",\n               name(),\n               req.signature,\n               resp.config.primary.to_string(),\n               _potential_secondary_states.duration_ms(),\n               static_cast<int>(resp.state.files.size()),\n               high_priority ? 
\"high\" : \"low\");\n\n        _potential_secondary_states.learn_remote_files_task = _stub->_nfs->copy_remote_files(\n            resp.config.primary,\n            resp.replica_disk_tag,\n            resp.base_local_dir,\n            resp.state.files,\n            get_replica_disk_tag(),\n            learn_dir,\n            true, // overwrite\n            high_priority,\n            LPC_REPLICATION_COPY_REMOTE_FILES,\n            &_tracker,\n            [\n              this,\n              copy_start = _potential_secondary_states.duration_ms(),\n              req_cap = std::move(req),\n              resp_copy = resp\n            ](error_code err, size_t sz) mutable {\n                on_copy_remote_state_completed(\n                    err, sz, copy_start, std::move(req_cap), std::move(resp_copy));\n            });\n    } else {\n        _potential_secondary_states.learn_remote_files_task =\n            tasking::create_task(LPC_LEARN_REMOTE_DELTA_FILES, &_tracker, [\n                this,\n                copy_start = _potential_secondary_states.duration_ms(),\n                req_cap = std::move(req),\n                resp_cap = std::move(resp)\n            ]() mutable {\n                on_copy_remote_state_completed(\n                    ERR_OK, 0, copy_start, std::move(req_cap), std::move(resp_cap));\n            });\n        _potential_secondary_states.learn_remote_files_task->enqueue();\n    }\n}\n\nbool replica::prepare_cached_learn_state(const learn_request &request,\n                                         decree learn_start_decree,\n                                         decree local_committed_decree,\n                                         /*out*/ remote_learner_state &learner_state,\n                                         /*out*/ learn_response &response,\n                                         /*out*/ bool &delayed_replay_prepare_list)\n{\n    // set prepare_start_decree when to-be-learn state is covered by prepare list,\n    // note 
min_decree can be NOT present in prepare list when list.count == 0\n    if (learn_start_decree > _prepare_list->min_decree() ||\n        (learn_start_decree == _prepare_list->min_decree() && _prepare_list->count() > 0)) {\n        if (learner_state.prepare_start_decree == invalid_decree) {\n            // start from (last_committed_decree + 1)\n            learner_state.prepare_start_decree = local_committed_decree + 1;\n\n            cleanup_preparing_mutations(false);\n\n            // the replayed prepare msg needs to be AFTER the learning response msg\n            // to reduce probability that preparing messages arrive remote early than\n            // learning response msg.\n            delayed_replay_prepare_list = true;\n\n            ddebug(\"%s: on_learn[%016\" PRIx64\n                   \"]: learner = %s, set prepare_start_decree = %\" PRId64,\n                   name(),\n                   request.signature,\n                   request.learner.to_string(),\n                   local_committed_decree + 1);\n        }\n\n        response.prepare_start_decree = learner_state.prepare_start_decree;\n    } else {\n        learner_state.prepare_start_decree = invalid_decree;\n    }\n\n    // only learn mutation cache in range of [learn_start_decree, prepare_start_decree),\n    // in this case, the state on the PS should be contiguous (+ to-be-sent prepare list)\n    if (response.prepare_start_decree != invalid_decree) {\n        binary_writer writer;\n        int count = 0;\n        for (decree d = learn_start_decree; d < response.prepare_start_decree; d++) {\n            auto mu = _prepare_list->get_mutation_by_decree(d);\n            dassert(mu != nullptr, \"mutation must not be nullptr, decree = %\" PRId64 \"\", d);\n            mu->write_to(writer, nullptr);\n            count++;\n        }\n        response.type = learn_type::LT_CACHE;\n        response.state.meta = writer.get_buffer();\n        ddebug(\"%s: on_learn[%016\" PRIx64 \"]: learner = %s, learn 
mutation cache succeed, \"\n               \"learn_start_decree = %\" PRId64 \", prepare_start_decree = %\" PRId64 \", \"\n               \"learn_mutation_count = %d, learn_data_size = %d\",\n               name(),\n               request.signature,\n               request.learner.to_string(),\n               learn_start_decree,\n               response.prepare_start_decree,\n               count,\n               response.state.meta.length());\n        return true;\n    }\n    return false;\n}\n\nvoid replica::on_copy_remote_state_completed(error_code err,\n                                             size_t size,\n                                             uint64_t copy_start_time,\n                                             learn_request &&req,\n                                             learn_response &&resp)\n{\n    decree old_prepared = last_prepared_decree();\n    decree old_committed = last_committed_decree();\n    decree old_app_committed = _app->last_committed_decree();\n    decree old_app_durable = _app->last_durable_decree();\n\n    ddebug(\"%s: on_copy_remote_state_completed[%016\" PRIx64\n           \"]: learnee = %s, learn_duration = %\" PRIu64 \" ms, \"\n           \"copy remote state done, err = %s, copy_file_count = %d, \"\n           \"copy_file_size = %\" PRIu64 \", copy_time_used = %\" PRIu64 \" ms, \"\n           \"local_committed_decree = %\" PRId64 \", app_committed_decree = %\" PRId64\n           \", app_durable_decree = %\" PRId64 \", \"\n           \"prepare_start_decree = %\" PRId64 \", current_learning_status = %s\",\n           name(),\n           req.signature,\n           resp.config.primary.to_string(),\n           _potential_secondary_states.duration_ms(),\n           err.to_string(),\n           static_cast<int>(resp.state.files.size()),\n           static_cast<uint64_t>(size),\n           _potential_secondary_states.duration_ms() - copy_start_time,\n           last_committed_decree(),\n           
_app->last_committed_decree(),\n           _app->last_durable_decree(),\n           resp.prepare_start_decree,\n           enum_to_string(_potential_secondary_states.learning_status));\n\n    if (resp.type == learn_type::LT_APP) {\n        --_stub->_learn_app_concurrent_count;\n        _potential_secondary_states.learn_app_concurrent_count_increased = false;\n        ddebug(\"%s: on_copy_remote_state_completed[%016\" PRIx64\n               \"]: learnee = %s, --learn_app_concurrent_count = %d\",\n               name(),\n               _potential_secondary_states.learning_version,\n               _config.primary.to_string(),\n               _stub->_learn_app_concurrent_count.load());\n    }\n\n    if (err == ERR_OK) {\n        _potential_secondary_states.learning_copy_file_count += resp.state.files.size();\n        _potential_secondary_states.learning_copy_file_size += size;\n        _stub->_counter_replicas_learning_recent_copy_file_count->add(resp.state.files.size());\n        _stub->_counter_replicas_learning_recent_copy_file_size->add(size);\n    }\n\n    if (err != ERR_OK) {\n        // do nothing\n    } else if (_potential_secondary_states.learning_status == learner_status::LearningWithPrepare) {\n        dassert(resp.type == learn_type::LT_CACHE,\n                \"invalid learn_type, type = %s\",\n                enum_to_string(resp.type));\n    } else {\n        dassert(resp.type == learn_type::LT_APP || resp.type == learn_type::LT_LOG,\n                \"invalid learn_type, type = %s\",\n                enum_to_string(resp.type));\n\n        learn_state lstate;\n        lstate.from_decree_excluded = resp.state.from_decree_excluded;\n        lstate.to_decree_included = resp.state.to_decree_included;\n        lstate.meta = resp.state.meta;\n        if (resp.state.__isset.learn_start_decree) {\n            lstate.__set_learn_start_decree(resp.state.learn_start_decree);\n        }\n\n        for (auto &f : resp.state.files) {\n            std::string file = 
utils::filesystem::path_combine(_app->learn_dir(), f);\n            lstate.files.push_back(file);\n        }\n\n        // apply app learning\n        if (resp.type == learn_type::LT_APP) {\n            auto start_ts = dsn_now_ns();\n            err = _app->apply_checkpoint(replication_app_base::chkpt_apply_mode::learn, lstate);\n            if (err == ERR_OK) {\n\n                dassert(_app->last_committed_decree() >= _app->last_durable_decree(),\n                        \"invalid app state, %\" PRId64 \" VS %\" PRId64 \"\",\n                        _app->last_committed_decree(),\n                        _app->last_durable_decree());\n                // because if the original _app->last_committed_decree > resp.last_committed_decree,\n                // the learn_start_decree will be set to 0, which makes learner to learn from\n                // scratch\n                dassert(_app->last_committed_decree() <= resp.last_committed_decree,\n                        \"invalid app state, %\" PRId64 \" VS %\" PRId64 \"\",\n                        _app->last_committed_decree(),\n                        resp.last_committed_decree);\n                ddebug(\"%s: on_copy_remote_state_completed[%016\" PRIx64\n                       \"]: learnee = %s, learn_duration = %\" PRIu64 \" ms, \"\n                       \"checkpoint duration = %\" PRIu64\n                       \" ns, apply checkpoint succeed, app_committed_decree = %\" PRId64,\n                       name(),\n                       req.signature,\n                       resp.config.primary.to_string(),\n                       _potential_secondary_states.duration_ms(),\n                       dsn_now_ns() - start_ts,\n                       _app->last_committed_decree());\n            } else {\n                derror(\"%s: on_copy_remote_state_completed[%016\" PRIx64\n                       \"]: learnee = %s, learn_duration = %\" PRIu64 \" ms, \"\n                       \"checkpoint duration = %\" PRIu64 \" ns, 
apply checkpoint failed, err = %s\",\n                       name(),\n                       req.signature,\n                       resp.config.primary.to_string(),\n                       _potential_secondary_states.duration_ms(),\n                       dsn_now_ns() - start_ts,\n                       err.to_string());\n            }\n        }\n\n        // apply log learning\n        else {\n            auto start_ts = dsn_now_ns();\n            err = apply_learned_state_from_private_log(lstate);\n            if (err == ERR_OK) {\n                ddebug(\"%s: on_copy_remote_state_completed[%016\" PRIx64\n                       \"]: learnee = %s, learn_duration = %\" PRIu64 \" ms, \"\n                       \"apply_log_duration = %\" PRIu64 \" ns, apply learned state from private log \"\n                       \"succeed, app_committed_decree = %\" PRId64,\n                       name(),\n                       req.signature,\n                       resp.config.primary.to_string(),\n                       _potential_secondary_states.duration_ms(),\n                       dsn_now_ns() - start_ts,\n                       _app->last_committed_decree());\n            } else {\n                derror(\"%s: on_copy_remote_state_completed[%016\" PRIx64\n                       \"]: learnee = %s, learn_duration = %\" PRIu64 \" ms, \"\n                       \"apply_log_duration = %\" PRIu64\n                       \" ns, apply learned state from private log failed, err = %s\",\n                       name(),\n                       req.signature,\n                       resp.config.primary.to_string(),\n                       _potential_secondary_states.duration_ms(),\n                       dsn_now_ns() - start_ts,\n                       err.to_string());\n            }\n        }\n\n        // reset prepare list to make it catch with app\n        _prepare_list->reset(_app->last_committed_decree());\n\n        ddebug(\"%s: on_copy_remote_state_completed[%016\" PRIx64\n  
             \"]: learnee = %s, learn_duration = %\" PRIu64\n               \" ms, apply checkpoint/log done, err = %s, \"\n               \"last_prepared_decree = (%\" PRId64 \" => %\" PRId64 \"), \"\n               \"last_committed_decree = (%\" PRId64 \" => %\" PRId64 \"), \"\n               \"app_committed_decree = (%\" PRId64 \" => %\" PRId64 \"), \"\n               \"app_durable_decree = (%\" PRId64 \" => %\" PRId64 \"), \"\n               \"remote_committed_decree = %\" PRId64 \", \"\n               \"prepare_start_decree = %\" PRId64 \", \"\n               \"current_learning_status = %s\",\n               name(),\n               req.signature,\n               resp.config.primary.to_string(),\n               _potential_secondary_states.duration_ms(),\n               err.to_string(),\n               old_prepared,\n               last_prepared_decree(),\n               old_committed,\n               last_committed_decree(),\n               old_app_committed,\n               _app->last_committed_decree(),\n               old_app_durable,\n               _app->last_durable_decree(),\n               resp.last_committed_decree,\n               resp.prepare_start_decree,\n               enum_to_string(_potential_secondary_states.learning_status));\n    }\n\n    // if catch-up done, do flush to enable all learned state is durable\n    if (err == ERR_OK && resp.prepare_start_decree != invalid_decree &&\n        _app->last_committed_decree() + 1 >=\n            _potential_secondary_states.learning_start_prepare_decree &&\n        _app->last_committed_decree() > _app->last_durable_decree()) {\n        err = background_sync_checkpoint();\n\n        ddebug(\"%s: on_copy_remote_state_completed[%016\" PRIx64\n               \"]: learnee = %s, learn_duration = %\" PRIu64 \" ms, flush done, err = %s, \"\n               \"app_committed_decree = %\" PRId64 \", app_durable_decree = %\" PRId64 \"\",\n               name(),\n               req.signature,\n               
resp.config.primary.to_string(),\n               _potential_secondary_states.duration_ms(),\n               err.to_string(),\n               _app->last_committed_decree(),\n               _app->last_durable_decree());\n\n        if (err == ERR_OK) {\n            dassert(_app->last_committed_decree() == _app->last_durable_decree(),\n                    \"%\" PRId64 \" VS %\" PRId64 \"\",\n                    _app->last_committed_decree(),\n                    _app->last_durable_decree());\n        }\n    }\n\n    // it is possible that the _potential_secondary_states.learn_remote_files_task is still running\n    // while its body is definitely done already as being here, so we manually set its value to\n    // nullptr\n    // so that we don't have unnecessary failed reconfiguration later due to this non-nullptr in\n    // cleanup\n    _potential_secondary_states.learn_remote_files_task = nullptr;\n\n    _potential_secondary_states.learn_remote_files_completed_task =\n        tasking::create_task(LPC_LEARN_REMOTE_DELTA_FILES_COMPLETED,\n                             &_tracker,\n                             [this, err]() { on_learn_remote_state_completed(err); },\n                             get_gpid().thread_hash());\n    _potential_secondary_states.learn_remote_files_completed_task->enqueue();\n}\n\nvoid replica::on_learn_remote_state_completed(error_code err)\n{\n    _checker.only_one_thread_access();\n\n    if (partition_status::PS_POTENTIAL_SECONDARY != status()) {\n        dwarn(\"%s: on_learn_remote_state_completed[%016\" PRIx64\n              \"]: learnee = %s, learn_duration = %\" PRIu64 \" ms, err = %s, \"\n              \"the learner status is not PS_POTENTIAL_SECONDARY, but %s, ignore\",\n              name(),\n              _potential_secondary_states.learning_version,\n              _config.primary.to_string(),\n              _potential_secondary_states.duration_ms(),\n              err.to_string(),\n              enum_to_string(status()));\n        
return;\n    }\n\n    ddebug(\"%s: on_learn_remote_state_completed[%016\" PRIx64\n           \"]: learnee = %s, learn_duration = %\" PRIu64 \" ms, err = %s, \"\n           \"local_committed_decree = %\" PRId64 \", app_committed_decree = %\" PRId64\n           \", app_durable_decree = %\" PRId64 \", current_learning_status = %s\",\n           name(),\n           _potential_secondary_states.learning_version,\n           _config.primary.to_string(),\n           _potential_secondary_states.duration_ms(),\n           err.to_string(),\n           last_committed_decree(),\n           _app->last_committed_decree(),\n           _app->last_durable_decree(),\n           enum_to_string(_potential_secondary_states.learning_status));\n\n    _potential_secondary_states.learning_round_is_running = false;\n\n    if (err != ERR_OK) {\n        handle_learning_error(err, true);\n    } else {\n        // continue\n        init_learn(_potential_secondary_states.learning_version);\n    }\n}\n\nvoid replica::handle_learning_error(error_code err, bool is_local_error)\n{\n    _checker.only_one_thread_access();\n\n    derror(\"%s: handle_learning_error[%016\" PRIx64 \"]: learnee = %s, learn_duration = %\" PRIu64\n           \" ms, err = %s, %s\",\n           name(),\n           _potential_secondary_states.learning_version,\n           _config.primary.to_string(),\n           _potential_secondary_states.duration_ms(),\n           err.to_string(),\n           is_local_error ? \"local_error\" : \"remote error\");\n\n    _stub->_counter_replicas_learning_recent_learn_fail_count->increment();\n\n    update_local_configuration_with_no_ballot_change(\n        is_local_error ? 
partition_status::PS_ERROR : partition_status::PS_INACTIVE);\n}\n\nerror_code replica::handle_learning_succeeded_on_primary(::dsn::rpc_address node,\n                                                         uint64_t learn_signature)\n{\n    auto it = _primary_states.learners.find(node);\n    if (it == _primary_states.learners.end()) {\n        derror(\"%s: handle_learning_succeeded_on_primary[%016\" PRIx64 \"]: learner = %s, \"\n               \"learner not found on primary, return ERR_LEARNER_NOT_FOUND\",\n               name(),\n               learn_signature,\n               node.to_string());\n        return ERR_LEARNER_NOT_FOUND;\n    }\n\n    if (it->second.signature != (int64_t)learn_signature) {\n        derror(\"%s: handle_learning_succeeded_on_primary[%016\" PRIx64 \"]: learner = %s, \"\n               \"signature not matched, current signature on primary is [%016\" PRIx64\n               \"], return ERR_INVALID_STATE\",\n               name(),\n               learn_signature,\n               node.to_string(),\n               it->second.signature);\n        return ERR_INVALID_STATE;\n    }\n\n    upgrade_to_secondary_on_primary(node);\n    return ERR_OK;\n}\n\nvoid replica::notify_learn_completion()\n{\n    group_check_response report;\n    report.pid = get_gpid();\n    report.err = ERR_OK;\n    report.last_committed_decree_in_app = _app->last_committed_decree();\n    report.last_committed_decree_in_prepare_list = last_committed_decree();\n    report.learner_signature = _potential_secondary_states.learning_version;\n    report.learner_status_ = _potential_secondary_states.learning_status;\n    report.node = _stub->_primary_address;\n\n    ddebug(\"%s: notify_learn_completion[%016\" PRIx64 \"]: learnee = %s, \"\n           \"learn_duration = %\" PRIu64 \" ms, local_committed_decree = %\" PRId64 \", \"\n           \"app_committed_decree = %\" PRId64 \", app_durable_decree = %\" PRId64\n           \", current_learning_status = %s\",\n           name(),\n     
      _potential_secondary_states.learning_version,\n           _config.primary.to_string(),\n           _potential_secondary_states.duration_ms(),\n           last_committed_decree(),\n           _app->last_committed_decree(),\n           _app->last_durable_decree(),\n           enum_to_string(_potential_secondary_states.learning_status));\n\n    if (_potential_secondary_states.completion_notify_task != nullptr) {\n        _potential_secondary_states.completion_notify_task->cancel(false);\n    }\n\n    dsn::message_ex *msg =\n        dsn::message_ex::create_request(RPC_LEARN_COMPLETION_NOTIFY, 0, get_gpid().thread_hash());\n    dsn::marshall(msg, report);\n\n    _potential_secondary_states.completion_notify_task =\n        rpc::call(_config.primary, msg, &_tracker, [\n            this,\n            report = std::move(report)\n        ](error_code err, learn_notify_response && resp) mutable {\n            on_learn_completion_notification_reply(err, std::move(report), std::move(resp));\n        });\n}\n\nvoid replica::on_learn_completion_notification(const group_check_response &report,\n                                               /*out*/ learn_notify_response &response)\n{\n    _checker.only_one_thread_access();\n\n    ddebug(\"%s: on_learn_completion_notification[%016\" PRIx64\n           \"]: learner = %s, learning_status = %s\",\n           name(),\n           report.learner_signature,\n           report.node.to_string(),\n           enum_to_string(report.learner_status_));\n\n    if (status() != partition_status::PS_PRIMARY) {\n        response.err = (partition_status::PS_INACTIVE == status() && _inactive_is_transient)\n                           ? 
ERR_INACTIVE_STATE\n                           : ERR_INVALID_STATE;\n        derror(\"%s: on_learn_completion_notification[%016\" PRIx64\n               \"]: learner = %s, this replica is not primary, but %s, reply %s\",\n               name(),\n               report.learner_signature,\n               report.node.to_string(),\n               enum_to_string(status()),\n               response.err.to_string());\n    } else if (report.learner_status_ != learner_status::LearningSucceeded) {\n        response.err = ERR_INVALID_STATE;\n        derror(\"%s: on_learn_completion_notification[%016\" PRIx64 \"]: learner = %s, \"\n               \"learner_status is not LearningSucceeded, but %s, reply ERR_INVALID_STATE\",\n               name(),\n               report.learner_signature,\n               report.node.to_string(),\n               enum_to_string(report.learner_status_));\n    } else {\n        response.err = handle_learning_succeeded_on_primary(report.node, report.learner_signature);\n        if (response.err != ERR_OK) {\n            derror(\"%s: on_learn_completion_notification[%016\" PRIx64 \"]: learner = %s, \"\n                   \"handle learning succeeded on primary failed, reply %s\",\n                   name(),\n                   report.learner_signature,\n                   report.node.to_string(),\n                   response.err.to_string());\n        }\n    }\n}\n\nvoid replica::on_learn_completion_notification_reply(error_code err,\n                                                     group_check_response &&report,\n                                                     learn_notify_response &&resp)\n{\n    _checker.only_one_thread_access();\n\n    dassert(partition_status::PS_POTENTIAL_SECONDARY == status(),\n            \"invalid partition_status, status = %s\",\n            enum_to_string(status()));\n    dassert(_potential_secondary_states.learning_status == learner_status::LearningSucceeded,\n            \"invalid learner_status, status = %s\",\n  
          enum_to_string(_potential_secondary_states.learning_status));\n    dassert(report.learner_signature == (int64_t)_potential_secondary_states.learning_version,\n            \"%\" PRId64 \" VS %\" PRId64 \"\",\n            report.learner_signature,\n            (int64_t)_potential_secondary_states.learning_version);\n\n    if (err != ERR_OK) {\n        handle_learning_error(err, false);\n        return;\n    }\n\n    if (resp.signature != (int64_t)_potential_secondary_states.learning_version) {\n        derror(\"%s: on_learn_completion_notification_reply[%016\" PRIx64\n               \"]: learnee = %s, learn_duration = %\" PRIu64 \" ms, \"\n               \"signature not matched, current signature on primary is [%016\" PRIx64 \"]\",\n               name(),\n               report.learner_signature,\n               _config.primary.to_string(),\n               _potential_secondary_states.duration_ms(),\n               resp.signature);\n        handle_learning_error(ERR_INVALID_STATE, false);\n        return;\n    }\n\n    ddebug(\"%s: on_learn_completion_notification_reply[%016\" PRIx64\n           \"]: learnee = %s, learn_duration = %\" PRIu64 \" ms, response_err = %s\",\n           name(),\n           report.learner_signature,\n           _config.primary.to_string(),\n           _potential_secondary_states.duration_ms(),\n           resp.err.to_string());\n\n    if (resp.err != ERR_OK) {\n        if (resp.err == ERR_INACTIVE_STATE) {\n            dwarn(\"%s: on_learn_completion_notification_reply[%016\" PRIx64\n                  \"]: learnee = %s, learn_duration = %\" PRIu64 \" ms, \"\n                  \"learnee is updating ballot, delay to start another round of learning\",\n                  name(),\n                  report.learner_signature,\n                  _config.primary.to_string(),\n                  _potential_secondary_states.duration_ms());\n            _potential_secondary_states.learning_round_is_running = false;\n            
_potential_secondary_states.delay_learning_task = tasking::create_task(\n                LPC_DELAY_LEARN,\n                &_tracker,\n                std::bind(&replica::init_learn, this, report.learner_signature),\n                get_gpid().thread_hash());\n            _potential_secondary_states.delay_learning_task->enqueue(std::chrono::seconds(1));\n        } else {\n            handle_learning_error(resp.err, false);\n        }\n    } else {\n        _stub->_counter_replicas_learning_recent_learn_succ_count->increment();\n    }\n}\n\nvoid replica::on_add_learner(const group_check_request &request)\n{\n    ddebug_replica(\"process add learner, primary = {}, ballot ={}, status ={}, \"\n                   \"last_committed_decree = {}, duplicating = {}\",\n                   request.config.primary.to_string(),\n                   request.config.ballot,\n                   enum_to_string(request.config.status),\n                   request.last_committed_decree,\n                   request.app.duplicating);\n\n    if (request.config.ballot < get_ballot()) {\n        dwarn_replica(\"on_add_learner ballot is old, skipped\");\n        return;\n    }\n\n    if (request.config.ballot > get_ballot() ||\n        is_same_ballot_status_change_allowed(status(), request.config.status)) {\n        if (!update_local_configuration(request.config, true))\n            return;\n\n        dassert_replica(partition_status::PS_POTENTIAL_SECONDARY == status(),\n                        \"invalid partition_status, status = {}\",\n                        enum_to_string(status()));\n\n        _is_duplication_master = request.app.duplicating;\n        init_learn(request.config.learner_signature);\n    }\n}\n\n// in non-replication thread\nerror_code replica::apply_learned_state_from_private_log(learn_state &state)\n{\n    bool duplicating = is_duplication_master();\n    // if no dunplicate, learn_start_decree=last_commit decree, step_back means whether\n    // `learn_start_decree`should be 
stepped back to include all the\n    // unconfirmed when duplicating in this round of learn. default is false\n    bool step_back = false;\n\n    // in this case, this means `learn_start_decree` must have been stepped back to include all the\n    // unconfirmed(learn_start_decree=last_confirmed_decree) when duplicating in this round of\n    // learn.\n    //              confirmed    gced          committed\n    //                  |          |              |\n    // learner's plog: ============[-----log------]\n    //                   |\n    //                   |                            <cache>\n    // learn_state:      [----------log-files--------]------]\n    //                   |                                  |\n    // ==>       learn_start_decree                         |\n    // learner's plog    |                              committed\n    // after applied:    [---------------log----------------]\n    if (duplicating && state.__isset.learn_start_decree &&\n        state.learn_start_decree < _app->last_committed_decree() + 1) {\n        ddebug_replica(\"learn_start_decree({}) < _app->last_committed_decree() + 1({}),   learn \"\n                       \"must stepped back to include all the unconfirmed \",\n                       state.learn_start_decree,\n                       _app->last_committed_decree() + 1);\n\n        // move the `learn/` dir to working dir (`plog/`) to replace current log files to replay\n        error_code err = _private_log->reset_from(\n            _app->learn_dir(),\n            [](int log_length, mutation_ptr &mu) { return true; },\n            [this](error_code err) {\n                tasking::enqueue(LPC_REPLICATION_ERROR,\n                                 &_tracker,\n                                 [this, err]() { handle_local_failure(err); },\n                                 get_gpid().thread_hash());\n            });\n        if (err != ERR_OK) {\n            derror_replica(\"failed to reset this private log with 
logs in learn/ dir: {}\", err);\n            return err;\n        }\n\n        // only select uncommitted logs to be replayed and applied into storage.\n        learn_state tmp_state;\n        _private_log->get_learn_state(get_gpid(), _app->last_committed_decree() + 1, tmp_state);\n        state.files = tmp_state.files;\n        step_back = true;\n    }\n\n    int64_t offset;\n    error_code err;\n\n    // temp prepare list for learning purpose\n    prepare_list plist(this,\n                       _app->last_committed_decree(),\n                       _options->max_mutation_count_in_prepare_list,\n                       [this, duplicating, step_back](mutation_ptr &mu) {\n                           if (mu->data.header.decree == _app->last_committed_decree() + 1) {\n                               // TODO: assign the returned error_code to err and check it\n                               _app->apply_mutation(mu);\n\n                               // appends logs-in-cache into plog to ensure them can be duplicated.\n                               // if current case is step back, it means the logs has been reserved\n                               // through `reset_form` above\n                               if (duplicating && !step_back) {\n                                   _private_log->append(\n                                       mu, LPC_WRITE_REPLICATION_LOG_COMMON, &_tracker, nullptr);\n                               }\n                           }\n                       });\n\n    err = mutation_log::replay(state.files,\n                               [&plist](int log_length, mutation_ptr &mu) {\n                                   auto d = mu->data.header.decree;\n                                   if (d <= plist.last_committed_decree())\n                                       return false;\n\n                                   auto old = plist.get_mutation_by_decree(d);\n                                   if (old != nullptr &&\n                                
       old->data.header.ballot >= mu->data.header.ballot)\n                                       return false;\n\n                                   plist.prepare(mu, partition_status::PS_SECONDARY);\n                                   return true;\n                               },\n                               offset);\n\n    // update first_learn_start_decree, the position where the first round of LT_LOG starts from.\n    // we use this value to determine whether to learn back from min_confirmed_decree\n    // for duplication:\n    //\n    //                confirmed\n    //                    |\n    // learner's plog: ==[=========[--------------]\n    //                   |         |              |\n    //                   |       gced           committed\n    //     first_learn_start_decree\n    //\n    // because the learned logs (under `learn/` dir) have covered all the unconfirmed,\n    // the next round of learn will start from committed+1.\n    //\n    if (state.__isset.learn_start_decree &&\n        (_potential_secondary_states.first_learn_start_decree < 0 ||\n         _potential_secondary_states.first_learn_start_decree > state.learn_start_decree)) {\n        _potential_secondary_states.first_learn_start_decree = state.learn_start_decree;\n    }\n\n    ddebug_replica(\"apply_learned_state_from_private_log[{}]: duplicating={}, step_back={}, \"\n                   \"learnee = {}, learn_duration = {} ms, apply private log files done, file_count \"\n                   \"={}, first_learn_start_decree ={}, learn_start_decree = {}, \"\n                   \"app_committed_decree = {}\",\n                   _potential_secondary_states.learning_version,\n                   duplicating,\n                   step_back,\n                   _config.primary.to_string(),\n                   _potential_secondary_states.duration_ms(),\n                   state.files.size(),\n                   _potential_secondary_states.first_learn_start_decree,\n                   
state.learn_start_decree,\n                   _app->last_committed_decree());\n\n    // apply in-buffer private logs\n    if (err == ERR_OK) {\n        int replay_count = 0;\n        binary_reader reader(state.meta);\n        while (!reader.is_eof()) {\n            auto mu = mutation::read_from(reader, nullptr);\n            auto d = mu->data.header.decree;\n            if (d <= plist.last_committed_decree())\n                continue;\n\n            auto old = plist.get_mutation_by_decree(d);\n            if (old != nullptr && old->data.header.ballot >= mu->data.header.ballot)\n                continue;\n\n            mu->set_logged();\n            plist.prepare(mu, partition_status::PS_SECONDARY);\n            ++replay_count;\n        }\n\n        if (state.to_decree_included > last_committed_decree()) {\n            ddebug_replica(\"apply_learned_state_from_private_log[{}]: learnee ={}, \"\n                           \"learned_to_decree_included({}) > last_committed_decree({}), commit to \"\n                           \"to_decree_included\",\n                           _potential_secondary_states.learning_version,\n                           _config.primary.to_string(),\n                           state.to_decree_included,\n                           last_committed_decree());\n            plist.commit(state.to_decree_included, COMMIT_TO_DECREE_SOFT);\n        }\n\n        ddebug_replica(\" apply_learned_state_from_private_log[{}]: learnee ={}, \"\n                       \"learn_duration ={} ms, apply in-buffer private logs done, \"\n                       \"replay_count ={}, app_committed_decree = {}\",\n                       _potential_secondary_states.learning_version,\n                       _config.primary.to_string(),\n                       _potential_secondary_states.duration_ms(),\n                       replay_count,\n                       _app->last_committed_decree());\n    }\n\n    // awaits for unfinished mutation writes.\n    if (duplicating) {\n 
       _private_log->flush();\n    }\n    return err;\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_restore.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <fstream>\n#include <boost/lexical_cast.hpp>\n\n#include <dsn/utility/error_code.h>\n#include <dsn/utility/factory_store.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/utils.h>\n\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/dist/fmt_logging.h>\n\n#include \"replica.h\"\n#include \"mutation_log.h\"\n#include \"replica_stub.h\"\n#include \"block_service/block_service_manager.h\"\n#include \"backup/cold_backup_context.h\"\n\nusing namespace dsn::dist::block_service;\n\nnamespace dsn {\nnamespace replication {\n\nbool replica::remove_useless_file_under_chkpt(const std::string &chkpt_dir,\n                                              const cold_backup_metadata &metadata)\n{\n    std::vector<std::string> sub_files;\n    // filename --> file_path such as: file --> ***/***/file\n    std::map<std::string, std::string> name_to_filepath;\n    if (!::dsn::utils::filesystem::get_subfiles(chkpt_dir, sub_files, false)) {\n        derror(\"%s: get subfile of dir(%s) failed\", name(), chkpt_dir.c_str());\n        return false;\n    }\n\n    for (const auto &file : sub_files) {\n        
name_to_filepath.insert(\n            std::make_pair(::dsn::utils::filesystem::get_file_name(file), file));\n    }\n\n    for (const auto &f_meta : metadata.files) {\n        name_to_filepath.erase(f_meta.name);\n    }\n\n    // remove useless files execpt cold_backup_constant::BACKUP_METADATA file\n    for (const auto &pair : name_to_filepath) {\n        if (pair.first == cold_backup_constant::BACKUP_METADATA)\n            continue;\n        if (::dsn::utils::filesystem::file_exists(pair.second) &&\n            !::dsn::utils::filesystem::remove_path(pair.second)) {\n            derror(\"%s: remove useless file(%s) failed\", name(), pair.second.c_str());\n            return false;\n        }\n        ddebug(\"%s: remove useless file(%s) succeed\", name(), pair.second.c_str());\n    }\n    return true;\n}\n\nbool replica::read_cold_backup_metadata(const std::string &file,\n                                        cold_backup_metadata &backup_metadata)\n{\n    if (!::dsn::utils::filesystem::file_exists(file)) {\n        derror(\"%s: checkpoint on remote storage media is damaged, coz file(%s) doesn't exist\",\n               name(),\n               file.c_str());\n        return false;\n    }\n    int64_t file_sz = 0;\n    if (!::dsn::utils::filesystem::file_size(file, file_sz)) {\n        derror(\"%s: get file(%s) size failed\", name(), file.c_str());\n        return false;\n    }\n    std::shared_ptr<char> buf = utils::make_shared_array<char>(file_sz + 1);\n\n    std::ifstream fin(file, std::ifstream::in);\n    if (!fin.is_open()) {\n        derror(\"%s: open file(%s) failed\", name(), file.c_str());\n        return false;\n    }\n    fin.read(buf.get(), file_sz);\n    dassert(file_sz == fin.gcount(),\n            \"%s: read file(%s) failed, need %\" PRId64 \", but read %\" PRId64 \"\",\n            name(),\n            file.c_str(),\n            file_sz,\n            fin.gcount());\n    fin.close();\n\n    buf.get()[fin.gcount()] = '\\0';\n    blob bb;\n    
bb.assign(std::move(buf), 0, file_sz);\n    if (!::dsn::json::json_forwarder<cold_backup_metadata>::decode(bb, backup_metadata)) {\n        derror(\"%s: file(%s) under checkpoint is damaged\", name(), file.c_str());\n        return false;\n    }\n    return true;\n}\n\nerror_code replica::download_checkpoint(const configuration_restore_request &req,\n                                        const std::string &remote_chkpt_dir,\n                                        const std::string &local_chkpt_dir)\n{\n    block_filesystem *fs =\n        _stub->_block_service_manager.get_or_create_block_filesystem(req.backup_provider_name);\n\n    // download metadata file and parse it into cold_backup_meta\n    cold_backup_metadata backup_metadata;\n    error_code err = get_backup_metadata(fs, remote_chkpt_dir, local_chkpt_dir, backup_metadata);\n    if (err != ERR_OK) {\n        return err;\n    }\n\n    // download checkpoint files\n    task_tracker tracker;\n    for (const auto &f_meta : backup_metadata.files) {\n        tasking::enqueue(\n            TASK_CODE_EXEC_INLINED,\n            &tracker,\n            [this, &err, remote_chkpt_dir, local_chkpt_dir, f_meta, fs]() {\n                uint64_t f_size = 0;\n                error_code download_err = _stub->_block_service_manager.download_file(\n                    remote_chkpt_dir, local_chkpt_dir, f_meta.name, fs, f_size);\n                const std::string file_name =\n                    utils::filesystem::path_combine(local_chkpt_dir, f_meta.name);\n                if (download_err == ERR_OK || download_err == ERR_PATH_ALREADY_EXIST) {\n                    if (!utils::filesystem::verify_file(file_name, f_meta.md5, f_meta.size)) {\n                        download_err = ERR_CORRUPTION;\n                    } else if (download_err == ERR_PATH_ALREADY_EXIST) {\n                        download_err = ERR_OK;\n                        f_size = f_meta.size;\n                    }\n                }\n\n                if 
(download_err != ERR_OK) {\n                    derror_replica(\n                        \"failed to download file({}), error = {}\", f_meta.name, download_err);\n                    // ERR_CORRUPTION means we should rollback restore, so we can't change err if it\n                    // is ERR_CORRUPTION now, otherwise it will be overridden by other errors\n                    if (err != ERR_CORRUPTION) {\n                        err = download_err;\n                        return;\n                    }\n                }\n\n                // update progress if download file succeed\n                update_restore_progress(f_size);\n                // report current status to meta server\n                report_restore_status_to_meta();\n            });\n    }\n    tracker.wait_outstanding_tasks();\n\n    // clear useless files for restore.\n    // if err != ERR_OK, the entire directory of this replica will be deleted later.\n    // so in this situation, there is no need to clear restore.\n    if (ERR_OK == err) {\n        clear_restore_useless_files(local_chkpt_dir, backup_metadata);\n    }\n\n    return err;\n}\n\nerror_code replica::get_backup_metadata(block_filesystem *fs,\n                                        const std::string &remote_chkpt_dir,\n                                        const std::string &local_chkpt_dir,\n                                        cold_backup_metadata &backup_metadata)\n{\n    // download metadata file\n    uint64_t download_file_size = 0;\n    error_code err =\n        _stub->_block_service_manager.download_file(remote_chkpt_dir,\n                                                    local_chkpt_dir,\n                                                    cold_backup_constant::BACKUP_METADATA,\n                                                    fs,\n                                                    download_file_size);\n    if (err != ERR_OK && err != ERR_PATH_ALREADY_EXIST) {\n        derror_replica(\"download 
backup_metadata failed, file({}), reason({})\",\n                       utils::filesystem::path_combine(remote_chkpt_dir,\n                                                       cold_backup_constant::BACKUP_METADATA),\n                       err);\n        return err;\n    }\n\n    // parse cold_backup_meta from metadata file\n    const std::string local_backup_metada_file =\n        utils::filesystem::path_combine(local_chkpt_dir, cold_backup_constant::BACKUP_METADATA);\n    if (!read_cold_backup_metadata(local_backup_metada_file, backup_metadata)) {\n        derror_replica(\"read cold_backup_metadata from file({}) failed\", local_backup_metada_file);\n        return ERR_FILE_OPERATION_FAILED;\n    }\n\n    _chkpt_total_size = backup_metadata.checkpoint_total_size;\n    ddebug_replica(\n        \"recover cold_backup_metadata from file({}) succeed, total checkpoint size({}), file \"\n        \"count({})\",\n        local_backup_metada_file,\n        _chkpt_total_size,\n        backup_metadata.files.size());\n    return ERR_OK;\n}\n\nvoid replica::clear_restore_useless_files(const std::string &local_chkpt_dir,\n                                          const cold_backup_metadata &metadata)\n{\n    if (!remove_useless_file_under_chkpt(local_chkpt_dir, metadata)) {\n        dwarn_replica(\"remove useless file failed, chkpt = {}\", local_chkpt_dir);\n    } else {\n        ddebug_replica(\"remove useless file succeed, chkpt = {}\", local_chkpt_dir);\n    }\n\n    const std::string metadata_file =\n        utils::filesystem::path_combine(local_chkpt_dir, cold_backup_constant::BACKUP_METADATA);\n    if (!utils::filesystem::remove_path(metadata_file)) {\n        dwarn_replica(\"remove backup_metadata failed, file = {}\", metadata_file);\n    } else {\n        ddebug_replica(\"remove backup_metadata succeed, file = {}\", metadata_file);\n    }\n}\n\ndsn::error_code replica::find_valid_checkpoint(const configuration_restore_request &req,\n                                     
          std::string &remote_chkpt_dir)\n{\n    ddebug_f(\"{}: start to find valid checkpoint of backup_id {}\", name(), req.time_stamp);\n\n    // we should base on old gpid to combine the path on cold backup media\n    dsn::gpid old_gpid;\n    old_gpid.set_app_id(req.app_id);\n    old_gpid.set_partition_index(_config.pid.get_partition_index());\n    std::string backup_root = req.cluster_name;\n    if (!req.restore_path.empty()) {\n        backup_root = dsn::utils::filesystem::path_combine(req.restore_path, backup_root);\n    }\n    if (!req.policy_name.empty()) {\n        backup_root = dsn::utils::filesystem::path_combine(backup_root, req.policy_name);\n    }\n    int64_t backup_id = req.time_stamp;\n\n    std::string manifest_file =\n        cold_backup::get_current_chkpt_file(backup_root, req.app_name, old_gpid, backup_id);\n    block_filesystem *fs =\n        _stub->_block_service_manager.get_or_create_block_filesystem(req.backup_provider_name);\n    if (fs == nullptr) {\n        derror_f(\"{}: get block filesystem by provider {} failed\",\n                 std::string(name()),\n                 req.backup_provider_name);\n        return ERR_CORRUPTION;\n    }\n\n    create_file_response create_response;\n    fs->create_file(\n          create_file_request{manifest_file, false},\n          TASK_CODE_EXEC_INLINED,\n          [&create_response](const create_file_response &resp) { create_response = resp; },\n          nullptr)\n        ->wait();\n\n    if (create_response.err != dsn::ERR_OK) {\n        derror_f(\"{}: create file of block_service failed, reason {}\",\n                 name(),\n                 create_response.err.to_string());\n        return create_response.err;\n    }\n\n    // TODO: check the md5sum\n    read_response r;\n    create_response.file_handle\n        ->read(read_request{0, -1},\n               TASK_CODE_EXEC_INLINED,\n               [&r](const read_response &resp) { r = resp; },\n               nullptr)\n        ->wait();\n\n    if 
(r.err != dsn::ERR_OK) {\n        derror_f(\"{}: read file {} failed, reason {}\",\n                 name(),\n                 create_response.file_handle->file_name(),\n                 r.err.to_string());\n        return r.err;\n    }\n\n    std::string valid_chkpt_entry(r.buffer.data(), r.buffer.length());\n    ddebug_f(\"{}: got a valid chkpt {}\", name(), valid_chkpt_entry);\n    remote_chkpt_dir = ::dsn::utils::filesystem::path_combine(\n        cold_backup::get_replica_backup_path(backup_root, req.app_name, old_gpid, backup_id),\n        valid_chkpt_entry);\n    return dsn::ERR_OK;\n}\n\ndsn::error_code replica::restore_checkpoint()\n{\n    // first check the parameter\n    configuration_restore_request restore_req;\n    auto iter = _app_info.envs.find(backup_restore_constant::BLOCK_SERVICE_PROVIDER);\n    dassert(iter != _app_info.envs.end(),\n            \"%s: can't find %s in app_info.envs\",\n            name(),\n            backup_restore_constant::BLOCK_SERVICE_PROVIDER.c_str());\n    restore_req.backup_provider_name = iter->second;\n    iter = _app_info.envs.find(backup_restore_constant::CLUSTER_NAME);\n    dassert(iter != _app_info.envs.end(),\n            \"%s: can't find %s in app_info.envs\",\n            name(),\n            backup_restore_constant::CLUSTER_NAME.c_str());\n    restore_req.cluster_name = iter->second;\n    iter = _app_info.envs.find(backup_restore_constant::POLICY_NAME);\n    dassert(iter != _app_info.envs.end(),\n            \"%s: can't find %s in app_info.envs\",\n            name(),\n            backup_restore_constant::POLICY_NAME.c_str());\n    restore_req.policy_name = iter->second;\n    iter = _app_info.envs.find(backup_restore_constant::APP_NAME);\n    dassert(iter != _app_info.envs.end(),\n            \"%s: can't find %s in app_info.envs\",\n            name(),\n            backup_restore_constant::APP_NAME.c_str());\n    restore_req.app_name = iter->second;\n    iter = 
_app_info.envs.find(backup_restore_constant::APP_ID);\n    dassert(iter != _app_info.envs.end(),\n            \"%s: can't find %s in app_info.envs\",\n            name(),\n            backup_restore_constant::APP_ID.c_str());\n    restore_req.app_id = boost::lexical_cast<int32_t>(iter->second);\n\n    iter = _app_info.envs.find(backup_restore_constant::BACKUP_ID);\n    dassert(iter != _app_info.envs.end(),\n            \"%s: can't find %s in app_info.envs\",\n            name(),\n            backup_restore_constant::BACKUP_ID.c_str());\n    restore_req.time_stamp = boost::lexical_cast<int64_t>(iter->second);\n\n    bool skip_bad_partition = false;\n    if (_app_info.envs.find(backup_restore_constant::SKIP_BAD_PARTITION) != _app_info.envs.end()) {\n        skip_bad_partition = true;\n    }\n\n    iter = _app_info.envs.find(backup_restore_constant::RESTORE_PATH);\n    if (iter != _app_info.envs.end()) {\n        restore_req.__set_restore_path(iter->second);\n    }\n\n    ddebug_f(\"{}: restore checkpoint(policy_name {}, backup_id {}), restore_path({}) from {} to \"\n             \"local dir {}\",\n             name(),\n             restore_req.policy_name,\n             restore_req.time_stamp,\n             restore_req.restore_path,\n             restore_req.backup_provider_name,\n             _dir);\n\n    // then create a local restore dir if it doesn't exist\n    if (!utils::filesystem::directory_exists(_dir) && !utils::filesystem::create_directory(_dir)) {\n        derror(\"create dir %s failed\", _dir.c_str());\n        return ERR_FILE_OPERATION_FAILED;\n    }\n\n    std::ostringstream os;\n    os << _dir << \"/restore.\" << restore_req.policy_name << \".\" << restore_req.time_stamp;\n    std::string restore_dir = os.str();\n    if (!utils::filesystem::directory_exists(restore_dir) &&\n        !utils::filesystem::create_directory(restore_dir)) {\n        derror_f(\"create restore dir {} failed\", restore_dir);\n        return ERR_FILE_OPERATION_FAILED;\n    
}\n\n    // then find a valid checkpoint dir and download it\n    std::string remote_chkpt_dir;\n    error_code err = find_valid_checkpoint(restore_req, remote_chkpt_dir);\n    if (err == ERR_OK) {\n        err = download_checkpoint(restore_req, remote_chkpt_dir, restore_dir);\n    }\n\n    if (err == ERR_OBJECT_NOT_FOUND || err == ERR_CORRUPTION) {\n        if (skip_bad_partition) {\n            _restore_status = ERR_IGNORE_BAD_DATA;\n            err = skip_restore_partition(restore_dir);\n        } else {\n            _restore_status = ERR_CORRUPTION;\n            tell_meta_to_restore_rollback();\n            return ERR_CORRUPTION;\n        }\n    }\n    report_restore_status_to_meta();\n    return err;\n}\n\ndsn::error_code replica::skip_restore_partition(const std::string &restore_dir)\n{\n    // Attention: when skip restore partition, we should not delete restore_dir, but we must clear\n    // it because we use restore_dir to tell storage engine that start an app from restore\n    if (utils::filesystem::remove_path(restore_dir) &&\n        utils::filesystem::create_directory(restore_dir)) {\n        ddebug(\"%s: clear restore_dir(%s) succeed\", name(), restore_dir.c_str());\n        _restore_progress.store(cold_backup_constant::PROGRESS_FINISHED);\n        return ERR_OK;\n    } else {\n        derror(\"clear dir %s failed\", restore_dir.c_str());\n        return ERR_FILE_OPERATION_FAILED;\n    }\n}\n\nvoid replica::tell_meta_to_restore_rollback()\n{\n    configuration_drop_app_request request;\n    drop_app_options options;\n    options.success_if_not_exist = true;\n    options.__set_reserve_seconds(1);\n    request.app_name = _app_info.app_name;\n    request.options = std::move(options);\n\n    dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_DROP_APP);\n    ::dsn::marshall(msg, request);\n\n    rpc_address target(_stub->_failure_detector->get_servers());\n    rpc::call(target,\n              msg,\n              &_tracker,\n              
[this](error_code err, dsn::message_ex *request, dsn::message_ex *resp) {\n                  if (err == ERR_OK) {\n                      configuration_drop_app_response response;\n                      ::dsn::unmarshall(resp, response);\n                      if (response.err == ERR_OK) {\n                          ddebug(\"restore rolling backup succeed\");\n                          return;\n                      } else {\n                          tell_meta_to_restore_rollback();\n                      }\n                  } else if (err == ERR_TIMEOUT) {\n                      tell_meta_to_restore_rollback();\n                  }\n              });\n}\n\nvoid replica::report_restore_status_to_meta()\n{\n    configuration_report_restore_status_request request;\n    request.restore_status = _restore_status;\n    request.pid = _config.pid;\n    request.progress = _restore_progress.load();\n\n    dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_REPORT_RESTORE_STATUS);\n    ::dsn::marshall(msg, request);\n    rpc_address target(_stub->_failure_detector->get_servers());\n    rpc::call(target,\n              msg,\n              &_tracker,\n              [](error_code err, dsn::message_ex *request, dsn::message_ex *resp) {\n                  if (err == ERR_OK) {\n                      configuration_report_restore_status_response response;\n                      ::dsn::unmarshall(resp, response);\n                      if (response.err == ERR_OK) {\n                          dinfo(\"report restore status succeed\");\n                          return;\n                      }\n                  } else if (err == ERR_TIMEOUT) {\n                      // TODO: we should retry to make the result more precisely\n                      // report_restore_status_to_meta();\n                  }\n              });\n}\n\nvoid replica::update_restore_progress(uint64_t f_size)\n{\n    if (_chkpt_total_size <= 0) {\n        derror_replica(\"cold_backup_metadata has 
invalid file_total_size({})\", _chkpt_total_size);\n        return;\n    }\n\n    _cur_download_size.fetch_add(f_size);\n    auto total_size = static_cast<double>(_chkpt_total_size);\n    auto cur_download_size = static_cast<double>(_cur_download_size.load());\n    auto cur_porgress = static_cast<int32_t>((cur_download_size / total_size) * 1000);\n    _restore_progress.store(cur_porgress);\n    ddebug_replica(\"total_size = {}, cur_downloaded_size = {}, progress = {}\",\n                   total_size,\n                   cur_download_size,\n                   cur_porgress);\n}\n}\n}\n"
  },
  {
    "path": "src/replica/replica_stub.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     replica container - replica stub\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"replica.h\"\n#include \"replica_stub.h\"\n#include \"mutation_log.h\"\n#include \"mutation.h\"\n#include \"bulk_load/replica_bulk_loader.h\"\n#include \"duplication/duplication_sync_timer.h\"\n#include \"backup/replica_backup_server.h\"\n#include \"split/replica_split_manager.h\"\n#include \"replica_disk_migrator.h\"\n#include \"disk_cleaner.h\"\n\n#include <boost/algorithm/string/replace.hpp>\n#include <dsn/cpp/json_helper.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/rand.h>\n#include 
<dsn/utility/string_conv.h>\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/utility/enum_helper.h>\n#include <vector>\n#include <deque>\n#include <dsn/dist/fmt_logging.h>\n#ifdef DSN_ENABLE_GPERF\n#include <gperftools/malloc_extension.h>\n#endif\n#include <dsn/utility/fail_point.h>\n#include <dsn/dist/remote_command.h>\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DEFINE_bool(\"replication\",\n                ignore_broken_disk,\n                true,\n                \"true means ignore broken data disk when initialize\");\n\nDSN_DEFINE_uint32(\"replication\",\n                  max_concurrent_manual_emergency_checkpointing_count,\n                  10,\n                  \"max concurrent manual emergency checkpoint running count\");\nDSN_TAG_VARIABLE(max_concurrent_manual_emergency_checkpointing_count, FT_MUTABLE);\n\nbool replica_stub::s_not_exit_on_log_failure = false;\n\nreplica_stub::replica_stub(replica_state_subscriber subscriber /*= nullptr*/,\n                           bool is_long_subscriber /* = true*/)\n    : serverlet(\"replica_stub\"),\n      _kill_partition_command(nullptr),\n      _deny_client_command(nullptr),\n      _verbose_client_log_command(nullptr),\n      _verbose_commit_log_command(nullptr),\n      _trigger_chkpt_command(nullptr),\n      _query_compact_command(nullptr),\n      _query_app_envs_command(nullptr),\n      _max_concurrent_bulk_load_downloading_count_command(nullptr),\n      _deny_client(false),\n      _verbose_client_log(false),\n      _verbose_commit_log(false),\n      _release_tcmalloc_memory(false),\n      _mem_release_max_reserved_mem_percentage(10),\n      _max_concurrent_bulk_load_downloading_count(5),\n      _learn_app_concurrent_count(0),\n      _fs_manager(false),\n      _bulk_load_downloading_count(0),\n      _manual_emergency_checkpointing_count(0),\n      _is_running(false)\n{\n#ifdef DSN_ENABLE_GPERF\n    _is_releasing_memory = false;\n    
_release_tcmalloc_memory_command = nullptr;\n    _get_tcmalloc_status_command = nullptr;\n    _max_reserved_memory_percentage_command = nullptr;\n    _release_all_reserved_memory_command = nullptr;\n#endif\n    _replica_state_subscriber = subscriber;\n    _is_long_subscriber = is_long_subscriber;\n    _failure_detector = nullptr;\n    _state = NS_Disconnected;\n    _log = nullptr;\n    _primary_address_str[0] = '\\0';\n    install_perf_counters();\n\n    _max_allowed_write_size = dsn_config_get_value_uint64(\"replication\",\n                                                          \"max_allowed_write_size\",\n                                                          1 << 20,\n                                                          \"write operation exceed this \"\n                                                          \"threshold will be logged and reject, \"\n                                                          \"default is 1MB, 0 means no check\");\n}\n\nreplica_stub::~replica_stub(void) { close(); }\n\nvoid replica_stub::install_perf_counters()\n{\n    _counter_replicas_count.init_app_counter(\n        \"eon.replica_stub\", \"replica(Count)\", COUNTER_TYPE_NUMBER, \"# in replica_stub._replicas\");\n    _counter_replicas_opening_count.init_app_counter(\"eon.replica_stub\",\n                                                     \"opening.replica(Count)\",\n                                                     COUNTER_TYPE_NUMBER,\n                                                     \"# in replica_stub._opening_replicas\");\n    _counter_replicas_closing_count.init_app_counter(\"eon.replica_stub\",\n                                                     \"closing.replica(Count)\",\n                                                     COUNTER_TYPE_NUMBER,\n                                                     \"# in replica_stub._closing_replicas\");\n    _counter_replicas_commit_qps.init_app_counter(\"eon.replica_stub\",\n                                     
             \"replicas.commit.qps\",\n                                                  COUNTER_TYPE_RATE,\n                                                  \"server-level commit throughput\");\n    _counter_replicas_learning_count.init_app_counter(\"eon.replica_stub\",\n                                                      \"replicas.learning.count\",\n                                                      COUNTER_TYPE_NUMBER,\n                                                      \"current learning count\");\n    _counter_replicas_learning_max_duration_time_ms.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.learning.max.duration.time(ms)\",\n        COUNTER_TYPE_NUMBER,\n        \"current learning max duration time(ms)\");\n    _counter_replicas_learning_max_copy_file_size.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.learning.max.copy.file.size\",\n        COUNTER_TYPE_NUMBER,\n        \"current learning max copy file size\");\n    _counter_replicas_learning_recent_start_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.learning.recent.start.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"current learning start count in the recent period\");\n    _counter_replicas_learning_recent_round_start_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.learning.recent.round.start.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"learning round start count in the recent period\");\n    _counter_replicas_learning_recent_copy_file_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.learning.recent.copy.file.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"learning copy file count in the recent period\");\n    _counter_replicas_learning_recent_copy_file_size.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.learning.recent.copy.file.size\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"learning copy file size 
in the recent period\");\n    _counter_replicas_learning_recent_copy_buffer_size.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.learning.recent.copy.buffer.size\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"learning copy buffer size in the recent period\");\n    _counter_replicas_learning_recent_learn_cache_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.learning.recent.learn.cache.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"learning LT_CACHE count in the recent period\");\n    _counter_replicas_learning_recent_learn_app_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.learning.recent.learn.app.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"learning LT_APP count in the recent period\");\n    _counter_replicas_learning_recent_learn_log_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.learning.recent.learn.log.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"learning LT_LOG count in the recent period\");\n    _counter_replicas_learning_recent_learn_reset_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.learning.recent.learn.reset.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"learning reset count in the recent period\"\n        \"for the reason of resp.last_committed_decree < _app->last_committed_decree()\");\n    _counter_replicas_learning_recent_learn_fail_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.learning.recent.learn.fail.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"learning fail count in the recent period\");\n    _counter_replicas_learning_recent_learn_succ_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.learning.recent.learn.succ.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"learning succeed count in the recent period\");\n\n    _counter_replicas_recent_prepare_fail_count.init_app_counter(\n     
   \"eon.replica_stub\",\n        \"replicas.recent.prepare.fail.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"prepare fail count in the recent period\");\n    _counter_replicas_recent_replica_move_error_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.recent.replica.move.error.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"replica move to error count in the recent period\");\n    _counter_replicas_recent_replica_move_garbage_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.recent.replica.move.garbage.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"replica move to garbage count in the recent period\");\n    _counter_replicas_recent_replica_remove_dir_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.recent.replica.remove.dir.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"replica directory remove count in the recent period\");\n    _counter_replicas_error_replica_dir_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.error.replica.dir.count\",\n        COUNTER_TYPE_NUMBER,\n        \"error replica directory(*.err) count\");\n    _counter_replicas_garbage_replica_dir_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.garbage.replica.dir.count\",\n        COUNTER_TYPE_NUMBER,\n        \"garbage replica directory(*.gar) count\");\n    _counter_replicas_tmp_replica_dir_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.tmp.replica.dir.count\",\n        COUNTER_TYPE_NUMBER,\n        \"disk migration tmp replica directory(*.tmp) count\");\n    _counter_replicas_origin_replica_dir_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.origin.replica.dir.count\",\n        COUNTER_TYPE_NUMBER,\n        \"disk migration origin replica directory(.ori) count\");\n\n    _counter_replicas_recent_group_check_fail_count.init_app_counter(\n        
\"eon.replica_stub\",\n        \"replicas.recent.group.check.fail.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"group check fail count in the recent period\");\n\n    _counter_shared_log_size.init_app_counter(\n        \"eon.replica_stub\", \"shared.log.size(MB)\", COUNTER_TYPE_NUMBER, \"shared log size(MB)\");\n    _counter_shared_log_recent_write_size.init_app_counter(\n        \"eon.replica_stub\",\n        \"shared.log.recent.write.size\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"shared log write size in the recent period\");\n    _counter_recent_trigger_emergency_checkpoint_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"recent.trigger.emergency.checkpoint.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"trigger emergency checkpoint count in the recent period\");\n\n    // <- Duplication Metrics ->\n\n    _counter_dup_confirmed_rate.init_app_counter(\"eon.replica_stub\",\n                                                 \"dup.confirmed_rate\",\n                                                 COUNTER_TYPE_RATE,\n                                                 \"increasing rate of confirmed mutations\");\n    _counter_dup_pending_mutations_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"dup.pending_mutations_count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"number of mutations pending for duplication\");\n\n    // <- Cold Backup Metrics ->\n\n    _counter_cold_backup_running_count.init_app_counter(\"eon.replica_stub\",\n                                                        \"cold.backup.running.count\",\n                                                        COUNTER_TYPE_NUMBER,\n                                                        \"current cold backup count\");\n    _counter_cold_backup_recent_start_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"cold.backup.recent.start.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"current cold backup start 
count in the recent period\");\n    _counter_cold_backup_recent_succ_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"cold.backup.recent.succ.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"current cold backup succeed count in the recent period\");\n    _counter_cold_backup_recent_fail_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"cold.backup.recent.fail.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"current cold backup fail count in the recent period\");\n    _counter_cold_backup_recent_cancel_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"cold.backup.recent.cancel.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"current cold backup cancel count in the recent period\");\n    _counter_cold_backup_recent_pause_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"cold.backup.recent.pause.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"current cold backup pause count in the recent period\");\n    _counter_cold_backup_recent_upload_file_succ_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"cold.backup.recent.upload.file.succ.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"current cold backup upload file succeed count in the recent period\");\n    _counter_cold_backup_recent_upload_file_fail_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"cold.backup.recent.upload.file.fail.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"current cold backup upload file failed count in the recent period\");\n    _counter_cold_backup_recent_upload_file_size.init_app_counter(\n        \"eon.replica_stub\",\n        \"cold.backup.recent.upload.file.size\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"current cold backup upload file size in the recent perriod\");\n    _counter_cold_backup_max_duration_time_ms.init_app_counter(\n        \"eon.replica_stub\",\n        \"cold.backup.max.duration.time.ms\",\n        
COUNTER_TYPE_NUMBER,\n        \"current cold backup max duration time\");\n    _counter_cold_backup_max_upload_file_size.init_app_counter(\n        \"eon.replica_stub\",\n        \"cold.backup.max.upload.file.size\",\n        COUNTER_TYPE_NUMBER,\n        \"current cold backup max upload file size\");\n\n    _counter_recent_read_fail_count.init_app_counter(\"eon.replica_stub\",\n                                                     \"recent.read.fail.count\",\n                                                     COUNTER_TYPE_VOLATILE_NUMBER,\n                                                     \"read fail count in the recent period\");\n    _counter_recent_write_fail_count.init_app_counter(\"eon.replica_stub\",\n                                                      \"recent.write.fail.count\",\n                                                      COUNTER_TYPE_VOLATILE_NUMBER,\n                                                      \"write fail count in the recent period\");\n    _counter_recent_read_busy_count.init_app_counter(\"eon.replica_stub\",\n                                                     \"recent.read.busy.count\",\n                                                     COUNTER_TYPE_VOLATILE_NUMBER,\n                                                     \"read busy count in the recent period\");\n    _counter_recent_write_busy_count.init_app_counter(\"eon.replica_stub\",\n                                                      \"recent.write.busy.count\",\n                                                      COUNTER_TYPE_VOLATILE_NUMBER,\n                                                      \"write busy count in the recent period\");\n\n    _counter_recent_write_size_exceed_threshold_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"recent_write_size_exceed_threshold_count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"write size exceed threshold count in the recent period\");\n\n    // <- Bulk Load Metrics ->\n\n    
_counter_bulk_load_running_count.init_app_counter(\"eon.replica_stub\",\n                                                      \"bulk.load.running.count\",\n                                                      COUNTER_TYPE_VOLATILE_NUMBER,\n                                                      \"current bulk load running count\");\n    _counter_bulk_load_downloading_count.init_app_counter(\"eon.replica_stub\",\n                                                          \"bulk.load.downloading.count\",\n                                                          COUNTER_TYPE_VOLATILE_NUMBER,\n                                                          \"current bulk load downloading count\");\n    _counter_bulk_load_ingestion_count.init_app_counter(\"eon.replica_stub\",\n                                                        \"bulk.load.ingestion.count\",\n                                                        COUNTER_TYPE_VOLATILE_NUMBER,\n                                                        \"current bulk load ingestion count\");\n    _counter_bulk_load_succeed_count.init_app_counter(\"eon.replica_stub\",\n                                                      \"bulk.load.succeed.count\",\n                                                      COUNTER_TYPE_VOLATILE_NUMBER,\n                                                      \"current bulk load succeed count\");\n    _counter_bulk_load_failed_count.init_app_counter(\"eon.replica_stub\",\n                                                     \"bulk.load.failed.count\",\n                                                     COUNTER_TYPE_VOLATILE_NUMBER,\n                                                     \"current bulk load failed count\");\n    _counter_bulk_load_download_file_succ_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"bulk.load.download.file.success.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"bulk load recent download file success count\");\n    
_counter_bulk_load_download_file_fail_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"bulk.load.download.file.fail.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"bulk load recent download file failed count\");\n    _counter_bulk_load_download_file_size.init_app_counter(\"eon.replica_stub\",\n                                                           \"bulk.load.download.file.size\",\n                                                           COUNTER_TYPE_VOLATILE_NUMBER,\n                                                           \"bulk load recent download file size\");\n    _counter_bulk_load_max_ingestion_time_ms.init_app_counter(\n        \"eon.replica_stub\",\n        \"bulk.load.max.ingestion.duration.time.ms\",\n        COUNTER_TYPE_NUMBER,\n        \"bulk load max ingestion duration time(ms)\");\n    _counter_bulk_load_max_duration_time_ms.init_app_counter(\"eon.replica_stub\",\n                                                             \"bulk.load.max.duration.time.ms\",\n                                                             COUNTER_TYPE_NUMBER,\n                                                             \"bulk load max duration time(ms)\");\n\n#ifdef DSN_ENABLE_GPERF\n    _counter_tcmalloc_release_memory_size.init_app_counter(\"eon.replica_stub\",\n                                                           \"tcmalloc.release.memory.size\",\n                                                           COUNTER_TYPE_NUMBER,\n                                                           \"current tcmalloc release memory size\");\n#endif\n\n    // <- Partition split Metrics ->\n\n    _counter_replicas_splitting_count.init_app_counter(\"eon.replica_stub\",\n                                                       \"replicas.splitting.count\",\n                                                       COUNTER_TYPE_NUMBER,\n                                                       \"current partition splitting count\");\n\n    
_counter_replicas_splitting_max_duration_time_ms.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.splitting.max.duration.time(ms)\",\n        COUNTER_TYPE_NUMBER,\n        \"current partition splitting max duration time(ms)\");\n    _counter_replicas_splitting_max_async_learn_time_ms.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.splitting.max.async.learn.time(ms)\",\n        COUNTER_TYPE_NUMBER,\n        \"current partition splitting max async learn time(ms)\");\n    _counter_replicas_splitting_max_copy_file_size.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.splitting.max.copy.file.size\",\n        COUNTER_TYPE_NUMBER,\n        \"current splitting max copy file size\");\n    _counter_replicas_splitting_recent_start_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.splitting.recent.start.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"current splitting start count in the recent period\");\n    _counter_replicas_splitting_recent_copy_file_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.splitting.recent.copy.file.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"splitting copy file count in the recent period\");\n    _counter_replicas_splitting_recent_copy_file_size.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.splitting.recent.copy.file.size\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"splitting copy file size in the recent period\");\n    _counter_replicas_splitting_recent_copy_mutation_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.splitting.recent.copy.mutation.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"splitting copy mutation count in the recent period\");\n    _counter_replicas_splitting_recent_split_succ_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.splitting.recent.split.succ.count\",\n        
COUNTER_TYPE_VOLATILE_NUMBER,\n        \"splitting succeed count in the recent period\");\n    _counter_replicas_splitting_recent_split_fail_count.init_app_counter(\n        \"eon.replica_stub\",\n        \"replicas.splitting.recent.split.fail.count\",\n        COUNTER_TYPE_VOLATILE_NUMBER,\n        \"splitting fail count in the recent period\");\n}\n\nvoid replica_stub::initialize(bool clear /* = false*/)\n{\n    replication_options opts;\n    opts.initialize();\n    initialize(opts, clear);\n}\n\nvoid replica_stub::initialize(const replication_options &opts, bool clear /* = false*/)\n{\n    _primary_address = dsn_primary_address();\n    strcpy(_primary_address_str, _primary_address.to_string());\n    ddebug(\"primary_address = %s\", _primary_address_str);\n\n    set_options(opts);\n    std::ostringstream oss;\n    for (int i = 0; i < _options.meta_servers.size(); ++i) {\n        if (i != 0)\n            oss << \",\";\n        oss << _options.meta_servers[i].to_string();\n    }\n    ddebug(\"meta_servers = %s\", oss.str().c_str());\n\n    _deny_client = _options.deny_client_on_start;\n    _verbose_client_log = _options.verbose_client_log_on_start;\n    _verbose_commit_log = _options.verbose_commit_log_on_start;\n    _release_tcmalloc_memory = _options.mem_release_enabled;\n    _mem_release_max_reserved_mem_percentage = _options.mem_release_max_reserved_mem_percentage;\n    _max_concurrent_bulk_load_downloading_count =\n        _options.max_concurrent_bulk_load_downloading_count;\n\n    // clear dirs if need\n    if (clear) {\n        if (!dsn::utils::filesystem::remove_path(_options.slog_dir)) {\n            dassert(false, \"Fail to remove %s.\", _options.slog_dir.c_str());\n        }\n        for (auto &dir : _options.data_dirs) {\n            if (!dsn::utils::filesystem::remove_path(dir)) {\n                dassert(false, \"Fail to remove %s.\", dir.c_str());\n            }\n        }\n    }\n\n    // init dirs\n    std::string cdir;\n    std::string err_msg;\n  
  if (!dsn::utils::filesystem::create_directory(_options.slog_dir, cdir, err_msg)) {\n        dassert_f(false, \"{}\", err_msg);\n    }\n    _options.slog_dir = cdir;\n    initialize_fs_manager(_options.data_dirs, _options.data_dir_tags);\n\n    _log = new mutation_log_shared(_options.slog_dir,\n                                   _options.log_shared_file_size_mb,\n                                   _options.log_shared_force_flush,\n                                   &_counter_shared_log_recent_write_size);\n    ddebug(\"slog_dir = %s\", _options.slog_dir.c_str());\n\n    // init rps\n    ddebug(\"start to load replicas\");\n\n    std::vector<std::string> dir_list;\n    for (auto &dir : _fs_manager.get_available_data_dirs()) {\n        std::vector<std::string> tmp_list;\n        if (!dsn::utils::filesystem::get_subdirectories(dir, tmp_list, false)) {\n            dassert(false, \"Fail to get subdirectories in %s.\", dir.c_str());\n        }\n        dir_list.insert(dir_list.end(), tmp_list.begin(), tmp_list.end());\n    }\n\n    replicas rps;\n    utils::ex_lock rps_lock;\n    std::deque<task_ptr> load_tasks;\n    uint64_t start_time = dsn_now_ms();\n    for (auto &dir : dir_list) {\n        if (dsn::replication::is_data_dir_invalid(dir)) {\n            ddebug_f(\"ignore dir {}\", dir);\n            continue;\n        }\n\n        load_tasks.push_back(tasking::create_task(\n            LPC_REPLICATION_INIT_LOAD,\n            &_tracker,\n            [this, dir, &rps, &rps_lock] {\n                ddebug(\"process dir %s\", dir.c_str());\n\n                auto r = replica::load(this, dir.c_str());\n                if (r != nullptr) {\n                    ddebug(\"%s@%s: load replica '%s' success, <durable, commit> = <%\" PRId64\n                           \", %\" PRId64 \">, last_prepared_decree = %\" PRId64,\n                           r->get_gpid().to_string(),\n                           dsn_primary_address().to_string(),\n                           dir.c_str(),\n 
                          r->last_durable_decree(),\n                           r->last_committed_decree(),\n                           r->last_prepared_decree());\n\n                    utils::auto_lock<utils::ex_lock> l(rps_lock);\n\n                    if (rps.find(r->get_gpid()) != rps.end()) {\n                        dassert(false,\n                                \"conflict replica dir: %s <--> %s\",\n                                r->dir().c_str(),\n                                rps[r->get_gpid()]->dir().c_str());\n                    }\n\n                    rps[r->get_gpid()] = r;\n                }\n            },\n            load_tasks.size()));\n        load_tasks.back()->enqueue();\n    }\n    for (auto &tsk : load_tasks) {\n        tsk->wait();\n    }\n    uint64_t finish_time = dsn_now_ms();\n\n    dir_list.clear();\n    load_tasks.clear();\n    ddebug(\"load replicas succeed, replica_count = %d, time_used = %\" PRIu64 \" ms\",\n           static_cast<int>(rps.size()),\n           finish_time - start_time);\n\n    // init shared prepare log\n    ddebug(\"start to replay shared log\");\n\n    std::map<gpid, decree> replay_condition;\n    for (auto it = rps.begin(); it != rps.end(); ++it) {\n        replay_condition[it->first] = it->second->last_committed_decree();\n    }\n\n    start_time = dsn_now_ms();\n    error_code err = _log->open(\n        [&rps](int log_length, mutation_ptr &mu) {\n            auto it = rps.find(mu->data.header.pid);\n            if (it != rps.end()) {\n                return it->second->replay_mutation(mu, false);\n            } else {\n                return false;\n            }\n        },\n        [this](error_code err) { this->handle_log_failure(err); },\n        replay_condition);\n    finish_time = dsn_now_ms();\n\n    if (err == ERR_OK) {\n        ddebug(\"replay shared log succeed, time_used = %\" PRIu64 \" ms\", finish_time - start_time);\n    } else {\n        derror(\"replay shared log failed, err = %s, 
time_used = %\" PRIu64 \" ms, clear all logs ...\",\n               err.to_string(),\n               finish_time - start_time);\n\n        // we must delete or update meta server the error for all replicas\n        // before we fix the logs\n        // otherwise, the next process restart may consider the replicas'\n        // state complete\n\n        // delete all replicas\n        // TODO: checkpoint latest state and update on meta server so learning is cheaper\n        for (auto it = rps.begin(); it != rps.end(); ++it) {\n            it->second->close();\n            // move to '.err' directory\n            const char *dir = it->second->dir().c_str();\n            char rename_dir[1024];\n            sprintf(rename_dir, \"%s.%\" PRIu64 \".err\", dir, dsn_now_us());\n            bool ret = dsn::utils::filesystem::rename_path(dir, rename_dir);\n            dassert(ret, \"init_replica: failed to move directory '%s' to '%s'\", dir, rename_dir);\n            dwarn(\"init_replica: {replica_dir_op} succeed to move directory '%s' to '%s'\",\n                  dir,\n                  rename_dir);\n            _counter_replicas_recent_replica_move_error_count->increment();\n        }\n        rps.clear();\n\n        // restart log service\n        _log->close();\n        _log = nullptr;\n        if (!utils::filesystem::remove_path(_options.slog_dir)) {\n            dassert(false, \"remove directory %s failed\", _options.slog_dir.c_str());\n        }\n        _log = new mutation_log_shared(_options.slog_dir,\n                                       _options.log_shared_file_size_mb,\n                                       _options.log_shared_force_flush,\n                                       &_counter_shared_log_recent_write_size);\n        auto lerr = _log->open(nullptr, [this](error_code err) { this->handle_log_failure(err); });\n        dassert(lerr == ERR_OK, \"restart log service must succeed\");\n    }\n\n    bool is_log_complete = true;\n    for (auto it = 
rps.begin(); it != rps.end(); ++it) {\n        auto err = it->second->background_sync_checkpoint();\n        dassert(err == ERR_OK, \"sync checkpoint failed, err = %s\", err.to_string());\n\n        it->second->reset_prepare_list_after_replay();\n\n        decree pmax = invalid_decree;\n        decree pmax_commit = invalid_decree;\n        if (it->second->private_log()) {\n            pmax = it->second->private_log()->max_decree(it->first);\n            pmax_commit = it->second->private_log()->max_commit_on_disk();\n        }\n\n        ddebug_f(\n            \"{}: load replica done, err = {}, durable = {}, committed = {}, \"\n            \"prepared = {}, ballot = {}, \"\n            \"valid_offset_in_plog = {}, max_decree_in_plog = {}, max_commit_on_disk_in_plog = {}, \"\n            \"valid_offset_in_slog = {}\",\n            it->second->name(),\n            err.to_string(),\n            it->second->last_durable_decree(),\n            it->second->last_committed_decree(),\n            it->second->max_prepared_decree(),\n            it->second->get_ballot(),\n            it->second->get_app()->init_info().init_offset_in_private_log,\n            pmax,\n            pmax_commit,\n            it->second->get_app()->init_info().init_offset_in_shared_log);\n    }\n\n    // we will mark all replicas inactive not transient unless all logs are complete\n    if (!is_log_complete) {\n        derror(\"logs are not complete for some replicas, which means that shared log is truncated, \"\n               \"mark all replicas as inactive\");\n        for (auto it = rps.begin(); it != rps.end(); ++it) {\n            it->second->set_inactive_state_transient(false);\n        }\n    }\n\n    // gc\n    if (false == _options.gc_disabled) {\n        _gc_timer_task = tasking::enqueue_timer(\n            LPC_GARBAGE_COLLECT_LOGS_AND_REPLICAS,\n            &_tracker,\n            [this] { on_gc(); },\n            std::chrono::milliseconds(_options.gc_interval_ms),\n            0,\n         
   std::chrono::milliseconds(rand::next_u32(0, _options.gc_interval_ms)));\n    }\n\n    // disk stat\n    if (false == _options.disk_stat_disabled) {\n        _disk_stat_timer_task = ::dsn::tasking::enqueue_timer(\n            LPC_DISK_STAT,\n            &_tracker,\n            [this]() { on_disk_stat(); },\n            std::chrono::seconds(_options.disk_stat_interval_seconds),\n            0,\n            std::chrono::seconds(_options.disk_stat_interval_seconds));\n    }\n\n    // attach rps\n    _replicas = std::move(rps);\n    _counter_replicas_count->add((uint64_t)_replicas.size());\n    for (const auto &kv : _replicas) {\n        _fs_manager.add_replica(kv.first, kv.second->dir());\n    }\n\n    _nfs = dsn::nfs_node::create();\n    _nfs->start();\n\n    dist::cmd::register_remote_command_rpc();\n\n    if (_options.delay_for_fd_timeout_on_start) {\n        uint64_t now_time_ms = dsn_now_ms();\n        uint64_t delay_time_ms =\n            (_options.fd_grace_seconds + 3) * 1000; // for more 3 seconds than grace seconds\n        if (now_time_ms < dsn::utils::process_start_millis() + delay_time_ms) {\n            uint64_t delay = dsn::utils::process_start_millis() + delay_time_ms - now_time_ms;\n            ddebug(\"delay for %\" PRIu64 \"ms to make failure detector timeout\", delay);\n            tasking::enqueue(LPC_REPLICA_SERVER_DELAY_START,\n                             &_tracker,\n                             [this]() { this->initialize_start(); },\n                             0,\n                             std::chrono::milliseconds(delay));\n        } else {\n            initialize_start();\n        }\n    } else {\n        initialize_start();\n    }\n}\n\nvoid replica_stub::initialize_fs_manager(std::vector<std::string> &data_dirs,\n                                         std::vector<std::string> &data_dir_tags)\n{\n    std::string cdir;\n    std::string err_msg;\n    int count = 0;\n    std::vector<std::string> available_dirs;\n    
std::vector<std::string> available_dir_tags;\n    for (auto i = 0; i < data_dir_tags.size(); ++i) {\n        std::string &dir = data_dirs[i];\n        if (dsn_unlikely(!utils::filesystem::create_directory(dir, cdir, err_msg) ||\n                         !utils::filesystem::check_dir_rw(dir, err_msg))) {\n            if (FLAGS_ignore_broken_disk) {\n                dwarn_f(\"data dir[{}] is broken, ignore it, error:{}\", dir, err_msg);\n            } else {\n                dassert_f(false, \"{}\", err_msg);\n            }\n            continue;\n        }\n        ddebug_f(\"data_dirs[{}] = {}\", count, cdir);\n        available_dirs.emplace_back(cdir);\n        available_dir_tags.emplace_back(data_dir_tags[i]);\n        count++;\n    }\n\n    dassert_f(available_dirs.size() > 0,\n              \"initialize fs manager failed, no available data directory\");\n    error_code err = _fs_manager.initialize(available_dirs, available_dir_tags, false);\n    dassert_f(err == dsn::ERR_OK, \"initialize fs manager failed, err({})\", err);\n}\n\nvoid replica_stub::initialize_start()\n{\n    if (_is_running) {\n        return;\n    }\n\n    // start timer for configuration sync\n    if (!_options.config_sync_disabled) {\n        _config_sync_timer_task =\n            tasking::enqueue_timer(LPC_QUERY_CONFIGURATION_ALL,\n                                   &_tracker,\n                                   [this]() {\n                                       zauto_lock l(_state_lock);\n                                       this->query_configuration_by_node();\n                                   },\n                                   std::chrono::milliseconds(_options.config_sync_interval_ms),\n                                   0,\n                                   std::chrono::milliseconds(_options.config_sync_interval_ms));\n    }\n\n#ifdef DSN_ENABLE_GPERF\n    _mem_release_timer_task =\n        tasking::enqueue_timer(LPC_MEM_RELEASE,\n                               &_tracker,\n     
                          std::bind(&replica_stub::gc_tcmalloc_memory, this, false),\n                               std::chrono::milliseconds(_options.mem_release_check_interval_ms),\n                               0,\n                               std::chrono::milliseconds(_options.mem_release_check_interval_ms));\n#endif\n\n    if (_options.duplication_enabled) {\n        _duplication_sync_timer = dsn::make_unique<duplication_sync_timer>(this);\n        _duplication_sync_timer->start();\n    }\n\n    _backup_server = dsn::make_unique<replica_backup_server>(this);\n\n    // init liveness monitor\n    dassert(NS_Disconnected == _state, \"\");\n    if (_options.fd_disabled == false) {\n        _failure_detector = std::make_shared<dsn::dist::slave_failure_detector_with_multimaster>(\n            _options.meta_servers,\n            [this]() { this->on_meta_server_disconnected(); },\n            [this]() { this->on_meta_server_connected(); });\n\n        auto err = _failure_detector->start(_options.fd_check_interval_seconds,\n                                            _options.fd_beacon_interval_seconds,\n                                            _options.fd_lease_seconds,\n                                            _options.fd_grace_seconds);\n        dassert(err == ERR_OK, \"FD start failed, err = %s\", err.to_string());\n\n        _failure_detector->register_master(_failure_detector->current_server_contact());\n    } else {\n        _state = NS_Connected;\n    }\n\n    _is_running = true;\n}\n\ndsn::error_code replica_stub::on_kill_replica(gpid id)\n{\n    ddebug(\"kill replica: gpid = %s\", id.to_string());\n    if (id.get_app_id() == -1 || id.get_partition_index() == -1) {\n        replicas rs;\n        {\n            zauto_read_lock l(_replicas_lock);\n            rs = _replicas;\n        }\n        for (auto it = rs.begin(); it != rs.end(); ++it) {\n            replica_ptr &r = it->second;\n            if (id.get_app_id() == -1 || id.get_app_id() == 
r->get_gpid().get_app_id())\n                r->inject_error(ERR_INJECTED);\n        }\n        return ERR_OK;\n    } else {\n        error_code err = ERR_INVALID_PARAMETERS;\n        replica_ptr r = get_replica(id);\n        if (r == nullptr) {\n            err = ERR_OBJECT_NOT_FOUND;\n        } else {\n            r->inject_error(ERR_INJECTED);\n            err = ERR_OK;\n        }\n        return err;\n    }\n}\n\nreplica_ptr replica_stub::get_replica(gpid id) const\n{\n    zauto_read_lock l(_replicas_lock);\n    auto it = _replicas.find(id);\n    if (it != _replicas.end())\n        return it->second;\n    else\n        return nullptr;\n}\n\nreplica_stub::replica_life_cycle replica_stub::get_replica_life_cycle(gpid id)\n{\n    zauto_read_lock l(_replicas_lock);\n    if (_opening_replicas.find(id) != _opening_replicas.end())\n        return replica_stub::RL_creating;\n    if (_replicas.find(id) != _replicas.end())\n        return replica_stub::RL_serving;\n    if (_closing_replicas.find(id) != _closing_replicas.end())\n        return replica_stub::RL_closing;\n    if (_closed_replicas.find(id) != _closed_replicas.end())\n        return replica_stub::RL_closed;\n    return replica_stub::RL_invalid;\n}\n\nvoid replica_stub::on_client_write(gpid id, dsn::message_ex *request)\n{\n    if (_deny_client) {\n        // ignore and do not reply\n        return;\n    }\n    if (_verbose_client_log && request) {\n        ddebug(\"%s@%s: client = %s, code = %s, timeout = %d\",\n               id.to_string(),\n               _primary_address_str,\n               request->header->from_address.to_string(),\n               request->header->rpc_name,\n               request->header->client.timeout_ms);\n    }\n    replica_ptr rep = get_replica(id);\n    if (rep != nullptr) {\n        rep->on_client_write(request);\n    } else {\n        response_client(id, false, request, partition_status::PS_INVALID, ERR_OBJECT_NOT_FOUND);\n    }\n}\n\nvoid replica_stub::on_client_read(gpid id, 
dsn::message_ex *request)\n{\n    if (_deny_client) {\n        // ignore and do not reply\n        return;\n    }\n    if (_verbose_client_log && request) {\n        ddebug(\"%s@%s: client = %s, code = %s, timeout = %d\",\n               id.to_string(),\n               _primary_address_str,\n               request->header->from_address.to_string(),\n               request->header->rpc_name,\n               request->header->client.timeout_ms);\n    }\n    replica_ptr rep = get_replica(id);\n    if (rep != nullptr) {\n        rep->on_client_read(request);\n    } else {\n        response_client(id, true, request, partition_status::PS_INVALID, ERR_OBJECT_NOT_FOUND);\n    }\n}\n\nvoid replica_stub::on_config_proposal(const configuration_update_request &proposal)\n{\n    if (!is_connected()) {\n        dwarn(\"%s@%s: received config proposal %s for %s: not connected, ignore\",\n              proposal.config.pid.to_string(),\n              _primary_address_str,\n              enum_to_string(proposal.type),\n              proposal.node.to_string());\n        return;\n    }\n\n    ddebug(\"%s@%s: received config proposal %s for %s\",\n           proposal.config.pid.to_string(),\n           _primary_address_str,\n           enum_to_string(proposal.type),\n           proposal.node.to_string());\n\n    replica_ptr rep = get_replica(proposal.config.pid);\n    if (rep == nullptr) {\n        if (proposal.type == config_type::CT_ASSIGN_PRIMARY) {\n            std::shared_ptr<configuration_update_request> req2(new configuration_update_request);\n            *req2 = proposal;\n            begin_open_replica(proposal.info, proposal.config.pid, nullptr, req2);\n        } else if (proposal.type == config_type::CT_UPGRADE_TO_PRIMARY) {\n            remove_replica_on_meta_server(proposal.info, proposal.config);\n        }\n    }\n\n    if (rep != nullptr) {\n        rep->on_config_proposal((configuration_update_request &)proposal);\n    }\n}\n\nvoid 
replica_stub::on_query_decree(query_replica_decree_rpc rpc)\n{\n    const query_replica_decree_request &req = rpc.request();\n    query_replica_decree_response &resp = rpc.response();\n\n    replica_ptr rep = get_replica(req.pid);\n    if (rep != nullptr) {\n        resp.err = ERR_OK;\n        if (partition_status::PS_POTENTIAL_SECONDARY == rep->status()) {\n            resp.last_decree = 0;\n        } else {\n            resp.last_decree = rep->last_committed_decree();\n            // TODO: use the following to alleviate data lost\n            // resp.last_decree = rep->last_prepared_decree();\n        }\n    } else {\n        resp.err = ERR_OBJECT_NOT_FOUND;\n        resp.last_decree = 0;\n    }\n}\n\nvoid replica_stub::on_query_replica_info(query_replica_info_rpc rpc)\n{\n    query_replica_info_response &resp = rpc.response();\n    std::set<gpid> visited_replicas;\n    {\n        zauto_read_lock l(_replicas_lock);\n        for (auto it = _replicas.begin(); it != _replicas.end(); ++it) {\n            replica_ptr &r = it->second;\n            replica_info info;\n            get_replica_info(info, r);\n            if (visited_replicas.find(info.pid) == visited_replicas.end()) {\n                visited_replicas.insert(info.pid);\n                resp.replicas.push_back(std::move(info));\n            }\n        }\n        for (auto it = _closing_replicas.begin(); it != _closing_replicas.end(); ++it) {\n            const replica_info &info = std::get<3>(it->second);\n            if (visited_replicas.find(info.pid) == visited_replicas.end()) {\n                visited_replicas.insert(info.pid);\n                resp.replicas.push_back(info);\n            }\n        }\n        for (auto it = _closed_replicas.begin(); it != _closed_replicas.end(); ++it) {\n            const replica_info &info = it->second.second;\n            if (visited_replicas.find(info.pid) == visited_replicas.end()) {\n                visited_replicas.insert(info.pid);\n                
resp.replicas.push_back(info);\n            }\n        }\n    }\n    resp.err = ERR_OK;\n}\n\nvoid replica_stub::on_query_last_checkpoint(query_last_checkpoint_info_rpc rpc)\n{\n    const learn_request &request = rpc.request();\n    learn_response &response = rpc.response();\n\n    replica_ptr rep = get_replica(request.pid);\n    if (rep != nullptr) {\n        rep->on_query_last_checkpoint(response);\n    } else {\n        response.err = ERR_OBJECT_NOT_FOUND;\n    }\n}\n\n// ThreadPool: THREAD_POOL_DEFAULT\nvoid replica_stub::on_query_disk_info(query_disk_info_rpc rpc)\n{\n    const query_disk_info_request &req = rpc.request();\n    query_disk_info_response &resp = rpc.response();\n    int app_id = 0;\n    if (!req.app_name.empty()) {\n        zauto_read_lock l(_replicas_lock);\n        if (!(app_id = get_app_id_from_replicas(req.app_name))) {\n            resp.err = ERR_OBJECT_NOT_FOUND;\n            return;\n        }\n    }\n\n    for (const auto &dir_node : _fs_manager._dir_nodes) {\n        disk_info info;\n        // app_name empty means query all app replica_count\n        if (req.app_name.empty()) {\n            info.holding_primary_replicas = dir_node->holding_primary_replicas;\n            info.holding_secondary_replicas = dir_node->holding_secondary_replicas;\n        } else {\n            const auto &primary_iter = dir_node->holding_primary_replicas.find(app_id);\n            if (primary_iter != dir_node->holding_primary_replicas.end()) {\n                info.holding_primary_replicas[app_id] = primary_iter->second;\n            }\n\n            const auto &secondary_iter = dir_node->holding_secondary_replicas.find(app_id);\n            if (secondary_iter != dir_node->holding_secondary_replicas.end()) {\n                info.holding_secondary_replicas[app_id] = secondary_iter->second;\n            }\n        }\n        info.tag = dir_node->tag;\n        info.full_dir = dir_node->full_dir;\n        info.disk_capacity_mb = dir_node->disk_capacity_mb;\n    
    info.disk_available_mb = dir_node->disk_available_mb;\n\n        resp.disk_infos.emplace_back(info);\n    }\n\n    resp.total_capacity_mb = _fs_manager._total_capacity_mb;\n    resp.total_available_mb = _fs_manager._total_available_mb;\n\n    resp.err = ERR_OK;\n}\n\nvoid replica_stub::on_disk_migrate(replica_disk_migrate_rpc rpc)\n{\n    const replica_disk_migrate_request &request = rpc.request();\n    replica_disk_migrate_response &response = rpc.response();\n\n    replica_ptr rep = get_replica(request.pid);\n    if (rep != nullptr) {\n        rep->disk_migrator()->on_migrate_replica(rpc); // THREAD_POOL_DEFAULT\n    } else {\n        response.err = ERR_OBJECT_NOT_FOUND;\n    }\n}\n\nvoid replica_stub::on_query_app_info(query_app_info_rpc rpc)\n{\n    const query_app_info_request &req = rpc.request();\n    query_app_info_response &resp = rpc.response();\n\n    ddebug(\"got query app info request from (%s)\", req.meta_server.to_string());\n    resp.err = dsn::ERR_OK;\n    std::set<app_id> visited_apps;\n    {\n        zauto_read_lock l(_replicas_lock);\n        for (auto it = _replicas.begin(); it != _replicas.end(); ++it) {\n            replica_ptr &r = it->second;\n            const app_info &info = *r->get_app_info();\n            if (visited_apps.find(info.app_id) == visited_apps.end()) {\n                resp.apps.push_back(info);\n                visited_apps.insert(info.app_id);\n            }\n        }\n        for (auto it = _closing_replicas.begin(); it != _closing_replicas.end(); ++it) {\n            const app_info &info = std::get<2>(it->second);\n            if (visited_apps.find(info.app_id) == visited_apps.end()) {\n                resp.apps.push_back(info);\n                visited_apps.insert(info.app_id);\n            }\n        }\n        for (auto it = _closed_replicas.begin(); it != _closed_replicas.end(); ++it) {\n            const app_info &info = it->second.first;\n            if (visited_apps.find(info.app_id) == visited_apps.end()) 
{\n                resp.apps.push_back(info);\n                visited_apps.insert(info.app_id);\n            }\n        }\n    }\n}\n\n// ThreadPool: THREAD_POOL_DEFAULT\nvoid replica_stub::on_add_new_disk(add_new_disk_rpc rpc)\n{\n    const auto &disk_str = rpc.request().disk_str;\n    auto &resp = rpc.response();\n    resp.err = ERR_OK;\n\n    std::vector<std::string> data_dirs;\n    std::vector<std::string> data_dir_tags;\n    std::string err_msg = \"\";\n    if (disk_str.empty() ||\n        !replication_options::get_data_dir_and_tag(\n            disk_str, \"\", \"replica\", data_dirs, data_dir_tags, err_msg)) {\n        resp.err = ERR_INVALID_PARAMETERS;\n        resp.__set_err_hint(fmt::format(\"invalid str({}), err_msg: {}\", disk_str, err_msg));\n        return;\n    }\n\n    for (auto i = 0; i < data_dir_tags.size(); ++i) {\n        auto dir = data_dirs[i];\n        if (_fs_manager.is_dir_node_available(dir, data_dir_tags[i])) {\n            resp.err = ERR_NODE_ALREADY_EXIST;\n            resp.__set_err_hint(\n                fmt::format(\"data_dir({}) tag({}) already available\", dir, data_dir_tags[i]));\n            return;\n        }\n\n        if (dsn_unlikely(utils::filesystem::directory_exists(dir) &&\n                         !utils::filesystem::is_directory_empty(dir).second)) {\n            resp.err = ERR_DIR_NOT_EMPTY;\n            resp.__set_err_hint(fmt::format(\"Disk({}) directory is not empty\", dir));\n            return;\n        }\n\n        std::string cdir;\n        if (dsn_unlikely(!utils::filesystem::create_directory(dir, cdir, err_msg) ||\n                         !utils::filesystem::check_dir_rw(dir, err_msg))) {\n            resp.err = ERR_FILE_OPERATION_FAILED;\n            resp.__set_err_hint(err_msg);\n            return;\n        }\n\n        ddebug_f(\"Add a new disk in fs_manager, data_dir={}, tag={}\", cdir, data_dir_tags[i]);\n        _fs_manager.add_new_dir_node(cdir, data_dir_tags[i]);\n    }\n}\n\nvoid 
replica_stub::on_prepare(dsn::message_ex *request)\n{\n    gpid id;\n    dsn::unmarshall(request, id);\n    replica_ptr rep = get_replica(id);\n    if (rep != nullptr) {\n        rep->on_prepare(request);\n    } else {\n        prepare_ack resp;\n        resp.pid = id;\n        resp.err = ERR_OBJECT_NOT_FOUND;\n        reply(request, resp);\n    }\n}\n\nvoid replica_stub::on_group_check(group_check_rpc rpc)\n{\n    const group_check_request &request = rpc.request();\n    group_check_response &response = rpc.response();\n    if (!is_connected()) {\n        dwarn(\"%s@%s: received group check: not connected, ignore\",\n              request.config.pid.to_string(),\n              _primary_address_str);\n        return;\n    }\n\n    ddebug(\"%s@%s: received group check, primary = %s, ballot = %\" PRId64\n           \", status = %s, last_committed_decree = %\" PRId64,\n           request.config.pid.to_string(),\n           _primary_address_str,\n           request.config.primary.to_string(),\n           request.config.ballot,\n           enum_to_string(request.config.status),\n           request.last_committed_decree);\n\n    replica_ptr rep = get_replica(request.config.pid);\n    if (rep != nullptr) {\n        rep->on_group_check(request, response);\n    } else {\n        if (request.config.status == partition_status::PS_POTENTIAL_SECONDARY) {\n            std::shared_ptr<group_check_request> req(new group_check_request);\n            *req = request;\n\n            begin_open_replica(request.app, request.config.pid, req, nullptr);\n            response.err = ERR_OK;\n            response.learner_signature = invalid_signature;\n        } else {\n            response.err = ERR_OBJECT_NOT_FOUND;\n        }\n    }\n}\n\nvoid replica_stub::on_learn(dsn::message_ex *msg)\n{\n    learn_request request;\n    ::dsn::unmarshall(msg, request);\n\n    replica_ptr rep = get_replica(request.pid);\n    if (rep != nullptr) {\n        rep->on_learn(msg, request);\n    } else {\n       
 learn_response response;\n        response.err = ERR_OBJECT_NOT_FOUND;\n        reply(msg, response);\n    }\n}\n\nvoid replica_stub::on_learn_completion_notification(learn_completion_notification_rpc rpc)\n{\n    const group_check_response &report = rpc.request();\n    learn_notify_response &response = rpc.response();\n    response.pid = report.pid;\n    response.signature = report.learner_signature;\n    replica_ptr rep = get_replica(report.pid);\n    if (rep != nullptr) {\n        rep->on_learn_completion_notification(report, response);\n    } else {\n        response.err = ERR_OBJECT_NOT_FOUND;\n    }\n}\n\nvoid replica_stub::on_add_learner(const group_check_request &request)\n{\n    if (!is_connected()) {\n        dwarn(\"%s@%s: received add learner: not connected, ignore\",\n              request.config.pid.to_string(),\n              _primary_address_str,\n              request.config.primary.to_string());\n        return;\n    }\n\n    ddebug(\"%s@%s: received add learner, primary = %s, ballot = %\" PRId64\n           \", status = %s, last_committed_decree = %\" PRId64,\n           request.config.pid.to_string(),\n           _primary_address_str,\n           request.config.primary.to_string(),\n           request.config.ballot,\n           enum_to_string(request.config.status),\n           request.last_committed_decree);\n\n    replica_ptr rep = get_replica(request.config.pid);\n    if (rep != nullptr) {\n        rep->on_add_learner(request);\n    } else {\n        std::shared_ptr<group_check_request> req(new group_check_request);\n        *req = request;\n        begin_open_replica(request.app, request.config.pid, req, nullptr);\n    }\n}\n\nvoid replica_stub::on_remove(const replica_configuration &request)\n{\n    replica_ptr rep = get_replica(request.pid);\n    if (rep != nullptr) {\n        rep->on_remove(request);\n    }\n}\n\nvoid replica_stub::get_replica_info(replica_info &info, replica_ptr r)\n{\n    info.pid = r->get_gpid();\n    info.ballot = 
r->get_ballot();\n    info.status = r->status();\n    info.app_type = r->get_app_info()->app_type;\n    info.last_committed_decree = r->last_committed_decree();\n    info.last_prepared_decree = r->last_prepared_decree();\n    info.last_durable_decree = r->last_durable_decree();\n\n    dsn::error_code err = _fs_manager.get_disk_tag(r->dir(), info.disk_tag);\n    if (dsn::ERR_OK != err) {\n        dwarn(\"get disk tag of %s failed: %s\", r->dir().c_str(), err.to_string());\n    }\n\n    info.__set_manual_compact_status(r->get_manual_compact_status());\n}\n\nvoid replica_stub::get_local_replicas(std::vector<replica_info> &replicas)\n{\n    zauto_read_lock l(_replicas_lock);\n    // local_replicas = replicas + closing_replicas + closed_replicas\n    int total_replicas = _replicas.size() + _closing_replicas.size() + _closed_replicas.size();\n    replicas.reserve(total_replicas);\n\n    for (auto &pairs : _replicas) {\n        replica_ptr &rep = pairs.second;\n        // child partition should not sync config from meta server\n        // because it is not ready in meta view\n        if (rep->status() == partition_status::PS_PARTITION_SPLIT) {\n            continue;\n        }\n        replica_info info;\n        get_replica_info(info, rep);\n        replicas.push_back(std::move(info));\n    }\n\n    for (auto &pairs : _closing_replicas) {\n        replicas.push_back(std::get<3>(pairs.second));\n    }\n\n    for (auto &pairs : _closed_replicas) {\n        replicas.push_back(pairs.second.second);\n    }\n}\n\n// run in THREAD_POOL_META_SERVER\n// assert(_state_lock.locked())\nvoid replica_stub::query_configuration_by_node()\n{\n    if (_state == NS_Disconnected) {\n        return;\n    }\n\n    if (_config_query_task != nullptr) {\n        return;\n    }\n\n    dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_CONFIG_SYNC);\n\n    configuration_query_by_node_request req;\n    req.node = _primary_address;\n\n    // TODO: send stored replicas may cost network, we 
shouldn't config the frequency\n    get_local_replicas(req.stored_replicas);\n    req.__isset.stored_replicas = true;\n\n    ::dsn::marshall(msg, req);\n\n    ddebug(\"send query node partitions request to meta server, stored_replicas_count = %d\",\n           (int)req.stored_replicas.size());\n\n    rpc_address target(_failure_detector->get_servers());\n    _config_query_task =\n        rpc::call(target,\n                  msg,\n                  &_tracker,\n                  [this](error_code err, dsn::message_ex *request, dsn::message_ex *resp) {\n                      on_node_query_reply(err, request, resp);\n                  });\n}\n\nvoid replica_stub::on_meta_server_connected()\n{\n    ddebug(\"meta server connected\");\n\n    zauto_lock l(_state_lock);\n    if (_state == NS_Disconnected) {\n        _state = NS_Connecting;\n        tasking::enqueue(LPC_QUERY_CONFIGURATION_ALL, &_tracker, [this]() {\n            zauto_lock l(_state_lock);\n            this->query_configuration_by_node();\n        });\n    }\n}\n\n// run in THREAD_POOL_META_SERVER\nvoid replica_stub::on_node_query_reply(error_code err,\n                                       dsn::message_ex *request,\n                                       dsn::message_ex *response)\n{\n    ddebug(\"query node partitions replied, err = %s\", err.to_string());\n\n    zauto_lock l(_state_lock);\n    _config_query_task = nullptr;\n    if (err != ERR_OK) {\n        if (_state == NS_Connecting) {\n            query_configuration_by_node();\n        }\n    } else {\n        if (_state == NS_Connecting) {\n            _state = NS_Connected;\n        }\n\n        // DO NOT UPDATE STATE WHEN DISCONNECTED\n        if (_state != NS_Connected)\n            return;\n\n        configuration_query_by_node_response resp;\n        ::dsn::unmarshall(response, resp);\n\n        if (resp.err == ERR_BUSY) {\n            int delay_ms = 500;\n            ddebug(\"resend query node partitions request after %d ms for resp.err = 
ERR_BUSY\",\n                   delay_ms);\n            _config_query_task = tasking::enqueue(LPC_QUERY_CONFIGURATION_ALL,\n                                                  &_tracker,\n                                                  [this]() {\n                                                      zauto_lock l(_state_lock);\n                                                      _config_query_task = nullptr;\n                                                      this->query_configuration_by_node();\n                                                  },\n                                                  0,\n                                                  std::chrono::milliseconds(delay_ms));\n            return;\n        }\n        if (resp.err != ERR_OK) {\n            ddebug(\"ignore query node partitions response for resp.err = %s\", resp.err.to_string());\n            return;\n        }\n\n        ddebug_f(\"process query node partitions response for resp.err = ERR_OK, \"\n                 \"partitions_count({}), gc_replicas_count({})\",\n                 resp.partitions.size(),\n                 resp.gc_replicas.size());\n\n        replicas rs;\n        {\n            zauto_read_lock l(_replicas_lock);\n            rs = _replicas;\n        }\n\n        for (auto it = resp.partitions.begin(); it != resp.partitions.end(); ++it) {\n            rs.erase(it->config.pid);\n            tasking::enqueue(LPC_QUERY_NODE_CONFIGURATION_SCATTER,\n                             &_tracker,\n                             std::bind(&replica_stub::on_node_query_reply_scatter, this, this, *it),\n                             it->config.pid.thread_hash());\n        }\n\n        // for rps not exist on meta_servers\n        for (auto it = rs.begin(); it != rs.end(); ++it) {\n            tasking::enqueue(\n                LPC_QUERY_NODE_CONFIGURATION_SCATTER2,\n                &_tracker,\n                std::bind(&replica_stub::on_node_query_reply_scatter2, this, this, it->first),\n 
               it->first.thread_hash());\n        }\n\n        // handle the replicas which need to be gc\n        if (resp.__isset.gc_replicas) {\n            for (replica_info &rep : resp.gc_replicas) {\n                replica_stub::replica_life_cycle lc = get_replica_life_cycle(rep.pid);\n                if (lc == replica_stub::RL_closed) {\n                    tasking::enqueue(LPC_GARBAGE_COLLECT_LOGS_AND_REPLICAS,\n                                     &_tracker,\n                                     std::bind(&replica_stub::on_gc_replica, this, this, rep.pid),\n                                     0);\n                }\n            }\n        }\n    }\n}\n\nvoid replica_stub::set_meta_server_connected_for_test(\n    const configuration_query_by_node_response &resp)\n{\n    zauto_lock l(_state_lock);\n    dassert(_state != NS_Connected, \"\");\n    _state = NS_Connected;\n\n    for (auto it = resp.partitions.begin(); it != resp.partitions.end(); ++it) {\n        tasking::enqueue(LPC_QUERY_NODE_CONFIGURATION_SCATTER,\n                         &_tracker,\n                         std::bind(&replica_stub::on_node_query_reply_scatter, this, this, *it),\n                         it->config.pid.thread_hash());\n    }\n}\n\nvoid replica_stub::set_replica_state_subscriber_for_test(replica_state_subscriber subscriber,\n                                                         bool is_long_subscriber)\n{\n    _replica_state_subscriber = subscriber;\n    _is_long_subscriber = is_long_subscriber;\n}\n\n// this_ is used to hold a ref to replica_stub so we don't need to cancel the task on\n// replica_stub::close\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_stub::on_node_query_reply_scatter(replica_stub_ptr this_,\n                                               const configuration_update_request &req)\n{\n    replica_ptr replica = get_replica(req.config.pid);\n    if (replica != nullptr) {\n        replica->on_config_sync(req.info,\n                                
req.config,\n                                req.__isset.meta_split_status ? req.meta_split_status\n                                                              : split_status::NOT_SPLIT);\n    } else {\n        if (req.config.primary == _primary_address) {\n            ddebug(\"%s@%s: replica not exists on replica server, which is primary, remove it \"\n                   \"from meta server\",\n                   req.config.pid.to_string(),\n                   _primary_address_str);\n            remove_replica_on_meta_server(req.info, req.config);\n        } else {\n            ddebug(\"%s@%s: replica not exists on replica server, which is not primary, just ignore\",\n                   req.config.pid.to_string(),\n                   _primary_address_str);\n        }\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_stub::on_node_query_reply_scatter2(replica_stub_ptr this_, gpid id)\n{\n    replica_ptr replica = get_replica(id);\n    if (replica != nullptr && replica->status() != partition_status::PS_POTENTIAL_SECONDARY &&\n        replica->status() != partition_status::PS_PARTITION_SPLIT) {\n        if (replica->status() == partition_status::PS_INACTIVE &&\n            dsn_now_ms() - replica->create_time_milliseconds() <\n                _options.gc_memory_replica_interval_ms) {\n            ddebug(\"%s: replica not exists on meta server, wait to close\", replica->name());\n            return;\n        }\n\n        ddebug(\"%s: replica not exists on meta server, remove\", replica->name());\n\n        // TODO: set PS_INACTIVE instead for further state reuse\n        replica->update_local_configuration_with_no_ballot_change(partition_status::PS_ERROR);\n    }\n}\n\nvoid replica_stub::remove_replica_on_meta_server(const app_info &info,\n                                                 const partition_configuration &config)\n{\n    dsn::message_ex *msg = dsn::message_ex::create_request(RPC_CM_UPDATE_PARTITION_CONFIGURATION);\n\n    
std::shared_ptr<configuration_update_request> request(new configuration_update_request);\n    request->info = info;\n    request->config = config;\n    request->config.ballot++;\n    request->node = _primary_address;\n    request->type = config_type::CT_DOWNGRADE_TO_INACTIVE;\n\n    if (_primary_address == config.primary) {\n        request->config.primary.set_invalid();\n    } else if (replica_helper::remove_node(_primary_address, request->config.secondaries)) {\n    } else {\n        return;\n    }\n\n    ::dsn::marshall(msg, *request);\n\n    rpc_address target(_failure_detector->get_servers());\n    rpc::call(_failure_detector->get_servers(),\n              msg,\n              nullptr,\n              [](error_code err, dsn::message_ex *, dsn::message_ex *) {});\n}\n\nvoid replica_stub::on_meta_server_disconnected()\n{\n    ddebug(\"meta server disconnected\");\n\n    zauto_lock l(_state_lock);\n    if (NS_Disconnected == _state)\n        return;\n\n    _state = NS_Disconnected;\n\n    replicas rs;\n    {\n        zauto_read_lock l(_replicas_lock);\n        rs = _replicas;\n    }\n\n    for (auto it = rs.begin(); it != rs.end(); ++it) {\n        tasking::enqueue(\n            LPC_CM_DISCONNECTED_SCATTER,\n            &_tracker,\n            std::bind(&replica_stub::on_meta_server_disconnected_scatter, this, this, it->first),\n            it->first.thread_hash());\n    }\n}\n\n// this_ is used to hold a ref to replica_stub so we don't need to cancel the task on\n// replica_stub::close\nvoid replica_stub::on_meta_server_disconnected_scatter(replica_stub_ptr this_, gpid id)\n{\n    {\n        zauto_lock l(_state_lock);\n        if (_state != NS_Disconnected)\n            return;\n    }\n\n    replica_ptr replica = get_replica(id);\n    if (replica != nullptr) {\n        replica->on_meta_server_disconnected();\n    }\n}\n\nvoid replica_stub::response_client(gpid id,\n                                   bool is_read,\n                                   dsn::message_ex 
*request,\n                                   partition_status::type status,\n                                   error_code error)\n{\n    if (error == ERR_BUSY) {\n        if (is_read)\n            _counter_recent_read_busy_count->increment();\n        else\n            _counter_recent_write_busy_count->increment();\n    } else if (error != ERR_OK) {\n        if (is_read)\n            _counter_recent_read_fail_count->increment();\n        else\n            _counter_recent_write_fail_count->increment();\n        derror(\"%s@%s: %s fail: client = %s, code = %s, timeout = %d, status = %s, error = %s\",\n               id.to_string(),\n               _primary_address_str,\n               is_read ? \"read\" : \"write\",\n               request == nullptr ? \"null\" : request->header->from_address.to_string(),\n               request == nullptr ? \"null\" : request->header->rpc_name,\n               request == nullptr ? 0 : request->header->client.timeout_ms,\n               enum_to_string(status),\n               error.to_string());\n    }\n\n    if (request != nullptr) {\n        dsn_rpc_reply(request->create_response(), error);\n    }\n}\n\nvoid replica_stub::init_gc_for_test()\n{\n    dassert(_options.gc_disabled, \"\");\n\n    _gc_timer_task = tasking::enqueue(LPC_GARBAGE_COLLECT_LOGS_AND_REPLICAS,\n                                      &_tracker,\n                                      [this] { on_gc(); },\n                                      0,\n                                      std::chrono::milliseconds(_options.gc_interval_ms));\n}\n\nvoid replica_stub::on_gc_replica(replica_stub_ptr this_, gpid id)\n{\n    std::string replica_path;\n    std::pair<app_info, replica_info> closed_info;\n\n    {\n        zauto_write_lock l(_replicas_lock);\n        auto iter = _closed_replicas.find(id);\n        if (iter == _closed_replicas.end())\n            return;\n        closed_info = iter->second;\n        _closed_replicas.erase(iter);\n        
_fs_manager.remove_replica(id);\n    }\n\n    replica_path = get_replica_dir(closed_info.first.app_type.c_str(), id, false);\n    if (replica_path.empty()) {\n        dwarn(\"gc closed replica(%s.%s) failed, no exist data\",\n              id.to_string(),\n              closed_info.first.app_type.c_str());\n        return;\n    }\n\n    ddebug(\"start to move replica(%s) as garbage, path: %s\", id.to_string(), replica_path.c_str());\n    char rename_path[1024];\n    sprintf(rename_path, \"%s.%\" PRIu64 \".gar\", replica_path.c_str(), dsn_now_us());\n    if (!dsn::utils::filesystem::rename_path(replica_path, rename_path)) {\n        dwarn(\n            \"gc_replica: failed to move directory '%s' to '%s'\", replica_path.c_str(), rename_path);\n\n        // if gc the replica failed, add it back\n        zauto_write_lock l(_replicas_lock);\n        _fs_manager.add_replica(id, replica_path);\n        _closed_replicas.emplace(id, closed_info);\n    } else {\n        dwarn(\"gc_replica: {replica_dir_op} succeed to move directory '%s' to '%s'\",\n              replica_path.c_str(),\n              rename_path);\n        _counter_replicas_recent_replica_move_garbage_count->increment();\n    }\n}\n\nvoid replica_stub::on_gc()\n{\n    uint64_t start = dsn_now_ns();\n\n    struct gc_info\n    {\n        replica_ptr rep;\n        partition_status::type status;\n        mutation_log_ptr plog;\n        decree last_durable_decree;\n        int64_t init_offset_in_shared_log;\n    };\n\n    std::unordered_map<gpid, gc_info> rs;\n    {\n        zauto_read_lock l(_replicas_lock);\n        // collect info in lock to prevent the case that the replica is closed in replica::close()\n        for (auto &kv : _replicas) {\n            const replica_ptr &rep = kv.second;\n            gc_info &info = rs[kv.first];\n            info.rep = rep;\n            info.status = rep->status();\n            info.plog = rep->private_log();\n            info.last_durable_decree = 
rep->last_durable_decree();\n            info.init_offset_in_shared_log = rep->get_app()->init_info().init_offset_in_shared_log;\n        }\n    }\n\n    ddebug(\"start to garbage collection, replica_count = %d\", (int)rs.size());\n\n    // gc shared prepare log\n    //\n    // Now that checkpoint is very important for gc, we must be able to trigger checkpoint when\n    // necessary.\n    // that is, we should be able to trigger memtable flush when necessary.\n    //\n    // How to trigger memtable flush?\n    //   we add a parameter `is_emergency' in dsn_app_async_checkpoint() function, when set true,\n    //   the undering storage system should flush memtable as soon as possiable.\n    //\n    // When to trigger memtable flush?\n    //   1. Using `[replication].checkpoint_max_interval_hours' option, we can set max interval time\n    //   of two adjacent checkpoints; If the time interval is arrived, then emergency checkpoint\n    //   will be triggered.\n    //   2. Using `[replication].log_shared_file_count_limit' option, we can set max file count of\n    //   shared log; If the limit is exceeded, then emergency checkpoint will be triggered; Instead\n    //   of triggering all replicas to do checkpoint, we will only trigger a few of necessary\n    //   replicas which block garbage collection of the oldest log file.\n    //\n    if (_log != nullptr) {\n        replica_log_info_map gc_condition;\n        for (auto &kv : rs) {\n            replica_log_info ri;\n            replica_ptr &rep = kv.second.rep;\n            mutation_log_ptr &plog = kv.second.plog;\n            if (plog) {\n                // flush private log to update plog_max_commit_on_disk,\n                // and just flush once to avoid flushing infinitely\n                plog->flush_once();\n\n                decree plog_max_commit_on_disk = plog->max_commit_on_disk();\n                ri.max_decree = std::min(kv.second.last_durable_decree, plog_max_commit_on_disk);\n                
ddebug(\"gc_shared: gc condition for %s, status = %s, garbage_max_decree = %\" PRId64\n                       \", last_durable_decree= %\" PRId64 \", plog_max_commit_on_disk = %\" PRId64 \"\",\n                       rep->name(),\n                       enum_to_string(kv.second.status),\n                       ri.max_decree,\n                       kv.second.last_durable_decree,\n                       plog_max_commit_on_disk);\n            } else {\n                ri.max_decree = kv.second.last_durable_decree;\n                ddebug(\"gc_shared: gc condition for %s, status = %s, garbage_max_decree = %\" PRId64\n                       \", last_durable_decree = %\" PRId64 \"\",\n                       rep->name(),\n                       enum_to_string(kv.second.status),\n                       ri.max_decree,\n                       kv.second.last_durable_decree);\n            }\n            ri.valid_start_offset = kv.second.init_offset_in_shared_log;\n            gc_condition[kv.first] = ri;\n        }\n\n        std::set<gpid> prevent_gc_replicas;\n        int reserved_log_count = _log->garbage_collection(\n            gc_condition, _options.log_shared_file_count_limit, prevent_gc_replicas);\n        if (reserved_log_count > _options.log_shared_file_count_limit * 2) {\n            ddebug(\"gc_shared: trigger emergency checkpoint by log_shared_file_count_limit, \"\n                   \"file_count_limit = %d, reserved_log_count = %d, trigger all replicas to do \"\n                   \"checkpoint\",\n                   _options.log_shared_file_count_limit,\n                   reserved_log_count);\n            for (auto &kv : rs) {\n                tasking::enqueue(\n                    LPC_PER_REPLICA_CHECKPOINT_TIMER,\n                    kv.second.rep->tracker(),\n                    std::bind(&replica_stub::trigger_checkpoint, this, kv.second.rep, true),\n                    kv.first.thread_hash(),\n                    std::chrono::milliseconds(rand::next_u32(0, 
_options.gc_interval_ms / 2)));\n            }\n        } else if (reserved_log_count > _options.log_shared_file_count_limit) {\n            std::ostringstream oss;\n            int c = 0;\n            for (auto &i : prevent_gc_replicas) {\n                if (c != 0)\n                    oss << \", \";\n                oss << i.to_string();\n                c++;\n            }\n            ddebug(\"gc_shared: trigger emergency checkpoint by log_shared_file_count_limit, \"\n                   \"file_count_limit = %d, reserved_log_count = %d, prevent_gc_replica_count = %d, \"\n                   \"trigger them to do checkpoint: { %s }\",\n                   _options.log_shared_file_count_limit,\n                   reserved_log_count,\n                   (int)prevent_gc_replicas.size(),\n                   oss.str().c_str());\n            for (auto &id : prevent_gc_replicas) {\n                auto find = rs.find(id);\n                if (find != rs.end()) {\n                    tasking::enqueue(\n                        LPC_PER_REPLICA_CHECKPOINT_TIMER,\n                        find->second.rep->tracker(),\n                        std::bind(&replica_stub::trigger_checkpoint, this, find->second.rep, true),\n                        id.thread_hash(),\n                        std::chrono::milliseconds(rand::next_u32(0, _options.gc_interval_ms / 2)));\n                }\n            }\n        }\n\n        _counter_shared_log_size->set(_log->total_size() / (1024 * 1024));\n    }\n\n    // statistic learning info\n    uint64_t learning_count = 0;\n    uint64_t learning_max_duration_time_ms = 0;\n    uint64_t learning_max_copy_file_size = 0;\n    uint64_t cold_backup_running_count = 0;\n    uint64_t cold_backup_max_duration_time_ms = 0;\n    uint64_t cold_backup_max_upload_file_size = 0;\n    uint64_t bulk_load_running_count = 0;\n    uint64_t bulk_load_max_ingestion_time_ms = 0;\n    uint64_t bulk_load_max_duration_time_ms = 0;\n    uint64_t splitting_count = 0;\n    
uint64_t splitting_max_duration_time_ms = 0;\n    uint64_t splitting_max_async_learn_time_ms = 0;\n    uint64_t splitting_max_copy_file_size = 0;\n    for (auto &kv : rs) {\n        replica_ptr &rep = kv.second.rep;\n        if (rep->status() == partition_status::PS_POTENTIAL_SECONDARY) {\n            learning_count++;\n            learning_max_duration_time_ms = std::max(\n                learning_max_duration_time_ms, rep->_potential_secondary_states.duration_ms());\n            learning_max_copy_file_size =\n                std::max(learning_max_copy_file_size,\n                         rep->_potential_secondary_states.learning_copy_file_size);\n        }\n        if (rep->status() == partition_status::PS_PRIMARY ||\n            rep->status() == partition_status::PS_SECONDARY) {\n            cold_backup_running_count += rep->_cold_backup_running_count.load();\n            cold_backup_max_duration_time_ms = std::max(\n                cold_backup_max_duration_time_ms, rep->_cold_backup_max_duration_time_ms.load());\n            cold_backup_max_upload_file_size = std::max(\n                cold_backup_max_upload_file_size, rep->_cold_backup_max_upload_file_size.load());\n\n            if (rep->get_bulk_loader()->get_bulk_load_status() != bulk_load_status::BLS_INVALID) {\n                bulk_load_running_count++;\n                bulk_load_max_ingestion_time_ms =\n                    std::max(bulk_load_max_ingestion_time_ms, rep->ingestion_duration_ms());\n                bulk_load_max_duration_time_ms =\n                    std::max(bulk_load_max_duration_time_ms, rep->get_bulk_loader()->duration_ms());\n            }\n        }\n        // splitting_max_copy_file_size, rep->_split_states.copy_file_size\n        if (rep->status() == partition_status::PS_PARTITION_SPLIT) {\n            splitting_count++;\n            splitting_max_duration_time_ms =\n                std::max(splitting_max_duration_time_ms, rep->_split_states.total_ms());\n            
splitting_max_async_learn_time_ms =\n                std::max(splitting_max_async_learn_time_ms, rep->_split_states.async_learn_ms());\n            splitting_max_copy_file_size =\n                std::max(splitting_max_copy_file_size, rep->_split_states.splitting_copy_file_size);\n        }\n    }\n\n    _counter_replicas_learning_count->set(learning_count);\n    _counter_replicas_learning_max_duration_time_ms->set(learning_max_duration_time_ms);\n    _counter_replicas_learning_max_copy_file_size->set(learning_max_copy_file_size);\n    _counter_cold_backup_running_count->set(cold_backup_running_count);\n    _counter_cold_backup_max_duration_time_ms->set(cold_backup_max_duration_time_ms);\n    _counter_cold_backup_max_upload_file_size->set(cold_backup_max_upload_file_size);\n    _counter_bulk_load_running_count->set(bulk_load_running_count);\n    _counter_bulk_load_max_ingestion_time_ms->set(bulk_load_max_ingestion_time_ms);\n    _counter_bulk_load_max_duration_time_ms->set(bulk_load_max_duration_time_ms);\n    _counter_replicas_splitting_count->set(splitting_count);\n    _counter_replicas_splitting_max_duration_time_ms->set(splitting_max_duration_time_ms);\n    _counter_replicas_splitting_max_async_learn_time_ms->set(splitting_max_async_learn_time_ms);\n    _counter_replicas_splitting_max_copy_file_size->set(splitting_max_copy_file_size);\n\n    ddebug(\"finish to garbage collection, time_used_ns = %\" PRIu64, dsn_now_ns() - start);\n}\n\nvoid replica_stub::on_disk_stat()\n{\n    ddebug(\"start to update disk stat\");\n    uint64_t start = dsn_now_ns();\n    disk_cleaning_report report{};\n\n    dsn::replication::disk_remove_useless_dirs(_fs_manager.get_available_data_dirs(), report);\n    _fs_manager.update_disk_stat();\n    update_disk_holding_replicas();\n    update_disks_status();\n\n    _counter_replicas_error_replica_dir_count->set(report.error_replica_count);\n    _counter_replicas_garbage_replica_dir_count->set(report.garbage_replica_count);\n    
_counter_replicas_tmp_replica_dir_count->set(report.disk_migrate_tmp_count);\n    _counter_replicas_origin_replica_dir_count->set(report.disk_migrate_origin_count);\n    _counter_replicas_recent_replica_remove_dir_count->add(report.remove_dir_count);\n\n    ddebug(\"finish to update disk stat, time_used_ns = %\" PRIu64, dsn_now_ns() - start);\n}\n\ntask_ptr replica_stub::begin_open_replica(\n    const app_info &app,\n    gpid id,\n    const std::shared_ptr<group_check_request> &group_check,\n    const std::shared_ptr<configuration_update_request> &configuration_update)\n{\n    _replicas_lock.lock_write();\n\n    if (_replicas.find(id) != _replicas.end()) {\n        _replicas_lock.unlock_write();\n        ddebug(\"open replica '%s.%s' failed coz replica is already opened\",\n               app.app_type.c_str(),\n               id.to_string());\n        return nullptr;\n    }\n\n    if (_opening_replicas.find(id) != _opening_replicas.end()) {\n        _replicas_lock.unlock_write();\n        ddebug(\"open replica '%s.%s' failed coz replica is under opening\",\n               app.app_type.c_str(),\n               id.to_string());\n        return nullptr;\n    }\n\n    auto it = _closing_replicas.find(id);\n    if (it != _closing_replicas.end()) {\n        task_ptr tsk = std::get<0>(it->second);\n        replica_ptr rep = std::get<1>(it->second);\n        if (rep->status() == partition_status::PS_INACTIVE && tsk->cancel(false)) {\n            // reopen it\n            _closing_replicas.erase(it);\n            _counter_replicas_closing_count->decrement();\n\n            _replicas.emplace(id, rep);\n            _counter_replicas_count->increment();\n\n            _closed_replicas.erase(id);\n\n            // unlock here to avoid dead lock\n            _replicas_lock.unlock_write();\n\n            ddebug(\"open replica '%s.%s' which is to be closed, reopen it\",\n                   app.app_type.c_str(),\n                   id.to_string());\n\n            // open by add 
learner\n            if (group_check != nullptr) {\n                on_add_learner(*group_check);\n            }\n        } else {\n            _replicas_lock.unlock_write();\n            ddebug(\"open replica '%s.%s' failed coz replica is under closing\",\n                   app.app_type.c_str(),\n                   id.to_string());\n        }\n        return nullptr;\n    }\n\n    task_ptr task = tasking::enqueue(\n        LPC_OPEN_REPLICA,\n        &_tracker,\n        std::bind(&replica_stub::open_replica, this, app, id, group_check, configuration_update));\n\n    _opening_replicas[id] = task;\n    _counter_replicas_opening_count->increment();\n    _closed_replicas.erase(id);\n\n    _replicas_lock.unlock_write();\n    return task;\n}\n\nvoid replica_stub::open_replica(\n    const app_info &app,\n    gpid id,\n    const std::shared_ptr<group_check_request> &group_check,\n    const std::shared_ptr<configuration_update_request> &configuration_update)\n{\n    std::string dir = get_replica_dir(app.app_type.c_str(), id, false);\n    replica_ptr rep = nullptr;\n    if (!dir.empty()) {\n        // NOTICE: if partition is DDD, and meta select one replica as primary, it will execute the\n        // load-process because of a.b.pegasus is exist, so it will never execute the restore\n        // process below\n        ddebug(\"%s@%s: start to load replica %s group check, dir = %s\",\n               id.to_string(),\n               _primary_address_str,\n               group_check ? 
\"with\" : \"without\",\n               dir.c_str());\n        rep = replica::load(this, dir.c_str());\n\n        // if load data failed, re-open the `*.ori` folder which is the origin replica dir of disk\n        // migration\n        if (rep == nullptr) {\n            std::string origin_tmp_dir = get_replica_dir(\n                fmt::format(\"{}{}\", app.app_type, replica_disk_migrator::kReplicaDirOriginSuffix)\n                    .c_str(),\n                id,\n                false);\n            if (!origin_tmp_dir.empty()) {\n                ddebug_f(\"mark the dir {} is garbage, start revert and load disk migration origin \"\n                         \"replica data({})\",\n                         dir,\n                         origin_tmp_dir);\n                dsn::utils::filesystem::rename_path(dir,\n                                                    fmt::format(\"{}{}\", dir, kFolderSuffixGar));\n\n                std::string origin_dir = origin_tmp_dir;\n                // revert the origin replica dir\n                boost::replace_first(\n                    origin_dir, replica_disk_migrator::kReplicaDirOriginSuffix, \"\");\n                dsn::utils::filesystem::rename_path(origin_tmp_dir, origin_dir);\n                rep = replica::load(this, origin_dir.c_str());\n\n                FAIL_POINT_INJECT_F(\"mock_replica_load\", [&](string_view) -> void {});\n            }\n        }\n    }\n\n    if (rep == nullptr) {\n        // NOTICE: if dir a.b.pegasus does not exist, or .app-info does not exist, but the ballot >\n        // 0, or the last_committed_decree > 0, start replica will fail\n        if ((configuration_update != nullptr) && (configuration_update->info.is_stateful)) {\n            dassert_f(configuration_update->config.ballot == 0 &&\n                          configuration_update->config.last_committed_decree == 0,\n                      \"{}@{}: cannot load replica({}.{}), ballot = {}, \"\n                      
\"last_committed_decree = {}, but it does not existed!\",\n                      id.to_string(),\n                      _primary_address_str,\n                      id.to_string(),\n                      app.app_type.c_str(),\n                      configuration_update->config.ballot,\n                      configuration_update->config.last_committed_decree);\n        }\n\n        // NOTICE: only new_replica_group's assign_primary will execute this; if server restart when\n        // download restore-data from cold backup media, the a.b.pegasus will move to\n        // a.b.pegasus.timestamp.err when replica-server load all the replicas, so restore-flow will\n        // do it again\n\n        bool restore_if_necessary =\n            ((configuration_update != nullptr) &&\n             (configuration_update->type == config_type::CT_ASSIGN_PRIMARY) &&\n             (app.envs.find(backup_restore_constant::POLICY_NAME) != app.envs.end()));\n\n        bool is_duplication_follower =\n            ((configuration_update != nullptr) &&\n             (configuration_update->type == config_type::CT_ASSIGN_PRIMARY) &&\n             (app.envs.find(duplication_constants::kDuplicationEnvMasterClusterKey) !=\n              app.envs.end()) &&\n             (app.envs.find(duplication_constants::kDuplicationEnvMasterMetasKey) !=\n              app.envs.end()));\n\n        // NOTICE: when we don't need execute restore-process, we should remove a.b.pegasus\n        // directory because it don't contain the valid data dir and also we need create a new\n        // replica(if contain valid data, it will execute load-process)\n\n        if (!restore_if_necessary && ::dsn::utils::filesystem::directory_exists(dir)) {\n            if (!::dsn::utils::filesystem::remove_path(dir)) {\n                dassert(false, \"remove useless directory(%s) failed\", dir.c_str());\n                return;\n            }\n        }\n        rep = replica::newr(this, id, app, restore_if_necessary, 
is_duplication_follower);\n    }\n\n    if (rep == nullptr) {\n        ddebug(\"%s@%s: open replica failed, erase from opening replicas\",\n               id.to_string(),\n               _primary_address_str);\n        zauto_write_lock l(_replicas_lock);\n        auto ret = _opening_replicas.erase(id);\n        dassert(ret > 0, \"replica %s is not in _opening_replicas\", id.to_string());\n        _counter_replicas_opening_count->decrement();\n        return;\n    }\n\n    {\n        zauto_write_lock l(_replicas_lock);\n        auto ret = _opening_replicas.erase(id);\n        dassert(ret > 0, \"replica %s is not in _opening_replicas\", id.to_string());\n        _counter_replicas_opening_count->decrement();\n\n        auto it = _replicas.find(id);\n        dassert(it == _replicas.end(), \"replica %s is already in _replicas\", id.to_string());\n        _replicas.insert(replicas::value_type(rep->get_gpid(), rep));\n        _counter_replicas_count->increment();\n\n        _closed_replicas.erase(id);\n    }\n\n    if (nullptr != group_check) {\n        rpc::call_one_way_typed(_primary_address,\n                                RPC_LEARN_ADD_LEARNER,\n                                *group_check,\n                                group_check->config.pid.thread_hash());\n    } else if (nullptr != configuration_update) {\n        rpc::call_one_way_typed(_primary_address,\n                                RPC_CONFIG_PROPOSAL,\n                                *configuration_update,\n                                configuration_update->config.pid.thread_hash());\n    }\n}\n\ntask_ptr replica_stub::begin_close_replica(replica_ptr r)\n{\n    dassert_f(r->status() == partition_status::PS_ERROR ||\n                  r->status() == partition_status::PS_INACTIVE ||\n                  r->disk_migrator()->status() >= disk_migration_status::MOVED,\n              \"invalid state(partition_status={}, migration_status={}) when calling \"\n              \"replica({}) close\",\n              
enum_to_string(r->status()),\n              enum_to_string(r->disk_migrator()->status()),\n              r->name());\n\n    gpid id = r->get_gpid();\n\n    zauto_write_lock l(_replicas_lock);\n\n    if (_replicas.erase(id) > 0) {\n        _counter_replicas_count->decrement();\n\n        int delay_ms = 0;\n        if (r->status() == partition_status::PS_INACTIVE) {\n            delay_ms = _options.gc_memory_replica_interval_ms;\n            ddebug(\"%s: delay %d milliseconds to close replica, status = PS_INACTIVE\",\n                   r->name(),\n                   delay_ms);\n        }\n\n        app_info a_info = *(r->get_app_info());\n        replica_info r_info;\n        get_replica_info(r_info, r);\n        task_ptr task = tasking::enqueue(LPC_CLOSE_REPLICA,\n                                         &_tracker,\n                                         [=]() { close_replica(r); },\n                                         0,\n                                         std::chrono::milliseconds(delay_ms));\n        _closing_replicas[id] = std::make_tuple(task, r, std::move(a_info), std::move(r_info));\n        _counter_replicas_closing_count->increment();\n        return task;\n    } else {\n        return nullptr;\n    }\n}\n\nvoid replica_stub::close_replica(replica_ptr r)\n{\n    ddebug(\"%s: start to close replica\", r->name());\n\n    gpid id = r->get_gpid();\n    std::string name = r->name();\n\n    r->close();\n\n    {\n        zauto_write_lock l(_replicas_lock);\n        auto find = _closing_replicas.find(id);\n        dassert(find != _closing_replicas.end(),\n                \"replica %s is not in _closing_replicas\",\n                name.c_str());\n        _closed_replicas.emplace(\n            id, std::make_pair(std::get<2>(find->second), std::get<3>(find->second)));\n        _closing_replicas.erase(find);\n        _counter_replicas_closing_count->decrement();\n    }\n\n    ddebug(\"%s: finish to close replica\", name.c_str());\n}\n\nvoid 
replica_stub::notify_replica_state_update(const replica_configuration &config, bool is_closing)\n{\n    if (nullptr != _replica_state_subscriber) {\n        if (_is_long_subscriber) {\n            tasking::enqueue(\n                LPC_REPLICA_STATE_CHANGE_NOTIFICATION,\n                &_tracker,\n                std::bind(_replica_state_subscriber, _primary_address, config, is_closing));\n        } else {\n            _replica_state_subscriber(_primary_address, config, is_closing);\n        }\n    }\n}\n\nvoid replica_stub::trigger_checkpoint(replica_ptr r, bool is_emergency)\n{\n    r->init_checkpoint(is_emergency);\n}\n\nvoid replica_stub::handle_log_failure(error_code err)\n{\n    derror(\"handle log failure: %s\", err.to_string());\n    if (!s_not_exit_on_log_failure) {\n        dassert(false, \"TODO: better log failure handling ...\");\n    }\n}\n\nvoid replica_stub::open_service()\n{\n    register_rpc_handler(RPC_CONFIG_PROPOSAL, \"ProposeConfig\", &replica_stub::on_config_proposal);\n    register_rpc_handler(RPC_PREPARE, \"prepare\", &replica_stub::on_prepare);\n    register_rpc_handler(RPC_LEARN, \"Learn\", &replica_stub::on_learn);\n    register_rpc_handler_with_rpc_holder(RPC_LEARN_COMPLETION_NOTIFY,\n                                         \"LearnNotify\",\n                                         &replica_stub::on_learn_completion_notification);\n    register_rpc_handler(RPC_LEARN_ADD_LEARNER, \"LearnAdd\", &replica_stub::on_add_learner);\n    register_rpc_handler(RPC_REMOVE_REPLICA, \"remove\", &replica_stub::on_remove);\n    register_rpc_handler_with_rpc_holder(\n        RPC_GROUP_CHECK, \"GroupCheck\", &replica_stub::on_group_check);\n    register_rpc_handler_with_rpc_holder(\n        RPC_QUERY_PN_DECREE, \"query_decree\", &replica_stub::on_query_decree);\n    register_rpc_handler_with_rpc_holder(\n        RPC_QUERY_REPLICA_INFO, \"query_replica_info\", &replica_stub::on_query_replica_info);\n    
register_rpc_handler_with_rpc_holder(RPC_QUERY_LAST_CHECKPOINT_INFO,\n                                         \"query_last_checkpoint_info\",\n                                         &replica_stub::on_query_last_checkpoint);\n    register_rpc_handler_with_rpc_holder(\n        RPC_QUERY_DISK_INFO, \"query_disk_info\", &replica_stub::on_query_disk_info);\n    register_rpc_handler_with_rpc_holder(\n        RPC_REPLICA_DISK_MIGRATE, \"disk_migrate_replica\", &replica_stub::on_disk_migrate);\n    register_rpc_handler_with_rpc_holder(\n        RPC_QUERY_APP_INFO, \"query_app_info\", &replica_stub::on_query_app_info);\n    register_rpc_handler_with_rpc_holder(RPC_SPLIT_UPDATE_CHILD_PARTITION_COUNT,\n                                         \"update_child_group_partition_count\",\n                                         &replica_stub::on_update_child_group_partition_count);\n    register_rpc_handler_with_rpc_holder(RPC_SPLIT_NOTIFY_CATCH_UP,\n                                         \"child_notify_catch_up\",\n                                         &replica_stub::on_notify_primary_split_catch_up);\n    register_rpc_handler_with_rpc_holder(RPC_BULK_LOAD, \"bulk_load\", &replica_stub::on_bulk_load);\n    register_rpc_handler_with_rpc_holder(\n        RPC_GROUP_BULK_LOAD, \"group_bulk_load\", &replica_stub::on_group_bulk_load);\n    register_rpc_handler_with_rpc_holder(\n        RPC_DETECT_HOTKEY, \"detect_hotkey\", &replica_stub::on_detect_hotkey);\n    register_rpc_handler_with_rpc_holder(\n        RPC_ADD_NEW_DISK, \"add_new_disk\", &replica_stub::on_add_new_disk);\n\n    register_ctrl_command();\n}\n\nvoid replica_stub::register_ctrl_command()\n{\n    /// In simple_kv test, three replica apps are created, which means that three replica_stubs are\n    /// initialized in simple_kv test. If we don't use std::call_once, these command are registered\n    /// for three times. 
And in command_manager, one same command is not allowed to be registered\n    /// more than twice times. That is why we use std::call_once here. Same situation in\n    /// failure_detector::register_ctrl_commands and nfs_client_impl::register_cli_commands\n    static std::once_flag flag;\n    std::call_once(flag, [&]() {\n        _kill_partition_command = ::dsn::command_manager::instance().register_command(\n            {\"replica.kill_partition\"},\n            \"replica.kill_partition [app_id [partition_index]]\",\n            \"replica.kill_partition: kill partitions by (all, one app, one partition)\",\n            [this](const std::vector<std::string> &args) {\n                dsn::gpid pid;\n                if (args.size() == 0) {\n                    pid.set_app_id(-1);\n                    pid.set_partition_index(-1);\n                } else if (args.size() == 1) {\n                    pid.set_app_id(atoi(args[0].c_str()));\n                    pid.set_partition_index(-1);\n                } else if (args.size() == 2) {\n                    pid.set_app_id(atoi(args[0].c_str()));\n                    pid.set_partition_index(atoi(args[1].c_str()));\n                } else {\n                    return std::string(ERR_INVALID_PARAMETERS.to_string());\n                }\n                dsn::error_code e = this->on_kill_replica(pid);\n                return std::string(e.to_string());\n            });\n\n        _deny_client_command = ::dsn::command_manager::instance().register_command(\n            {\"replica.deny-client\"},\n            \"replica.deny-client <true|false>\",\n            \"replica.deny-client - control if deny client read & write request\",\n            [this](const std::vector<std::string> &args) {\n                return remote_command_set_bool_flag(_deny_client, \"deny-client\", args);\n            });\n\n        _verbose_client_log_command = ::dsn::command_manager::instance().register_command(\n            
{\"replica.verbose-client-log\"},\n            \"replica.verbose-client-log <true|false>\",\n            \"replica.verbose-client-log - control if print verbose error log when reply read & \"\n            \"write request\",\n            [this](const std::vector<std::string> &args) {\n                return remote_command_set_bool_flag(\n                    _verbose_client_log, \"verbose-client-log\", args);\n            });\n\n        _verbose_commit_log_command = ::dsn::command_manager::instance().register_command(\n            {\"replica.verbose-commit-log\"},\n            \"replica.verbose-commit-log <true|false>\",\n            \"replica.verbose-commit-log - control if print verbose log when commit mutation\",\n            [this](const std::vector<std::string> &args) {\n                return remote_command_set_bool_flag(\n                    _verbose_commit_log, \"verbose-commit-log\", args);\n            });\n\n        _trigger_chkpt_command = ::dsn::command_manager::instance().register_command(\n            {\"replica.trigger-checkpoint\"},\n            \"replica.trigger-checkpoint [id1,id2,...] (where id is 'app_id' or \"\n            \"'app_id.partition_id')\",\n            \"replica.trigger-checkpoint - trigger replicas to do checkpoint\",\n            [this](const std::vector<std::string> &args) {\n                return exec_command_on_replica(args, true, [this](const replica_ptr &rep) {\n                    tasking::enqueue(LPC_PER_REPLICA_CHECKPOINT_TIMER,\n                                     rep->tracker(),\n                                     std::bind(&replica_stub::trigger_checkpoint, this, rep, true),\n                                     rep->get_gpid().thread_hash());\n                    return std::string(\"triggered\");\n                });\n            });\n\n        _query_compact_command = ::dsn::command_manager::instance().register_command(\n            {\"replica.query-compact\"},\n            \"replica.query-compact [id1,id2,...] 
(where id is 'app_id' or 'app_id.partition_id')\",\n            \"replica.query-compact - query full compact status on the underlying storage engine\",\n            [this](const std::vector<std::string> &args) {\n                return exec_command_on_replica(args, true, [](const replica_ptr &rep) {\n                    return rep->query_manual_compact_state();\n                });\n            });\n\n        _query_app_envs_command = ::dsn::command_manager::instance().register_command(\n            {\"replica.query-app-envs\"},\n            \"replica.query-app-envs [id1,id2,...] (where id is 'app_id' or 'app_id.partition_id')\",\n            \"replica.query-app-envs - query app envs on the underlying storage engine\",\n            [this](const std::vector<std::string> &args) {\n                return exec_command_on_replica(args, true, [](const replica_ptr &rep) {\n                    std::map<std::string, std::string> kv_map;\n                    rep->query_app_envs(kv_map);\n                    return dsn::utils::kv_map_to_string(kv_map, ',', '=');\n                });\n            });\n\n#ifdef DSN_ENABLE_GPERF\n        _release_tcmalloc_memory_command = ::dsn::command_manager::instance().register_command(\n            {\"replica.release-tcmalloc-memory\"},\n            \"replica.release-tcmalloc-memory <true|false>\",\n            \"replica.release-tcmalloc-memory - control if try to release tcmalloc memory\",\n            [this](const std::vector<std::string> &args) {\n                return remote_command_set_bool_flag(\n                    _release_tcmalloc_memory, \"release-tcmalloc-memory\", args);\n            });\n\n        _get_tcmalloc_status_command = ::dsn::command_manager::instance().register_command(\n            {\"replica.get-tcmalloc-status\"},\n            \"replica.get-tcmalloc-status - get status of tcmalloc\",\n            \"get status of tcmalloc\",\n            [](const std::vector<std::string> &args) {\n                char buf[4096];\n  
              MallocExtension::instance()->GetStats(buf, 4096);\n                return std::string(buf);\n            });\n\n        _max_reserved_memory_percentage_command = dsn::command_manager::instance().register_command(\n            {\"replica.mem-release-max-reserved-percentage\"},\n            \"replica.mem-release-max-reserved-percentage [num | DEFAULT]\",\n            \"control tcmalloc max reserved but not-used memory percentage\",\n            [this](const std::vector<std::string> &args) {\n                std::string result(\"OK\");\n                if (args.empty()) {\n                    // show current value\n                    result = \"mem-release-max-reserved-percentage = \" +\n                             std::to_string(_mem_release_max_reserved_mem_percentage);\n                    return result;\n                }\n                if (args[0] == \"DEFAULT\") {\n                    // set to default value\n                    _mem_release_max_reserved_mem_percentage =\n                        _options.mem_release_max_reserved_mem_percentage;\n                    return result;\n                }\n                int32_t percentage = 0;\n                if (!dsn::buf2int32(args[0], percentage) || percentage <= 0 || percentage > 100) {\n                    result = std::string(\"ERR: invalid arguments\");\n                } else {\n                    _mem_release_max_reserved_mem_percentage = percentage;\n                }\n                return result;\n            });\n\n        _release_all_reserved_memory_command = ::dsn::command_manager::instance().register_command(\n            {\"replica.release-all-reserved-memory\"},\n            \"replica.release-all-reserved-memory - release tcmalloc all reserved-not-used memory\",\n            \"release tcmalloc all reserverd not-used memory back to operating system\",\n            [this](const std::vector<std::string> &args) {\n                auto release_bytes = gc_tcmalloc_memory(true);\n     
           return \"OK, release_bytes=\" + std::to_string(release_bytes);\n            });\n#endif\n        _max_concurrent_bulk_load_downloading_count_command =\n            dsn::command_manager::instance().register_command(\n                {\"replica.max-concurrent-bulk-load-downloading-count\"},\n                \"replica.max-concurrent-bulk-load-downloading-count [num | DEFAULT]\",\n                \"control stub max_concurrent_bulk_load_downloading_count\",\n                [this](const std::vector<std::string> &args) {\n                    std::string result(\"OK\");\n                    if (args.empty()) {\n                        result = \"max_concurrent_bulk_load_downloading_count=\" +\n                                 std::to_string(_max_concurrent_bulk_load_downloading_count);\n                        return result;\n                    }\n\n                    if (args[0] == \"DEFAULT\") {\n                        _max_concurrent_bulk_load_downloading_count =\n                            _options.max_concurrent_bulk_load_downloading_count;\n                        return result;\n                    }\n\n                    int32_t count = 0;\n                    if (!dsn::buf2int32(args[0], count) || count <= 0) {\n                        result = std::string(\"ERR: invalid arguments\");\n                    } else {\n                        _max_concurrent_bulk_load_downloading_count = count;\n                    }\n                    return result;\n                });\n    });\n}\n\nstd::string\nreplica_stub::exec_command_on_replica(const std::vector<std::string> &args,\n                                      bool allow_empty_args,\n                                      std::function<std::string(const replica_ptr &rep)> func)\n{\n    if (!allow_empty_args && args.empty()) {\n        return std::string(\"invalid arguments\");\n    }\n\n    replicas rs;\n    {\n        zauto_read_lock l(_replicas_lock);\n        rs = _replicas;\n    }\n\n    
std::set<gpid> required_ids;\n    replicas choosed_rs;\n    if (!args.empty()) {\n        for (int i = 0; i < args.size(); i++) {\n            std::vector<std::string> arg_strs;\n            utils::split_args(args[i].c_str(), arg_strs, ',');\n            if (arg_strs.empty()) {\n                return std::string(\"invalid arguments\");\n            }\n\n            for (const std::string &arg : arg_strs) {\n                if (arg.empty())\n                    continue;\n                gpid id;\n                int pid;\n                if (id.parse_from(arg.c_str())) {\n                    // app_id.partition_index\n                    required_ids.insert(id);\n                    auto find = rs.find(id);\n                    if (find != rs.end()) {\n                        choosed_rs[id] = find->second;\n                    }\n                } else if (sscanf(arg.c_str(), \"%d\", &pid) == 1) {\n                    // app_id\n                    for (auto kv : rs) {\n                        id = kv.second->get_gpid();\n                        if (id.get_app_id() == pid) {\n                            choosed_rs[id] = kv.second;\n                        }\n                    }\n                } else {\n                    return std::string(\"invalid arguments\");\n                }\n            }\n        }\n    } else {\n        // all replicas\n        choosed_rs = rs;\n    }\n\n    std::vector<task_ptr> tasks;\n    ::dsn::zlock results_lock;\n    std::map<gpid, std::pair<partition_status::type, std::string>> results; // id => status,result\n    for (auto &kv : choosed_rs) {\n        replica_ptr rep = kv.second;\n        task_ptr tsk = tasking::enqueue(LPC_EXEC_COMMAND_ON_REPLICA,\n                                        rep->tracker(),\n                                        [rep, &func, &results_lock, &results]() {\n                                            partition_status::type status = rep->status();\n                                            if 
(status != partition_status::PS_PRIMARY &&\n                                                status != partition_status::PS_SECONDARY)\n                                                return;\n                                            std::string result = func(rep);\n                                            ::dsn::zauto_lock l(results_lock);\n                                            auto &value = results[rep->get_gpid()];\n                                            value.first = status;\n                                            value.second = result;\n                                        },\n                                        rep->get_gpid().thread_hash());\n        tasks.emplace_back(std::move(tsk));\n    }\n\n    for (auto &tsk : tasks) {\n        tsk->wait();\n    }\n\n    int processed = results.size();\n    int not_found = 0;\n    for (auto &id : required_ids) {\n        if (results.find(id) == results.end()) {\n            auto &value = results[id];\n            value.first = partition_status::PS_INVALID;\n            value.second = \"not found\";\n            not_found++;\n        }\n    }\n\n    std::stringstream query_state;\n    query_state << processed << \" processed, \" << not_found << \" not found\";\n    for (auto &kv : results) {\n        query_state << \"\\n    \" << kv.first.to_string() << \"@\" << _primary_address_str;\n        if (kv.second.first != partition_status::PS_INVALID)\n            query_state << \"@\" << (kv.second.first == partition_status::PS_PRIMARY ? 
\"P\" : \"S\");\n        query_state << \" : \" << kv.second.second;\n    }\n\n    return query_state.str();\n}\n\nvoid replica_stub::close()\n{\n    if (!_is_running) {\n        return;\n    }\n\n    _tracker.cancel_outstanding_tasks();\n\n    // this replica may not be opened\n    // or is already closed by calling tool_app::stop_all_apps()\n    // in this case, just return\n    if (_kill_partition_command == nullptr) {\n        return;\n    }\n\n    UNREGISTER_VALID_HANDLER(_kill_partition_command);\n    UNREGISTER_VALID_HANDLER(_deny_client_command);\n    UNREGISTER_VALID_HANDLER(_verbose_client_log_command);\n    UNREGISTER_VALID_HANDLER(_verbose_commit_log_command);\n    UNREGISTER_VALID_HANDLER(_trigger_chkpt_command);\n    UNREGISTER_VALID_HANDLER(_query_compact_command);\n    UNREGISTER_VALID_HANDLER(_query_app_envs_command);\n#ifdef DSN_ENABLE_GPERF\n    UNREGISTER_VALID_HANDLER(_release_tcmalloc_memory_command);\n    UNREGISTER_VALID_HANDLER(_get_tcmalloc_status_command);\n    UNREGISTER_VALID_HANDLER(_max_reserved_memory_percentage_command);\n    UNREGISTER_VALID_HANDLER(_release_all_reserved_memory_command);\n#endif\n    UNREGISTER_VALID_HANDLER(_max_concurrent_bulk_load_downloading_count_command);\n\n    _kill_partition_command = nullptr;\n    _deny_client_command = nullptr;\n    _verbose_client_log_command = nullptr;\n    _verbose_commit_log_command = nullptr;\n    _trigger_chkpt_command = nullptr;\n    _query_compact_command = nullptr;\n    _query_app_envs_command = nullptr;\n#ifdef DSN_ENABLE_GPERF\n    _release_tcmalloc_memory_command = nullptr;\n    _get_tcmalloc_status_command = nullptr;\n    _max_reserved_memory_percentage_command = nullptr;\n    _release_all_reserved_memory_command = nullptr;\n#endif\n    _max_concurrent_bulk_load_downloading_count_command = nullptr;\n\n    if (_config_sync_timer_task != nullptr) {\n        _config_sync_timer_task->cancel(true);\n        _config_sync_timer_task = nullptr;\n    }\n\n    if 
(_duplication_sync_timer != nullptr) {\n        _duplication_sync_timer->close();\n        _duplication_sync_timer = nullptr;\n    }\n\n    if (_config_query_task != nullptr) {\n        _config_query_task->cancel(true);\n        _config_query_task = nullptr;\n    }\n    _state = NS_Disconnected;\n\n    if (_disk_stat_timer_task != nullptr) {\n        _disk_stat_timer_task->cancel(true);\n        _disk_stat_timer_task = nullptr;\n    }\n\n    if (_gc_timer_task != nullptr) {\n        _gc_timer_task->cancel(true);\n        _gc_timer_task = nullptr;\n    }\n\n    if (_mem_release_timer_task != nullptr) {\n        _mem_release_timer_task->cancel(true);\n        _mem_release_timer_task = nullptr;\n    }\n\n    {\n        zauto_write_lock l(_replicas_lock);\n        while (!_closing_replicas.empty()) {\n            task_ptr task = std::get<0>(_closing_replicas.begin()->second);\n            gpid tmp_gpid = _closing_replicas.begin()->first;\n            _replicas_lock.unlock_write();\n\n            task->wait();\n\n            _replicas_lock.lock_write();\n            // task will automatically remove this replica from _closing_replicas\n            if (!_closing_replicas.empty()) {\n                dassert(tmp_gpid != _closing_replicas.begin()->first,\n                        \"this replica '%s' should have been removed from _closing_replicas\",\n                        tmp_gpid.to_string());\n            }\n        }\n\n        while (!_opening_replicas.empty()) {\n            task_ptr task = _opening_replicas.begin()->second;\n            _replicas_lock.unlock_write();\n\n            task->cancel(true);\n\n            _counter_replicas_opening_count->decrement();\n            _replicas_lock.lock_write();\n            _opening_replicas.erase(_opening_replicas.begin());\n        }\n\n        while (!_replicas.empty()) {\n            _replicas.begin()->second->close();\n\n            _counter_replicas_count->decrement();\n            _replicas.erase(_replicas.begin());\n  
      }\n    }\n    _is_running = false;\n}\n\nstd::string replica_stub::get_replica_dir(const char *app_type, gpid id, bool create_new)\n{\n    std::string gpid_str = fmt::format(\"{}.{}\", id, app_type);\n    std::string replica_dir;\n    bool is_dir_exist = false;\n    for (const std::string &data_dir : _fs_manager.get_available_data_dirs()) {\n        std::string dir = utils::filesystem::path_combine(data_dir, gpid_str);\n        if (utils::filesystem::directory_exists(dir)) {\n            if (is_dir_exist) {\n                dassert(\n                    false, \"replica dir conflict: %s <--> %s\", dir.c_str(), replica_dir.c_str());\n            }\n            replica_dir = dir;\n            is_dir_exist = true;\n        }\n    }\n    if (replica_dir.empty() && create_new) {\n        _fs_manager.allocate_dir(id, app_type, replica_dir);\n    }\n    return replica_dir;\n}\n\nstd::string\nreplica_stub::get_child_dir(const char *app_type, gpid child_pid, const std::string &parent_dir)\n{\n    std::string gpid_str = fmt::format(\"{}.{}\", child_pid.to_string(), app_type);\n    std::string child_dir;\n    for (const std::string &data_dir : _fs_manager.get_available_data_dirs()) {\n        std::string dir = utils::filesystem::path_combine(data_dir, gpid_str);\n        // <parent_dir> = <prefix>/<gpid>.<app_type>\n        // check if <parent_dir>'s <prefix> is equal to <data_dir>\n        if (parent_dir.substr(0, data_dir.size() + 1) == data_dir + \"/\") {\n            child_dir = dir;\n            _fs_manager.add_replica(child_pid, child_dir);\n            break;\n        }\n    }\n    dassert_f(!child_dir.empty(), \"can not find parent_dir {} in data_dirs\", parent_dir);\n    return child_dir;\n}\n\n#ifdef DSN_ENABLE_GPERF\n// Get tcmalloc numeric property (name is \"prop\") value.\n// Return -1 if get property failed (property we used will be greater than zero)\n// Properties can be found in 'gperftools/malloc_extension.h'\nstatic int64_t 
get_tcmalloc_numeric_property(const char *prop)\n{\n    size_t value;\n    if (!::MallocExtension::instance()->GetNumericProperty(prop, &value)) {\n        derror_f(\"Failed to get tcmalloc property {}\", prop);\n        return -1;\n    }\n    return value;\n}\n\nuint64_t replica_stub::gc_tcmalloc_memory(bool release_all)\n{\n    auto tcmalloc_released_bytes = 0;\n    if (!_release_tcmalloc_memory) {\n        _is_releasing_memory.store(false);\n        _counter_tcmalloc_release_memory_size->set(tcmalloc_released_bytes);\n        return tcmalloc_released_bytes;\n    }\n\n    if (_is_releasing_memory.load()) {\n        dwarn_f(\"This node is releasing memory...\");\n        return tcmalloc_released_bytes;\n    }\n\n    _is_releasing_memory.store(true);\n    int64_t total_allocated_bytes =\n        get_tcmalloc_numeric_property(\"generic.current_allocated_bytes\");\n    int64_t reserved_bytes = get_tcmalloc_numeric_property(\"tcmalloc.pageheap_free_bytes\");\n    if (total_allocated_bytes == -1 || reserved_bytes == -1) {\n        return tcmalloc_released_bytes;\n    }\n\n    int64_t max_reserved_bytes =\n        release_all ? 
0\n                    : (total_allocated_bytes * _mem_release_max_reserved_mem_percentage / 100.0);\n    if (reserved_bytes > max_reserved_bytes) {\n        int64_t release_bytes = reserved_bytes - max_reserved_bytes;\n        tcmalloc_released_bytes = release_bytes;\n        ddebug_f(\"Memory release started, almost {} bytes will be released\", release_bytes);\n        while (release_bytes > 0) {\n            // tcmalloc releasing memory will lock page heap, release 1MB at a time to avoid locking\n            // page heap for long time\n            ::MallocExtension::instance()->ReleaseToSystem(1024 * 1024);\n            release_bytes -= 1024 * 1024;\n        }\n    }\n    _counter_tcmalloc_release_memory_size->set(tcmalloc_released_bytes);\n    _is_releasing_memory.store(false);\n    return tcmalloc_released_bytes;\n}\n#endif\n\n//\n// partition split\n//\nvoid replica_stub::create_child_replica(rpc_address primary_address,\n                                        app_info app,\n                                        ballot init_ballot,\n                                        gpid child_gpid,\n                                        gpid parent_gpid,\n                                        const std::string &parent_dir)\n{\n    replica_ptr child_replica = create_child_replica_if_not_found(child_gpid, &app, parent_dir);\n    if (child_replica != nullptr) {\n        ddebug_f(\"app({}), create child replica ({}) succeed\", app.app_name, child_gpid);\n        tasking::enqueue(LPC_PARTITION_SPLIT,\n                         child_replica->tracker(),\n                         std::bind(&replica_split_manager::child_init_replica,\n                                   child_replica->get_split_manager(),\n                                   parent_gpid,\n                                   primary_address,\n                                   init_ballot),\n                         child_gpid.thread_hash());\n    } else {\n        dwarn_f(\"failed to create child replica 
({}), ignore it and wait next run\", child_gpid);\n        split_replica_error_handler(\n            parent_gpid,\n            std::bind(&replica_split_manager::parent_cleanup_split_context, std::placeholders::_1));\n    }\n}\n\nreplica_ptr replica_stub::create_child_replica_if_not_found(gpid child_pid,\n                                                            app_info *app,\n                                                            const std::string &parent_dir)\n{\n    FAIL_POINT_INJECT_F(\"replica_stub_create_child_replica_if_not_found\",\n                        [=](dsn::string_view) -> replica_ptr {\n                            replica *rep = new replica(this, child_pid, *app, \"./\", false);\n                            rep->_config.status = partition_status::PS_INACTIVE;\n                            _replicas.insert(replicas::value_type(child_pid, rep));\n                            ddebug_f(\"mock create_child_replica_if_not_found succeed\");\n                            return rep;\n                        });\n\n    zauto_write_lock l(_replicas_lock);\n    auto it = _replicas.find(child_pid);\n    if (it != _replicas.end()) {\n        return it->second;\n    } else {\n        if (_opening_replicas.find(child_pid) != _opening_replicas.end()) {\n            dwarn_f(\"failed create child replica({}) because it is under open\", child_pid);\n            return nullptr;\n        } else if (_closing_replicas.find(child_pid) != _closing_replicas.end()) {\n            dwarn_f(\"failed create child replica({}) because it is under close\", child_pid);\n            return nullptr;\n        } else {\n            replica *rep = replica::newr(this, child_pid, *app, false, false, parent_dir);\n            if (rep != nullptr) {\n                auto pr = _replicas.insert(replicas::value_type(child_pid, rep));\n                dassert_f(pr.second, \"child replica {} has been existed\", rep->name());\n                _counter_replicas_count->increment();\n              
  _closed_replicas.erase(child_pid);\n            }\n            return rep;\n        }\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_stub::split_replica_error_handler(gpid pid, local_execution handler)\n{\n    split_replica_exec(LPC_PARTITION_SPLIT_ERROR, pid, handler);\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\ndsn::error_code\nreplica_stub::split_replica_exec(dsn::task_code code, gpid pid, local_execution handler)\n{\n    FAIL_POINT_INJECT_F(\"replica_stub_split_replica_exec\", [](dsn::string_view) { return ERR_OK; });\n    replica_ptr replica = pid.get_app_id() == 0 ? nullptr : get_replica(pid);\n    if (replica && handler) {\n        tasking::enqueue(code,\n                         replica.get()->tracker(),\n                         [handler, replica]() { handler(replica->get_split_manager()); },\n                         pid.thread_hash());\n        return ERR_OK;\n    }\n    dwarn_f(\"replica({}) is invalid\", pid);\n    return ERR_OBJECT_NOT_FOUND;\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_stub::on_notify_primary_split_catch_up(notify_catch_up_rpc rpc)\n{\n    const notify_catch_up_request &request = rpc.request();\n    notify_cacth_up_response &response = rpc.response();\n    replica_ptr replica = get_replica(request.parent_gpid);\n    if (replica != nullptr) {\n        replica->get_split_manager()->parent_handle_child_catch_up(request, response);\n    } else {\n        response.err = ERR_OBJECT_NOT_FOUND;\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_stub::on_update_child_group_partition_count(update_child_group_partition_count_rpc rpc)\n{\n    const auto &request = rpc.request();\n    auto &response = rpc.response();\n    replica_ptr replica = get_replica(request.child_pid);\n    if (replica != nullptr) {\n        replica->get_split_manager()->on_update_child_group_partition_count(request, response);\n    } else {\n        response.err = ERR_OBJECT_NOT_FOUND;\n    }\n}\n\nvoid 
replica_stub::update_disk_holding_replicas()\n{\n    for (const auto &dir_node : _fs_manager._dir_nodes) {\n        // clear the holding_primary_replicas/holding_secondary_replicas and re-calculate it from\n        // holding_replicas\n        dir_node->holding_primary_replicas.clear();\n        dir_node->holding_secondary_replicas.clear();\n        for (const auto &holding_replicas : dir_node->holding_replicas) {\n            const std::set<dsn::gpid> &pids = holding_replicas.second;\n            for (const auto &pid : pids) {\n                replica_ptr replica = get_replica(pid);\n                if (replica == nullptr) {\n                    continue;\n                }\n                if (replica->status() == partition_status::PS_PRIMARY) {\n                    dir_node->holding_primary_replicas[holding_replicas.first].emplace(pid);\n                } else if (replica->status() == partition_status::PS_SECONDARY) {\n                    dir_node->holding_secondary_replicas[holding_replicas.first].emplace(pid);\n                }\n            }\n        }\n    }\n}\n\nvoid replica_stub::on_bulk_load(bulk_load_rpc rpc)\n{\n    const bulk_load_request &request = rpc.request();\n    bulk_load_response &response = rpc.response();\n\n    ddebug_f(\"[{}@{}]: receive bulk load request\", request.pid, _primary_address_str);\n    replica_ptr rep = get_replica(request.pid);\n    if (rep != nullptr) {\n        rep->get_bulk_loader()->on_bulk_load(request, response);\n    } else {\n        derror_f(\"replica({}) is not existed\", request.pid);\n        response.err = ERR_OBJECT_NOT_FOUND;\n    }\n}\n\nvoid replica_stub::on_group_bulk_load(group_bulk_load_rpc rpc)\n{\n    const group_bulk_load_request &request = rpc.request();\n    group_bulk_load_response &response = rpc.response();\n\n    ddebug_f(\"[{}@{}]: received group bulk load request, primary = {}, ballot = {}, \"\n             \"meta_bulk_load_status = {}\",\n             request.config.pid,\n             
_primary_address_str,\n             request.config.primary.to_string(),\n             request.config.ballot,\n             enum_to_string(request.meta_bulk_load_status));\n\n    replica_ptr rep = get_replica(request.config.pid);\n    if (rep != nullptr) {\n        rep->get_bulk_loader()->on_group_bulk_load(request, response);\n    } else {\n        derror_f(\"replica({}) is not existed\", request.config.pid);\n        response.err = ERR_OBJECT_NOT_FOUND;\n    }\n}\n\nvoid replica_stub::on_detect_hotkey(detect_hotkey_rpc rpc)\n{\n    const auto &request = rpc.request();\n    auto &response = rpc.response();\n\n    ddebug_f(\"[{}@{}]: received detect hotkey request, hotkey_type = {}, detect_action = {}\",\n             request.pid,\n             _primary_address_str,\n             enum_to_string(request.type),\n             enum_to_string(request.action));\n\n    replica_ptr rep = get_replica(request.pid);\n    if (rep != nullptr) {\n        rep->on_detect_hotkey(request, response);\n    } else {\n        response.err = ERR_OBJECT_NOT_FOUND;\n        response.err_hint = fmt::format(\"not find the replica {} \\n\", request.pid);\n    }\n}\n\nvoid replica_stub::query_app_data_version(\n    int32_t app_id, /*pidx => data_version*/ std::unordered_map<int32_t, uint32_t> &version_map)\n{\n    zauto_read_lock l(_replicas_lock);\n    for (const auto &kv : _replicas) {\n        if (kv.first.get_app_id() == app_id) {\n            replica_ptr rep = kv.second;\n            if (rep != nullptr) {\n                uint32_t data_version = rep->query_data_version();\n                version_map[kv.first.get_partition_index()] = data_version;\n            }\n        }\n    }\n}\n\nvoid replica_stub::query_app_manual_compact_status(\n    int32_t app_id, std::unordered_map<gpid, manual_compaction_status::type> &status)\n{\n    zauto_read_lock l(_replicas_lock);\n    for (auto it = _replicas.begin(); it != _replicas.end(); ++it) {\n        if (it->first.get_app_id() == app_id) {\n        
    status[it->first] = it->second->get_manual_compact_status();\n        }\n    }\n}\n\nvoid replica_stub::update_disks_status()\n{\n    for (const auto &dir_node : _fs_manager._status_updated_dir_nodes) {\n        for (const auto &holding_replicas : dir_node->holding_replicas) {\n            const std::set<gpid> &pids = holding_replicas.second;\n            for (const auto &pid : pids) {\n                replica_ptr replica = get_replica(pid);\n                if (replica == nullptr) {\n                    continue;\n                }\n                replica->set_disk_status(dir_node->status);\n                ddebug_f(\"{} update disk_status to {}\",\n                         replica->name(),\n                         enum_to_string(replica->get_disk_status()));\n            }\n        }\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_stub.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n//\n// the replica_stub is the *singleton* entry to\n// access all replica managed in the same process\n//   replica_stub(singleton) --> replica --> replication_app_base\n//\n\n#include <functional>\n#include <tuple>\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n#include <dsn/dist/failure_detector_multimaster.h>\n#include <dsn/dist/nfs_node.h>\n\n#include \"common/replication_common.h\"\n#include \"common/bulk_load_common.h\"\n#include \"common/fs_manager.h\"\n#include \"block_service/block_service_manager.h\"\n#include \"replica.h\"\n\nnamespace dsn {\nnamespace replication {\n\nDSN_DECLARE_uint32(max_concurrent_manual_emergency_checkpointing_count);\n\ntypedef rpc_holder<group_check_response, 
learn_notify_response> learn_completion_notification_rpc;\ntypedef rpc_holder<group_check_request, group_check_response> group_check_rpc;\ntypedef rpc_holder<query_replica_decree_request, query_replica_decree_response>\n    query_replica_decree_rpc;\ntypedef rpc_holder<query_replica_info_request, query_replica_info_response> query_replica_info_rpc;\ntypedef rpc_holder<learn_request, learn_response> query_last_checkpoint_info_rpc;\ntypedef rpc_holder<query_disk_info_request, query_disk_info_response> query_disk_info_rpc;\ntypedef rpc_holder<replica_disk_migrate_request, replica_disk_migrate_response>\n    replica_disk_migrate_rpc;\ntypedef rpc_holder<query_app_info_request, query_app_info_response> query_app_info_rpc;\ntypedef rpc_holder<notify_catch_up_request, notify_cacth_up_response> notify_catch_up_rpc;\ntypedef rpc_holder<update_child_group_partition_count_request,\n                   update_child_group_partition_count_response>\n    update_child_group_partition_count_rpc;\ntypedef rpc_holder<group_bulk_load_request, group_bulk_load_response> group_bulk_load_rpc;\ntypedef rpc_holder<detect_hotkey_request, detect_hotkey_response> detect_hotkey_rpc;\ntypedef rpc_holder<add_new_disk_request, add_new_disk_response> add_new_disk_rpc;\n\nclass mutation_log;\nnamespace test {\nclass test_checker;\n}\nclass cold_backup_context;\nclass replica_split_manager;\n\ntypedef std::unordered_map<gpid, replica_ptr> replicas;\ntypedef std::function<void(\n    ::dsn::rpc_address /*from*/, const replica_configuration & /*new_config*/, bool /*is_closing*/)>\n    replica_state_subscriber;\n\nclass replica_stub;\ntypedef dsn::ref_ptr<replica_stub> replica_stub_ptr;\n\nclass duplication_sync_timer;\nclass replica_bulk_loader;\nclass replica_backup_server;\nclass replica_split_manager;\n\nclass replica_stub : public serverlet<replica_stub>, public ref_counter\n{\npublic:\n    static bool s_not_exit_on_log_failure; // for test\n\npublic:\n    replica_stub(replica_state_subscriber 
subscriber = nullptr, bool is_long_subscriber = true);\n    ~replica_stub(void);\n\n    //\n    // initialization\n    //\n    void initialize(const replication_options &opts, bool clear = false);\n    void initialize(bool clear = false);\n    void initialize_fs_manager(std::vector<std::string> &data_dirs,\n                               std::vector<std::string> &data_dir_tags);\n    void set_options(const replication_options &opts) { _options = opts; }\n    void open_service();\n    void close();\n\n    //\n    //    requests from clients\n    //\n    void on_client_write(gpid id, dsn::message_ex *request);\n    void on_client_read(gpid id, dsn::message_ex *request);\n\n    //\n    //    messages from meta server\n    //\n    void on_config_proposal(const configuration_update_request &proposal);\n    void on_query_decree(query_replica_decree_rpc rpc);\n    void on_query_replica_info(query_replica_info_rpc rpc);\n    void on_query_app_info(query_app_info_rpc rpc);\n    void on_bulk_load(bulk_load_rpc rpc);\n\n    //\n    //    messages from peers (primary or secondary)\n    //        - prepare\n    //        - commit\n    //        - learn\n    //        - bulk_load\n    //\n    void on_prepare(dsn::message_ex *request);\n    void on_learn(dsn::message_ex *msg);\n    void on_learn_completion_notification(learn_completion_notification_rpc rpc);\n    void on_add_learner(const group_check_request &request);\n    void on_remove(const replica_configuration &request);\n    void on_group_check(group_check_rpc rpc);\n    void on_group_bulk_load(group_bulk_load_rpc rpc);\n\n    //\n    //    local messages\n    //\n    void on_meta_server_connected();\n    void on_meta_server_disconnected();\n    void on_gc();\n    void on_disk_stat();\n\n    //\n    //  routines published for test\n    //\n    void init_gc_for_test();\n    void set_meta_server_disconnected_for_test() { on_meta_server_disconnected(); }\n    void set_meta_server_connected_for_test(const 
configuration_query_by_node_response &config);\n    void set_replica_state_subscriber_for_test(replica_state_subscriber subscriber,\n                                               bool is_long_subscriber);\n\n    //\n    // common routines for inquiry\n    //\n    replica_ptr get_replica(gpid id) const;\n    replication_options &options() { return _options; }\n    const replication_options &options() const { return _options; }\n    bool is_connected() const { return NS_Connected == _state; }\n    virtual rpc_address get_meta_server_address() const { return _failure_detector->get_servers(); }\n    rpc_address primary_address() const { return _primary_address; }\n\n    std::string get_replica_dir(const char *app_type, gpid id, bool create_new = true);\n\n    // during partition split, we should gurantee child replica and parent replica share the\n    // same data dir\n    std::string get_child_dir(const char *app_type, gpid child_pid, const std::string &parent_dir);\n\n    //\n    // helper methods\n    //\n\n    // execute command function on specified or all replicas.\n    //   - if allow_empty_args = true and args is empty, then apply on all replicas.\n    //   - if allow_empty_args = false, you should specify at least one argument.\n    // each argument should be in format of:\n    //     id1,id2... 
(where id is 'app_id' or 'app_id.partition_id')\n    std::string exec_command_on_replica(const std::vector<std::string> &args,\n                                        bool allow_empty_args,\n                                        std::function<std::string(const replica_ptr &rep)> func);\n\n    //\n    // partition split\n    //\n\n    // called by parent partition, executed by child partition\n    void create_child_replica(dsn::rpc_address primary_address,\n                              app_info app,\n                              ballot init_ballot,\n                              gpid child_gpid,\n                              gpid parent_gpid,\n                              const std::string &parent_dir);\n\n    // create a new replica instance if not found\n    // return nullptr when failed to create new replica\n    replica_ptr\n    create_child_replica_if_not_found(gpid child_pid, app_info *app, const std::string &parent_dir);\n\n    typedef std::function<void(replica_split_manager *split_mgr)> local_execution;\n\n    // This function is used for partition split, caller(replica)\n    // parent/child may want child/parent to execute function during partition split\n    // if replica `pid` exists, will execute function `handler` and return ERR_OK, otherwise return\n    // ERR_OBJECT_NOT_FOUND\n    dsn::error_code split_replica_exec(dsn::task_code code, gpid pid, local_execution handler);\n\n    // This function is used for partition split error handler\n    void split_replica_error_handler(gpid pid, local_execution handler);\n\n    // on primary parent partition, child notify itself has been caught up parent\n    void on_notify_primary_split_catch_up(notify_catch_up_rpc rpc);\n\n    // on child partition, update new partition count\n    void on_update_child_group_partition_count(update_child_group_partition_count_rpc rpc);\n\n    // TODO: (Tangyanzhao) add some comments\n    void on_detect_hotkey(detect_hotkey_rpc rpc);\n\n    void 
on_query_disk_info(query_disk_info_rpc rpc);\n    void on_disk_migrate(replica_disk_migrate_rpc rpc);\n\n    // query partitions compact status by app_id\n    void query_app_manual_compact_status(\n        int32_t app_id, /*out*/ std::unordered_map<gpid, manual_compaction_status::type> &status);\n\n    void on_add_new_disk(add_new_disk_rpc rpc);\n\n    // query last checkpoint info for follower in duplication process\n    void on_query_last_checkpoint(query_last_checkpoint_info_rpc rpc);\n\nprivate:\n    enum replica_node_state\n    {\n        NS_Disconnected,\n        NS_Connecting,\n        NS_Connected\n    };\n\n    enum replica_life_cycle\n    {\n        RL_invalid,\n        RL_creating,\n        RL_serving,\n        RL_closing,\n        RL_closed\n    };\n\n    void initialize_start();\n    void query_configuration_by_node();\n    void on_meta_server_disconnected_scatter(replica_stub_ptr this_, gpid id);\n    void on_node_query_reply(error_code err, dsn::message_ex *request, dsn::message_ex *response);\n    void on_node_query_reply_scatter(replica_stub_ptr this_,\n                                     const configuration_update_request &config);\n    void on_node_query_reply_scatter2(replica_stub_ptr this_, gpid id);\n    void remove_replica_on_meta_server(const app_info &info, const partition_configuration &config);\n    task_ptr begin_open_replica(const app_info &app,\n                                gpid id,\n                                const std::shared_ptr<group_check_request> &req,\n                                const std::shared_ptr<configuration_update_request> &req2);\n    void open_replica(const app_info &app,\n                      gpid id,\n                      const std::shared_ptr<group_check_request> &req,\n                      const std::shared_ptr<configuration_update_request> &req2);\n    task_ptr begin_close_replica(replica_ptr r);\n    void close_replica(replica_ptr r);\n    void notify_replica_state_update(const 
replica_configuration &config, bool is_closing);\n    void trigger_checkpoint(replica_ptr r, bool is_emergency);\n    void handle_log_failure(error_code err);\n\n    void install_perf_counters();\n    dsn::error_code on_kill_replica(gpid id);\n\n    void get_replica_info(/*out*/ replica_info &info, /*in*/ replica_ptr r);\n    void get_local_replicas(/*out*/ std::vector<replica_info> &replicas);\n    replica_life_cycle get_replica_life_cycle(gpid id);\n    void on_gc_replica(replica_stub_ptr this_, gpid id);\n\n    void response_client(gpid id,\n                         bool is_read,\n                         dsn::message_ex *request,\n                         partition_status::type status,\n                         error_code error);\n    void update_disk_holding_replicas();\n\n    void update_disks_status();\n\n    void register_ctrl_command();\n\n    int get_app_id_from_replicas(std::string app_name)\n    {\n        for (const auto &replica : _replicas) {\n            const app_info &info = *(replica.second)->get_app_info();\n            if (info.app_name == app_name) {\n                return info.app_id;\n            }\n        }\n        return 0;\n    }\n\n    void query_app_data_version(\n        int32_t app_id,\n        /*pidx => data_version*/ std::unordered_map<int32_t, uint32_t> &version_map);\n\n#ifdef DSN_ENABLE_GPERF\n    // Try to release tcmalloc memory back to operating system\n    // If release_all = true, it will release all reserved-not-used memory\n    uint64_t gc_tcmalloc_memory(bool release_all);\n#endif\n\nprivate:\n    friend class ::dsn::replication::test::test_checker;\n    friend class ::dsn::replication::replica;\n    friend class ::dsn::replication::potential_secondary_context;\n    friend class ::dsn::replication::cold_backup_context;\n\n    friend class replica_duplicator;\n    friend class replica_http_service;\n    friend class replica_bulk_loader;\n    friend class replica_split_manager;\n    friend class 
replica_disk_migrator;\n\n    friend class mock_replica_stub;\n    friend class duplication_sync_timer;\n    friend class duplication_sync_timer_test;\n    friend class replica_duplicator_manager_test;\n    friend class duplication_test_base;\n    friend class replica_test;\n    friend class replica_disk_test_base;\n    friend class replica_disk_migrate_test;\n    friend class replica_stub_test_base;\n    friend class open_replica_test;\n    friend class replica_follower;\n    friend class replica_follower_test;\n\n    typedef std::unordered_map<gpid, ::dsn::task_ptr> opening_replicas;\n    typedef std::unordered_map<gpid, std::tuple<task_ptr, replica_ptr, app_info, replica_info>>\n        closing_replicas; // <gpid, <close_task, replica, app_info, replica_info> >\n    typedef std::map<gpid, std::pair<app_info, replica_info>>\n        closed_replicas; // <gpid, <app_info, replica_info> >\n\n    mutable zrwlock_nr _replicas_lock;\n    replicas _replicas;\n    opening_replicas _opening_replicas;\n    closing_replicas _closing_replicas;\n    closed_replicas _closed_replicas;\n\n    mutation_log_ptr _log;\n    ::dsn::rpc_address _primary_address;\n    char _primary_address_str[64];\n\n    std::shared_ptr<dsn::dist::slave_failure_detector_with_multimaster> _failure_detector;\n    mutable zlock _state_lock;\n    volatile replica_node_state _state;\n\n    // constants\n    replication_options _options;\n    replica_state_subscriber _replica_state_subscriber;\n    bool _is_long_subscriber;\n\n    // temproal states\n    ::dsn::task_ptr _config_query_task;\n    ::dsn::task_ptr _config_sync_timer_task;\n    ::dsn::task_ptr _gc_timer_task;\n    ::dsn::task_ptr _disk_stat_timer_task;\n    ::dsn::task_ptr _mem_release_timer_task;\n\n    std::unique_ptr<duplication_sync_timer> _duplication_sync_timer;\n    std::unique_ptr<replica_backup_server> _backup_server;\n\n    // command_handlers\n    dsn_handle_t _kill_partition_command;\n    dsn_handle_t _deny_client_command;\n    
dsn_handle_t _verbose_client_log_command;\n    dsn_handle_t _verbose_commit_log_command;\n    dsn_handle_t _trigger_chkpt_command;\n    dsn_handle_t _query_compact_command;\n    dsn_handle_t _query_app_envs_command;\n#ifdef DSN_ENABLE_GPERF\n    dsn_handle_t _release_tcmalloc_memory_command;\n    dsn_handle_t _get_tcmalloc_status_command;\n    dsn_handle_t _max_reserved_memory_percentage_command;\n    dsn_handle_t _release_all_reserved_memory_command;\n#endif\n    dsn_handle_t _max_concurrent_bulk_load_downloading_count_command;\n\n    bool _deny_client;\n    bool _verbose_client_log;\n    bool _verbose_commit_log;\n    bool _release_tcmalloc_memory;\n    int32_t _mem_release_max_reserved_mem_percentage;\n    int32_t _max_concurrent_bulk_load_downloading_count;\n\n    // we limit LT_APP max concurrent count, because nfs service implementation is\n    // too simple, it do not support priority.\n    std::atomic_int _learn_app_concurrent_count;\n\n    // handle all the data dirs\n    fs_manager _fs_manager;\n\n    // handle all the block filesystems for current replica stub\n    // (in other words, current service node)\n    dist::block_service::block_service_manager _block_service_manager;\n\n    // nfs_node\n    std::unique_ptr<dsn::nfs_node> _nfs;\n\n    // write body size exceed this threshold will be logged and reject, 0 means no check\n    uint64_t _max_allowed_write_size;\n\n    // replica count executing bulk load downloading concurrently\n    std::atomic_int _bulk_load_downloading_count;\n\n    // replica count executing emergency checkpoint concurrently\n    std::atomic_int _manual_emergency_checkpointing_count;\n\n    bool _is_running;\n\n#ifdef DSN_ENABLE_GPERF\n    std::atomic_bool _is_releasing_memory{false};\n#endif\n\n    // performance counters\n    perf_counter_wrapper _counter_replicas_count;\n    perf_counter_wrapper _counter_replicas_opening_count;\n    perf_counter_wrapper _counter_replicas_closing_count;\n    perf_counter_wrapper 
_counter_replicas_commit_qps;\n\n    perf_counter_wrapper _counter_replicas_learning_count;\n    perf_counter_wrapper _counter_replicas_learning_max_duration_time_ms;\n    perf_counter_wrapper _counter_replicas_learning_max_copy_file_size;\n    perf_counter_wrapper _counter_replicas_learning_recent_start_count;\n    perf_counter_wrapper _counter_replicas_learning_recent_round_start_count;\n    perf_counter_wrapper _counter_replicas_learning_recent_copy_file_count;\n    perf_counter_wrapper _counter_replicas_learning_recent_copy_file_size;\n    perf_counter_wrapper _counter_replicas_learning_recent_copy_buffer_size;\n    perf_counter_wrapper _counter_replicas_learning_recent_learn_cache_count;\n    perf_counter_wrapper _counter_replicas_learning_recent_learn_app_count;\n    perf_counter_wrapper _counter_replicas_learning_recent_learn_log_count;\n    perf_counter_wrapper _counter_replicas_learning_recent_learn_reset_count;\n    perf_counter_wrapper _counter_replicas_learning_recent_learn_fail_count;\n    perf_counter_wrapper _counter_replicas_learning_recent_learn_succ_count;\n\n    perf_counter_wrapper _counter_replicas_recent_prepare_fail_count;\n    perf_counter_wrapper _counter_replicas_recent_replica_move_error_count;\n    perf_counter_wrapper _counter_replicas_recent_replica_move_garbage_count;\n    perf_counter_wrapper _counter_replicas_recent_replica_remove_dir_count;\n    perf_counter_wrapper _counter_replicas_error_replica_dir_count;\n    perf_counter_wrapper _counter_replicas_garbage_replica_dir_count;\n    perf_counter_wrapper _counter_replicas_tmp_replica_dir_count;\n    perf_counter_wrapper _counter_replicas_origin_replica_dir_count;\n\n    perf_counter_wrapper _counter_replicas_recent_group_check_fail_count;\n\n    perf_counter_wrapper _counter_shared_log_size;\n    perf_counter_wrapper _counter_shared_log_recent_write_size;\n    perf_counter_wrapper _counter_recent_trigger_emergency_checkpoint_count;\n\n    // <- Duplication Metrics ->\n    // 
TODO(wutao1): calculate the counters independently for each remote cluster\n    //               if we need to duplicate to multiple clusters someday.\n    perf_counter_wrapper _counter_dup_confirmed_rate;\n    perf_counter_wrapper _counter_dup_pending_mutations_count;\n\n    perf_counter_wrapper _counter_cold_backup_running_count;\n    perf_counter_wrapper _counter_cold_backup_recent_start_count;\n    perf_counter_wrapper _counter_cold_backup_recent_succ_count;\n    perf_counter_wrapper _counter_cold_backup_recent_fail_count;\n    perf_counter_wrapper _counter_cold_backup_recent_cancel_count;\n    perf_counter_wrapper _counter_cold_backup_recent_pause_count;\n    perf_counter_wrapper _counter_cold_backup_recent_upload_file_succ_count;\n    perf_counter_wrapper _counter_cold_backup_recent_upload_file_fail_count;\n    perf_counter_wrapper _counter_cold_backup_recent_upload_file_size;\n    perf_counter_wrapper _counter_cold_backup_max_duration_time_ms;\n    perf_counter_wrapper _counter_cold_backup_max_upload_file_size;\n\n    perf_counter_wrapper _counter_recent_read_fail_count;\n    perf_counter_wrapper _counter_recent_write_fail_count;\n    perf_counter_wrapper _counter_recent_read_busy_count;\n    perf_counter_wrapper _counter_recent_write_busy_count;\n\n    perf_counter_wrapper _counter_recent_write_size_exceed_threshold_count;\n\n#ifdef DSN_ENABLE_GPERF\n    perf_counter_wrapper _counter_tcmalloc_release_memory_size;\n#endif\n\n    // <- Bulk load Metrics ->\n    perf_counter_wrapper _counter_bulk_load_running_count;\n    perf_counter_wrapper _counter_bulk_load_downloading_count;\n    perf_counter_wrapper _counter_bulk_load_ingestion_count;\n    perf_counter_wrapper _counter_bulk_load_succeed_count;\n    perf_counter_wrapper _counter_bulk_load_failed_count;\n    perf_counter_wrapper _counter_bulk_load_download_file_succ_count;\n    perf_counter_wrapper _counter_bulk_load_download_file_fail_count;\n    perf_counter_wrapper 
_counter_bulk_load_download_file_size;\n    perf_counter_wrapper _counter_bulk_load_max_ingestion_time_ms;\n    perf_counter_wrapper _counter_bulk_load_max_duration_time_ms;\n\n    // <- Partition split Metrics ->\n    perf_counter_wrapper _counter_replicas_splitting_count;\n    perf_counter_wrapper _counter_replicas_splitting_max_duration_time_ms;\n    perf_counter_wrapper _counter_replicas_splitting_max_async_learn_time_ms;\n    perf_counter_wrapper _counter_replicas_splitting_max_copy_file_size;\n    perf_counter_wrapper _counter_replicas_splitting_recent_start_count;\n    perf_counter_wrapper _counter_replicas_splitting_recent_copy_file_count;\n    perf_counter_wrapper _counter_replicas_splitting_recent_copy_file_size;\n    perf_counter_wrapper _counter_replicas_splitting_recent_copy_mutation_count;\n    perf_counter_wrapper _counter_replicas_splitting_recent_split_fail_count;\n    perf_counter_wrapper _counter_replicas_splitting_recent_split_succ_count;\n\n    dsn::task_tracker _tracker;\n};\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_test_utils.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/dist/replication/replica_test_utils.h>\n\n#include \"replica.h\"\n#include \"replica_stub.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass mock_replica : public replica\n{\npublic:\n    mock_replica(replica_stub *stub,\n                 const gpid &gpid,\n                 const app_info &app,\n                 const char *dir,\n                 bool restore_if_necessary,\n                 bool is_duplication_follower)\n        : replica(stub, gpid, app, dir, restore_if_necessary, is_duplication_follower)\n    {\n    }\n};\n\nreplica *create_test_replica(replica_stub *stub,\n                             gpid gpid,\n                             const app_info &app,\n                             const char *dir,\n  
                           bool restore_if_necessary,\n                             bool is_duplication_follower)\n{\n    return new mock_replica(stub, gpid, app, dir, restore_if_necessary, is_duplication_follower);\n}\n\nreplica_stub *create_test_replica_stub() { return new replica_stub(); }\n\nvoid destroy_replica(replica *r) { delete r; }\n\nvoid destroy_replica_stub(replica_stub *rs) { delete rs; }\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replica_throttle.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"replica.h\"\n#include \"mutation.h\"\n#include \"mutation_log.h\"\n#include \"replica_stub.h\"\n\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication/replica_envs.h>\n\nnamespace dsn {\nnamespace replication {\n\n#define THROTTLE_REQUEST(op_type, throttling_type, request, request_units)                         \\\n    do {                                                                                           \\\n        int64_t delay_ms = 0;                                                                      \\\n        auto type = _##op_type##_##throttling_type##_throttling_controller.control(                \\\n            request->header->client.timeout_ms, request_units, delay_ms);                          \\\n        if (type != throttling_controller::PASS) {                                                 \\\n            if (type == throttling_controller::DELAY) {                                            \\\n                tasking::enqueue(                                                                  \\\n                    
LPC_##op_type##_THROTTLING_DELAY,                                              \\\n                    &_tracker,                                                                     \\\n                    [ this, req = message_ptr(request) ]() { on_client_##op_type(req, true); },    \\\n                    get_gpid().thread_hash(),                                                      \\\n                    std::chrono::milliseconds(delay_ms));                                          \\\n                _counter_recent_##op_type##_throttling_delay_count->increment();                   \\\n            } else { /** type == throttling_controller::REJECT **/                                 \\\n                if (delay_ms > 0) {                                                                \\\n                    tasking::enqueue(LPC_##op_type##_THROTTLING_DELAY,                             \\\n                                     &_tracker,                                                    \\\n                                     [ this, req = message_ptr(request) ]() {                      \\\n                                         response_client_##op_type(req, ERR_BUSY);                 \\\n                                     },                                                            \\\n                                     get_gpid().thread_hash(),                                     \\\n                                     std::chrono::milliseconds(delay_ms));                         \\\n                } else {                                                                           \\\n                    response_client_##op_type(request, ERR_BUSY);                                  \\\n                }                                                                                  \\\n                _counter_recent_##op_type##_throttling_reject_count->increment();                  \\\n            }                                                  
                                    \\\n            return true;                                                                           \\\n        }                                                                                          \\\n    } while (0)\n\nbool replica::throttle_write_request(message_ex *request)\n{\n    THROTTLE_REQUEST(write, qps, request, 1);\n    THROTTLE_REQUEST(write, size, request, request->body_size());\n    return false;\n}\n\nbool replica::throttle_read_request(message_ex *request)\n{\n    THROTTLE_REQUEST(read, qps, request, 1);\n    return false;\n}\n\nbool replica::throttle_backup_request(message_ex *request)\n{\n    int64_t delay_ms = 0;\n    auto type = _backup_request_qps_throttling_controller.control(\n        request->header->client.timeout_ms, 1, delay_ms);\n    if (type != throttling_controller::PASS) {\n        if (type == throttling_controller::DELAY) {\n            tasking::enqueue(LPC_read_THROTTLING_DELAY,\n                             &_tracker,\n                             [ this, req = message_ptr(request) ]() { on_client_read(req, true); },\n                             get_gpid().thread_hash(),\n                             std::chrono::milliseconds(delay_ms));\n            _counter_recent_backup_request_throttling_delay_count->increment();\n        } else { /** type == throttling_controller::REJECT **/\n            _counter_recent_backup_request_throttling_reject_count->increment();\n        }\n        return true;\n    }\n    return false;\n}\n\nvoid replica::update_throttle_envs(const std::map<std::string, std::string> &envs)\n{\n    update_throttle_env_internal(\n        envs, replica_envs::WRITE_QPS_THROTTLING, _write_qps_throttling_controller);\n    update_throttle_env_internal(\n        envs, replica_envs::WRITE_SIZE_THROTTLING, _write_size_throttling_controller);\n    update_throttle_env_internal(\n        envs, replica_envs::READ_QPS_THROTTLING, _read_qps_throttling_controller);\n    
update_throttle_env_internal(envs,\n                                 replica_envs::BACKUP_REQUEST_QPS_THROTTLING,\n                                 _backup_request_qps_throttling_controller);\n}\n\nvoid replica::update_throttle_env_internal(const std::map<std::string, std::string> &envs,\n                                           const std::string &key,\n                                           throttling_controller &cntl)\n{\n    bool throttling_changed = false;\n    std::string old_throttling;\n    std::string parse_error;\n    auto find = envs.find(key);\n    if (find != envs.end()) {\n        if (!cntl.parse_from_env(find->second,\n                                 _app_info.partition_count,\n                                 parse_error,\n                                 throttling_changed,\n                                 old_throttling)) {\n            dwarn_replica(\"parse env failed, key = \\\"{}\\\", value = \\\"{}\\\", error = \\\"{}\\\"\",\n                          key,\n                          find->second,\n                          parse_error);\n            // reset if parse failed\n            cntl.reset(throttling_changed, old_throttling);\n        }\n    } else {\n        // reset if env not found\n        cntl.reset(throttling_changed, old_throttling);\n    }\n    if (throttling_changed) {\n        ddebug_replica(\"switch {} from \\\"{}\\\" to \\\"{}\\\"\", key, old_throttling, cntl.env_value());\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replication_app_base.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"replica.h\"\n#include \"mutation.h\"\n#include \"common/bulk_load_common.h\"\n#include \"dsn/dist/replication/duplication_common.h\"\n#include <dsn/utils/latency_tracer.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/utility/defer.h>\n#include <dsn/utility/factory_store.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/crc.h>\n#include <dsn/service_api_c.h>\n#include <fstream>\n#include <sstream>\n#include <memory>\n#include <dsn/utility/fail_point.h>\n#include <dsn/dist/replication/replica_envs.h>\n\nnamespace dsn {\nnamespace replication {\n\nconst std::string replica_init_info::kInitInfo = 
\".init-info\";\n\nDEFINE_TASK_CODE_AIO(LPC_AIO_INFO_WRITE, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\n\nnamespace {\nerror_code write_blob_to_file(const std::string &file, const blob &data)\n{\n    std::string tmp_file = file + \".tmp\";\n    disk_file *hfile = file::open(tmp_file.c_str(), O_WRONLY | O_CREAT | O_BINARY | O_TRUNC, 0666);\n    ERR_LOG_AND_RETURN_NOT_TRUE(hfile, ERR_FILE_OPERATION_FAILED, \"open file {} failed\", tmp_file);\n    auto cleanup = defer([tmp_file]() { utils::filesystem::remove_path(tmp_file); });\n\n    error_code err;\n    size_t sz = 0;\n    task_tracker tracker;\n    aio_task_ptr tsk = file::write(hfile,\n                                   data.data(),\n                                   data.length(),\n                                   0,\n                                   LPC_AIO_INFO_WRITE,\n                                   &tracker,\n                                   [&err, &sz](error_code e, size_t s) {\n                                       err = e;\n                                       sz = s;\n                                   },\n                                   0);\n    dassert_f(tsk, \"create file::write task failed\");\n    tracker.wait_outstanding_tasks();\n    file::flush(hfile);\n    file::close(hfile);\n    ERR_LOG_AND_RETURN_NOT_OK(err, \"write file {} failed\", tmp_file);\n    dcheck_eq(data.length(), sz);\n    // TODO(yingchun): need fsync too？\n    ERR_LOG_AND_RETURN_NOT_TRUE(utils::filesystem::rename_path(tmp_file, file),\n                                ERR_FILE_OPERATION_FAILED,\n                                \"move file from {} to {} failed\",\n                                tmp_file,\n                                file);\n\n    return ERR_OK;\n}\n} // namespace\n\nerror_code replica_init_info::load(const std::string &dir)\n{\n    std::string info_path = utils::filesystem::path_combine(dir, kInitInfo);\n    dassert_f(utils::filesystem::path_exists(info_path), \"file({}) not exist\", 
info_path);\n    ERR_LOG_AND_RETURN_NOT_OK(\n        load_json(info_path), \"load replica_init_info from {} failed\", info_path);\n    ddebug_f(\"load replica_init_info from {} succeed: {}\", info_path, to_string());\n    return ERR_OK;\n}\n\nerror_code replica_init_info::store(const std::string &dir)\n{\n    uint64_t start = dsn_now_ns();\n    std::string info_path = utils::filesystem::path_combine(dir, kInitInfo);\n    ERR_LOG_AND_RETURN_NOT_OK(store_json(info_path),\n                              \"store replica_init_info to {} failed, time_used_ns = {}\",\n                              info_path,\n                              dsn_now_ns() - start);\n    ddebug_f(\"store replica_init_info to {} succeed, time_used_ns = {}: {}\",\n             info_path,\n             dsn_now_ns() - start,\n             to_string());\n    return ERR_OK;\n}\n\nerror_code replica_init_info::load_json(const std::string &file)\n{\n    std::ifstream is(file, std::ios::binary);\n    ERR_LOG_AND_RETURN_NOT_TRUE(\n        is.is_open(), ERR_FILE_OPERATION_FAILED, \"open file {} failed\", file);\n\n    int64_t sz = 0;\n    ERR_LOG_AND_RETURN_NOT_TRUE(utils::filesystem::file_size(std::string(file), sz),\n                                ERR_FILE_OPERATION_FAILED,\n                                \"get file size of {} failed\",\n                                file);\n\n    std::shared_ptr<char> buffer(utils::make_shared_array<char>(sz));\n    is.read((char *)buffer.get(), sz);\n    ERR_LOG_AND_RETURN_NOT_TRUE(!is.bad(), ERR_FILE_OPERATION_FAILED, \"read file {} failed\", file);\n    is.close();\n\n    ERR_LOG_AND_RETURN_NOT_TRUE(\n        json::json_forwarder<replica_init_info>::decode(blob(buffer, sz), *this),\n        ERR_FILE_OPERATION_FAILED,\n        \"decode json from file {} failed\",\n        file);\n\n    return ERR_OK;\n}\n\nerror_code replica_init_info::store_json(const std::string &file)\n{\n    return write_blob_to_file(file, 
json::json_forwarder<replica_init_info>::encode(*this));\n}\n\nstd::string replica_init_info::to_string()\n{\n    // TODO(yingchun): use fmt instead\n    std::ostringstream oss;\n    oss << \"init_ballot = \" << init_ballot << \", init_durable_decree = \" << init_durable_decree\n        << \", init_offset_in_shared_log = \" << init_offset_in_shared_log\n        << \", init_offset_in_private_log = \" << init_offset_in_private_log;\n    return oss.str();\n}\n\nerror_code replica_app_info::load(const std::string &file)\n{\n    std::ifstream is(file, std::ios::binary);\n    ERR_LOG_AND_RETURN_NOT_TRUE(\n        is.is_open(), ERR_FILE_OPERATION_FAILED, \"open file {} failed\", file);\n\n    int64_t sz = 0;\n    ERR_LOG_AND_RETURN_NOT_TRUE(utils::filesystem::file_size(std::string(file), sz),\n                                ERR_FILE_OPERATION_FAILED,\n                                \"get file size of {} failed\",\n                                file);\n\n    std::shared_ptr<char> buffer(utils::make_shared_array<char>(sz));\n    is.read((char *)buffer.get(), sz);\n    is.close();\n\n    binary_reader reader(blob(buffer, sz));\n    int magic;\n    unmarshall(reader, magic, DSF_THRIFT_BINARY);\n\n    ERR_LOG_AND_RETURN_NOT_TRUE(\n        magic == 0xdeadbeef, ERR_INVALID_DATA, \"data in file {} is invalid (magic)\", file);\n\n    unmarshall(reader, *_app, DSF_THRIFT_JSON);\n    return ERR_OK;\n}\n\nerror_code replica_app_info::store(const std::string &file)\n{\n    binary_writer writer;\n    int magic = 0xdeadbeef;\n\n    marshall(writer, magic, DSF_THRIFT_BINARY);\n    if (_app->envs.empty()) {\n        marshall(writer, *_app, DSF_THRIFT_JSON);\n    } else {\n        // for most envs, do not persistent them to app info file\n        // ROCKSDB_ALLOW_INGEST_BEHIND should be persistent\n        app_info tmp = *_app;\n        tmp.envs.clear();\n        const auto &iter = _app->envs.find(replica_envs::ROCKSDB_ALLOW_INGEST_BEHIND);\n        if (iter != _app->envs.end()) {\n    
        tmp.envs[replica_envs::ROCKSDB_ALLOW_INGEST_BEHIND] = iter->second;\n        }\n        marshall(writer, tmp, DSF_THRIFT_JSON);\n    }\n\n    return write_blob_to_file(file, writer.get_buffer());\n}\n\n/*static*/\nvoid replication_app_base::register_storage_engine(const std::string &name, factory f)\n{\n    utils::factory_store<replication_app_base>::register_factory(\n        name.c_str(), f, PROVIDER_TYPE_MAIN);\n}\n/*static*/\nreplication_app_base *replication_app_base::new_storage_instance(const std::string &name,\n                                                                 replica *r)\n{\n    return utils::factory_store<replication_app_base>::create(name.c_str(), PROVIDER_TYPE_MAIN, r);\n}\n\nreplication_app_base::replication_app_base(replica *replica) : replica_base(replica)\n{\n    _dir_data = utils::filesystem::path_combine(replica->dir(), \"data\");\n    _dir_learn = utils::filesystem::path_combine(replica->dir(), \"learn\");\n    _dir_backup = utils::filesystem::path_combine(replica->dir(), \"backup\");\n    _dir_bulk_load = utils::filesystem::path_combine(replica->dir(),\n                                                     bulk_load_constant::BULK_LOAD_LOCAL_ROOT_DIR);\n    _dir_duplication = utils::filesystem::path_combine(\n        replica->dir(), duplication_constants::kDuplicationCheckpointRootDir);\n    _last_committed_decree = 0;\n    _replica = replica;\n}\n\nbool replication_app_base::is_primary() const\n{\n    return _replica->status() == partition_status::PS_PRIMARY;\n}\n\nbool replication_app_base::is_duplication_master() const\n{\n    return _replica->is_duplication_master();\n}\n\nbool replication_app_base::is_duplication_follower() const\n{\n    return _replica->is_duplication_follower();\n}\n\nconst ballot &replication_app_base::get_ballot() const { return _replica->get_ballot(); }\n\nerror_code replication_app_base::open_internal(replica *r)\n{\n    
ERR_LOG_AND_RETURN_NOT_TRUE(utils::filesystem::directory_exists(_dir_data),\n                                ERR_FILE_OPERATION_FAILED,\n                                \"[{}]: replica data dir {} does not exist\",\n                                r->name(),\n                                _dir_data);\n\n    ERR_LOG_AND_RETURN_NOT_OK(open(), \"[{}]: open replica app failed\", r->name());\n\n    _last_committed_decree = last_durable_decree();\n\n    auto err = _info.load(r->dir());\n    ERR_LOG_AND_RETURN_NOT_OK(err, \"[{}]: load replica_init_info failed\", r->name());\n\n    ERR_LOG_AND_RETURN_NOT_TRUE(err != ERR_OK || last_durable_decree() >= _info.init_durable_decree,\n                                ERR_INCOMPLETE_DATA,\n                                \"[{}]: replica data is not complete coz \"\n                                \"last_durable_decree({}) < init_durable_decree({})\",\n                                r->name(),\n                                last_durable_decree(),\n                                _info.init_durable_decree);\n\n    return ERR_OK;\n}\n\nerror_code replication_app_base::open_new_internal(replica *r,\n                                                   int64_t shared_log_start,\n                                                   int64_t private_log_start)\n{\n    dassert_f(utils::filesystem::remove_path(_dir_data), \"remove data dir {} failed\", _dir_data);\n    dassert_f(\n        utils::filesystem::create_directory(_dir_data), \"create data dir {} failed\", _dir_data);\n    ERR_LOG_AND_RETURN_NOT_TRUE(utils::filesystem::directory_exists(_dir_data),\n                                ERR_FILE_OPERATION_FAILED,\n                                \"[{}]: create replica data dir {} failed\",\n                                r->name(),\n                                _dir_data);\n\n    ERR_LOG_AND_RETURN_NOT_OK(open(), \"[{}]: open replica app failed\", r->name());\n    _last_committed_decree = last_durable_decree();\n    
ERR_LOG_AND_RETURN_NOT_OK(update_init_info(_replica, shared_log_start, private_log_start, 0),\n                              \"[{}]: open replica app failed\",\n                              r->name());\n    return ERR_OK;\n}\n\nerror_code replication_app_base::open()\n{\n    const app_info *info = get_app_info();\n    int argc = 1;\n    argc += (2 * info->envs.size());\n    // check whether replica have some extra envs that meta don't known\n    const std::map<std::string, std::string> &extra_envs = _replica->get_replica_extra_envs();\n    argc += (2 * extra_envs.size());\n\n    std::unique_ptr<char *[]> argvs = make_unique<char *[]>(argc);\n    char **argv = argvs.get();\n    dassert(argv != nullptr, \"\");\n    int idx = 0;\n    argv[idx++] = (char *)(info->app_name.c_str());\n    if (argc > 1) {\n        for (auto &kv : info->envs) {\n            argv[idx++] = (char *)(kv.first.c_str());\n            argv[idx++] = (char *)(kv.second.c_str());\n        }\n\n        // combine extra envs\n        for (auto &kv : extra_envs) {\n            argv[idx++] = (char *)(kv.first.c_str());\n            argv[idx++] = (char *)(kv.second.c_str());\n        }\n    }\n    dcheck_eq(argc, idx);\n\n    return start(argc, argv);\n}\n\nerror_code replication_app_base::close(bool clear_state)\n{\n    ERR_LOG_AND_RETURN_NOT_OK(stop(clear_state), \"[{}]: stop storage failed\", replica_name());\n\n    _last_committed_decree.store(0);\n\n    return ERR_OK;\n}\n\nerror_code replication_app_base::apply_checkpoint(chkpt_apply_mode mode, const learn_state &state)\n{\n    int64_t current_commit_decree = last_committed_decree();\n    error_code err = storage_apply_checkpoint(mode, state);\n    if (ERR_OK == err && state.to_decree_included > current_commit_decree) {\n        _last_committed_decree.store(state.to_decree_included);\n    }\n    return err;\n}\n\nint replication_app_base::on_batched_write_requests(int64_t decree,\n                                                    uint64_t 
timestamp,\n                                                    message_ex **requests,\n                                                    int request_length)\n{\n    int storage_error = 0;\n    for (int i = 0; i < request_length; ++i) {\n        // TODO(yingchun): better to return error_code\n        int e = on_request(requests[i]);\n        if (e != 0) {\n            derror_replica(\"got storage error when handler request({})\",\n                           requests[i]->header->rpc_name);\n            storage_error = e;\n        }\n    }\n    return storage_error;\n}\n\nerror_code replication_app_base::apply_mutation(const mutation *mu)\n{\n    FAIL_POINT_INJECT_F(\"replication_app_base_apply_mutation\", [](string_view) { return ERR_OK; });\n\n    dcheck_eq_replica(mu->data.header.decree, last_committed_decree() + 1);\n    dcheck_eq_replica(mu->data.updates.size(), mu->client_requests.size());\n    dcheck_gt_replica(mu->data.updates.size(), 0);\n\n    if (_replica->status() == partition_status::PS_PRIMARY) {\n        ADD_POINT(mu->_tracer);\n    }\n\n    bool has_ingestion_request = false;\n    int request_count = static_cast<int>(mu->client_requests.size());\n    message_ex **batched_requests = (message_ex **)alloca(sizeof(message_ex *) * request_count);\n    message_ex **faked_requests = (message_ex **)alloca(sizeof(message_ex *) * request_count);\n    int batched_count = 0; // write-empties are not included.\n    int faked_count = 0;\n    for (int i = 0; i < request_count; i++) {\n        const mutation_update &update = mu->data.updates[i];\n        message_ex *req = mu->client_requests[i];\n        dinfo_replica(\"mutation {} #{}: dispatch rpc call {}\", mu->name(), i, update.code);\n        if (update.code != RPC_REPLICATION_WRITE_EMPTY) {\n            if (req == nullptr) {\n                req = message_ex::create_received_request(\n                    update.code,\n                    (dsn_msg_serialize_format)update.serialization_type,\n                   
 (void *)update.data.data(),\n                    update.data.length());\n                faked_requests[faked_count++] = req;\n            }\n\n            batched_requests[batched_count++] = req;\n            if (update.code == apps::RPC_RRDB_RRDB_BULK_LOAD) {\n                has_ingestion_request = true;\n            }\n        }\n    }\n\n    int perror = on_batched_write_requests(\n        mu->data.header.decree, mu->data.header.timestamp, batched_requests, batched_count);\n\n    // release faked requests\n    for (int i = 0; i < faked_count; i++) {\n        faked_requests[i]->release_ref();\n    }\n\n    if (perror != 0) {\n        derror_replica(\"mutation {}: get internal error {}\", mu->name(), perror);\n        // for normal write requests, if got rocksdb error, this replica will be set error and evoke\n        // learn for ingestion requests, should not do as normal write requests, there are two\n        // reasons:\n        // 1. all ingestion errors should be handled by meta server in function\n        // `on_partition_ingestion_reply`, rocksdb error will be returned to meta server in\n        // structure `ingestion_response`, not in this function\n        // 2. 
if replica apply ingestion mutation during learn, it may got error from rocksdb,\n        // because the external sst files may not exist, in this case, we won't consider it as an\n        // error\n        if (!has_ingestion_request) {\n            return ERR_LOCAL_APP_FAILURE;\n        }\n    }\n\n    ++_last_committed_decree;\n\n    if (_replica->verbose_commit_log()) {\n        auto status = _replica->status();\n        const char *str;\n        switch (status) {\n        case partition_status::PS_INACTIVE:\n            str = \"I\";\n            break;\n        case partition_status::PS_PRIMARY:\n            str = \"P\";\n            break;\n        case partition_status::PS_SECONDARY:\n            str = \"S\";\n            break;\n        case partition_status::PS_POTENTIAL_SECONDARY:\n            str = \"PS\";\n            break;\n        default:\n            dassert_replica(false, \"status = {}\", enum_to_string(status));\n            __builtin_unreachable();\n        }\n        ddebug_replica(\n            \"mutation {} committed on {}, batched_count = {}\", mu->name(), str, batched_count);\n    }\n\n    _replica->update_commit_qps(batched_count);\n\n    return ERR_OK;\n}\n\nerror_code replication_app_base::update_init_info(replica *r,\n                                                  int64_t shared_log_offset,\n                                                  int64_t private_log_offset,\n                                                  int64_t durable_decree)\n{\n    _info.crc = 0;\n    _info.magic = 0xdeadbeef;\n    _info.init_ballot = r->get_ballot();\n    _info.init_durable_decree = durable_decree;\n    _info.init_offset_in_shared_log = shared_log_offset;\n    _info.init_offset_in_private_log = private_log_offset;\n\n    ERR_LOG_AND_RETURN_NOT_OK(\n        _info.store(r->dir()), \"[{}]: store replica_init_info failed\", r->name());\n\n    return ERR_OK;\n}\n\nerror_code replication_app_base::update_init_info_ballot_and_decree(replica *r)\n{\n    
return update_init_info(r,\n                            _info.init_offset_in_shared_log,\n                            _info.init_offset_in_private_log,\n                            r->last_durable_decree());\n}\n\nconst app_info *replication_app_base::get_app_info() const { return _replica->get_app_info(); }\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/replication_service_app.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/dist/replication/replication_service_app.h>\n#include <dsn/http/http_server.h>\n\n#include \"common/replication_common.h\"\n#include \"replica_stub.h\"\n#include \"replica_http_service.h\"\n#include \"http/service_version.h\"\n\nnamespace dsn {\nnamespace replication {\n\nvoid replication_service_app::register_all()\n{\n    dsn::service_app::register_factory<replication_service_app>(\"replica\");\n}\n\nreplication_service_app::replication_service_app(const service_app_info *info) : service_app(info)\n{\n    _stub = new replica_stub();\n\n    // add http service\n    register_http_service(new replica_http_service(_stub.get()));\n    start_http_server();\n}\n\nreplication_service_app::~replication_service_app(void) 
{}\n\nerror_code replication_service_app::start(const std::vector<std::string> &args)\n{\n    if (args.size() >= 2) {\n        app_version.version = *(args.end() - 2);\n        app_version.git_commit = *(args.end() - 1);\n    }\n\n    replication_options opts;\n    opts.initialize();\n\n    _stub->initialize(opts);\n    _stub->open_service();\n\n    return ERR_OK;\n}\n\nerror_code replication_service_app::stop(bool cleanup)\n{\n    if (_stub != nullptr) {\n        _stub->close();\n        _stub = nullptr;\n    }\n\n    return ERR_OK;\n}\n\nvoid replication_service_app::on_intercepted_request(dsn::gpid gpid,\n                                                     bool is_write,\n                                                     dsn::message_ex *msg)\n{\n    if (is_write) {\n        _stub->on_client_write(gpid, msg);\n    } else {\n        _stub->on_client_read(gpid, msg);\n    }\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/split/replica_split_manager.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"replica_split_manager.h\"\n#include \"common/partition_split_common.h\"\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/utility/defer.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/fail_point.h>\n\nnamespace dsn {\nnamespace replication {\nDSN_DECLARE_bool(empty_write_disabled);\n\nreplica_split_manager::replica_split_manager(replica *r)\n    : replica_base(r), _replica(r), _stub(r->get_replica_stub())\n{\n    _partition_version.store(_replica->_app_info.partition_count - 1);\n}\n\nreplica_split_manager::~replica_split_manager() {}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::parent_start_split(\n    const group_check_request &request) // on parent partition\n{\n    if (status() != partition_status::PS_PRIMARY && status() != partition_status::PS_SECONDARY &&\n        (status() != partition_status::PS_INACTIVE || !_replica->_inactive_is_transient)) {\n        dwarn_replica(\"receive add child request with wrong status({}), ignore this request\",\n                      enum_to_string(status()));\n        return;\n    }\n\n    if 
(request.config.ballot != get_ballot()) {\n        dwarn_replica(\n            \"receive add child request with different ballot, local ballot({}) VS request \"\n            \"ballot({}), ignore this request\",\n            get_ballot(),\n            request.config.ballot);\n        return;\n    }\n\n    if (_split_status == split_status::SPLITTING) {\n        dwarn_replica(\"partition is already splitting, ignore this request\");\n        return;\n    }\n\n    gpid child_gpid = request.child_gpid;\n    if (child_gpid.get_partition_index() < _replica->_app_info.partition_count) {\n        dwarn_replica(\n            \"receive old add child request, child_gpid={}, partition_count={}, ignore this request\",\n            child_gpid,\n            _replica->_app_info.partition_count);\n        return;\n    }\n\n    if (status() == partition_status::PS_PRIMARY) {\n        _replica->_primary_states.cleanup_split_states();\n    }\n    _partition_version.store(_replica->_app_info.partition_count - 1);\n\n    _split_status = split_status::SPLITTING;\n    _child_gpid = child_gpid;\n    _child_init_ballot = get_ballot();\n\n    ddebug_replica(\"start to add child({}), init_ballot={}, status={}, primary_address={}\",\n                   _child_gpid,\n                   _child_init_ballot,\n                   enum_to_string(status()),\n                   request.config.primary.to_string());\n\n    tasking::enqueue(LPC_CREATE_CHILD,\n                     tracker(),\n                     std::bind(&replica_stub::create_child_replica,\n                               _stub,\n                               _replica->_config.primary,\n                               _replica->_app_info,\n                               _child_init_ballot,\n                               _child_gpid,\n                               get_gpid(),\n                               _replica->_dir),\n                     get_gpid().thread_hash());\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid 
replica_split_manager::child_init_replica(gpid parent_gpid,\n                                               rpc_address primary_address,\n                                               ballot init_ballot) // on child partition\n{\n    FAIL_POINT_INJECT_F(\"replica_child_init_replica\", [](dsn::string_view) {});\n\n    if (status() != partition_status::PS_INACTIVE) {\n        dwarn_replica(\"wrong status({})\", enum_to_string(status()));\n        _stub->split_replica_error_handler(\n            parent_gpid,\n            std::bind(&replica_split_manager::parent_cleanup_split_context, std::placeholders::_1));\n        child_handle_split_error(\"invalid child status during initialize\");\n        return;\n    }\n\n    // update replica config\n    _replica->_config.ballot = init_ballot;\n    _replica->_config.primary = primary_address;\n    _replica->_config.status = partition_status::PS_PARTITION_SPLIT;\n\n    // initialize split context\n    _replica->_split_states.parent_gpid = parent_gpid;\n    _replica->_split_states.is_prepare_list_copied = false;\n    _replica->_split_states.is_caught_up = false;\n    _replica->_split_states.check_state_task =\n        tasking::enqueue(LPC_PARTITION_SPLIT,\n                         tracker(),\n                         std::bind(&replica_split_manager::child_check_split_context, this),\n                         get_gpid().thread_hash(),\n                         std::chrono::seconds(3));\n    _replica->_split_states.splitting_start_ts_ns = dsn_now_ns();\n    _stub->_counter_replicas_splitting_recent_start_count->increment();\n\n    ddebug_replica(\n        \"child initialize succeed, init_ballot={}, parent_gpid={}\", init_ballot, parent_gpid);\n\n    error_code ec =\n        _stub->split_replica_exec(LPC_PARTITION_SPLIT,\n                                  _replica->_split_states.parent_gpid,\n                                  std::bind(&replica_split_manager::parent_prepare_states,\n                                            
std::placeholders::_1,\n                                            _replica->_app->learn_dir()));\n    if (ec != ERR_OK) {\n        child_handle_split_error(\"parent not exist when execute parent_prepare_states\");\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::child_check_split_context() // on child partition\n{\n    FAIL_POINT_INJECT_F(\"replica_child_check_split_context\", [](dsn::string_view) {});\n\n    if (status() != partition_status::PS_PARTITION_SPLIT) {\n        derror_replica(\"wrong status({})\", enum_to_string(status()));\n        _replica->_split_states.check_state_task = nullptr;\n        return;\n    }\n    // let parent partition check its status\n    error_code ec = _stub->split_replica_exec(\n        LPC_PARTITION_SPLIT,\n        _replica->_split_states.parent_gpid,\n        std::bind(&replica_split_manager::parent_check_states, std::placeholders::_1));\n    if (ec != ERR_OK) {\n        child_handle_split_error(\"check_child_state failed because parent gpid is invalid\");\n        return;\n    }\n\n    _replica->_split_states.check_state_task =\n        tasking::enqueue(LPC_PARTITION_SPLIT,\n                         tracker(),\n                         std::bind(&replica_split_manager::child_check_split_context, this),\n                         get_gpid().thread_hash(),\n                         std::chrono::seconds(3));\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nbool replica_split_manager::parent_check_states() // on parent partition\n{\n    FAIL_POINT_INJECT_F(\"replica_parent_check_states\", [](dsn::string_view) { return true; });\n\n    if (_split_status != split_status::SPLITTING || _child_init_ballot != get_ballot() ||\n        _child_gpid.get_app_id() == 0 ||\n        (status() != partition_status::PS_PRIMARY && status() != partition_status::PS_SECONDARY &&\n         (status() != partition_status::PS_INACTIVE || !_replica->_inactive_is_transient))) {\n        dwarn_replica(\"parent wrong states: 
status({}), split_status({}), init_ballot({}) VS \"\n                      \"current_ballot({}), \"\n                      \"child_gpid({})\",\n                      enum_to_string(status()),\n                      enum_to_string(_split_status),\n                      _child_init_ballot,\n                      get_ballot(),\n                      _child_gpid);\n        parent_handle_split_error(\"wrong parent states when execute parent_check_states\", false);\n        return false;\n    }\n    return true;\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::parent_prepare_states(const std::string &dir) // on parent partition\n{\n    if (!parent_check_states()) {\n        return;\n    }\n\n    learn_state parent_states;\n    int64_t checkpoint_decree;\n    // generate checkpoint\n    error_code ec = _replica->_app->copy_checkpoint_to_dir(dir.c_str(), &checkpoint_decree, true);\n    if (ec == ERR_OK) {\n        ddebug_replica(\"prepare checkpoint succeed: checkpoint dir = {}, checkpoint decree = {}\",\n                       dir,\n                       checkpoint_decree);\n        parent_states.to_decree_included = checkpoint_decree;\n        // learn_state.files[0] will be used to get learn dir in function 'storage_apply_checkpoint'\n        // so we add a fake file name here, this file won't appear on disk\n        parent_states.files.push_back(dsn::utils::filesystem::path_combine(dir, \"file_name\"));\n    } else {\n        dwarn_replica(\"prepare checkpoint failed, error={}, please wait and retry\", ec);\n        tasking::enqueue(LPC_PARTITION_SPLIT,\n                         tracker(),\n                         std::bind(&replica_split_manager::parent_prepare_states, this, dir),\n                         get_gpid().thread_hash(),\n                         std::chrono::seconds(1));\n        return;\n    }\n\n    std::vector<mutation_ptr> mutation_list;\n    std::vector<std::string> files;\n    uint64_t total_file_size = 0;\n    // get 
mutation and private log\n    _replica->_private_log->get_parent_mutations_and_logs(\n        get_gpid(), checkpoint_decree + 1, invalid_ballot, mutation_list, files, total_file_size);\n\n    // get prepare list\n    std::shared_ptr<prepare_list> plist =\n        std::make_shared<prepare_list>(_replica, *_replica->_prepare_list);\n    plist->truncate(last_committed_decree());\n\n    dcheck_eq(last_committed_decree(), checkpoint_decree);\n    dcheck_ge(mutation_list.size(), 0);\n    dcheck_ge(files.size(), 0);\n    ddebug_replica(\"prepare state succeed: {} mutations, {} private log files, total file size = \"\n                   \"{}, last_committed_decree = {}\",\n                   mutation_list.size(),\n                   files.size(),\n                   total_file_size,\n                   last_committed_decree());\n\n    ec = _stub->split_replica_exec(LPC_PARTITION_SPLIT,\n                                   _child_gpid,\n                                   std::bind(&replica_split_manager::child_copy_prepare_list,\n                                             std::placeholders::_1,\n                                             parent_states,\n                                             mutation_list,\n                                             files,\n                                             total_file_size,\n                                             std::move(plist)));\n    if (ec != ERR_OK) {\n        parent_cleanup_split_context();\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::child_copy_prepare_list(\n    learn_state lstate,\n    std::vector<mutation_ptr> mutation_list,\n    std::vector<std::string> plog_files,\n    uint64_t total_file_size,\n    std::shared_ptr<prepare_list> plist) // on child partition\n{\n    if (status() != partition_status::PS_PARTITION_SPLIT) {\n        derror_replica(\"wrong status({})\", enum_to_string(status()));\n        return;\n    }\n\n    // learning parent states is 
time-consuming, should execute in THREAD_POOL_REPLICATION_LONG\n    decree last_committed_decree = plist->last_committed_decree();\n    _replica->_split_states.splitting_start_async_learn_ts_ns = dsn_now_ns();\n    _replica->_split_states.async_learn_task =\n        tasking::enqueue(LPC_PARTITION_SPLIT_ASYNC_LEARN,\n                         tracker(),\n                         std::bind(&replica_split_manager::child_learn_states,\n                                   this,\n                                   lstate,\n                                   mutation_list,\n                                   plog_files,\n                                   total_file_size,\n                                   last_committed_decree));\n\n    ddebug_replica(\"start to copy parent prepare list, last_committed_decree={}, prepare list min \"\n                   \"decree={}, max decree={}\",\n                   last_committed_decree,\n                   plist->min_decree(),\n                   plist->max_decree());\n\n    // copy parent prepare list\n    plist->set_committer(std::bind(&replica::execute_mutation, _replica, std::placeholders::_1));\n    _replica->_prepare_list.reset(new prepare_list(this, *plist));\n    for (decree d = last_committed_decree + 1; d <= _replica->_prepare_list->max_decree(); ++d) {\n        mutation_ptr mu = _replica->_prepare_list->get_mutation_by_decree(d);\n        dassert_replica(mu != nullptr, \"can not find mutation, dercee={}\", d);\n        mu->data.header.pid = get_gpid();\n        _replica->_private_log->append(mu, LPC_WRITE_REPLICATION_LOG_COMMON, tracker(), nullptr);\n        // set mutation has been logged in private log\n        if (!mu->is_logged()) {\n            mu->set_logged();\n        }\n    }\n    _replica->_split_states.is_prepare_list_copied = true;\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION_LONG\nvoid replica_split_manager::child_learn_states(learn_state lstate,\n                                               
std::vector<mutation_ptr> mutation_list,\n                                               std::vector<std::string> plog_files,\n                                               uint64_t total_file_size,\n                                               decree last_committed_decree) // on child partition\n{\n    FAIL_POINT_INJECT_F(\"replica_child_learn_states\", [](dsn::string_view) {});\n\n    if (status() != partition_status::PS_PARTITION_SPLIT) {\n        derror_replica(\"wrong status({})\", enum_to_string(status()));\n        child_handle_async_learn_error();\n        return;\n    }\n\n    ddebug_replica(\"start to learn states asynchronously, prepare_list last_committed_decree={}, \"\n                   \"checkpoint decree range=({},{}], private log files count={}, in-memory \"\n                   \"mutation count={}\",\n                   last_committed_decree,\n                   lstate.from_decree_excluded,\n                   lstate.to_decree_included,\n                   plog_files.size(),\n                   mutation_list.size());\n\n    error_code err;\n    auto cleanup = defer([this, &err]() {\n        if (err != ERR_OK) {\n            child_handle_async_learn_error();\n        }\n    });\n\n    // apply parent checkpoint\n    err = _replica->_app->apply_checkpoint(replication_app_base::chkpt_apply_mode::learn, lstate);\n    if (err != ERR_OK) {\n        derror_replica(\"failed to apply checkpoint, error={}\", err);\n        return;\n    }\n\n    // replay parent private log and learn in-memory mutations\n    err =\n        child_apply_private_logs(plog_files, mutation_list, total_file_size, last_committed_decree);\n    if (err != ERR_OK) {\n        derror_replica(\"failed to replay private log, error={}\", err);\n        return;\n    }\n\n    // generate a checkpoint synchronously\n    err = _replica->_app->sync_checkpoint();\n    if (err != ERR_OK) {\n        derror_replica(\"failed to generate checkpoint synchrounously, error={}\", err);\n        
return;\n    }\n\n    err = _replica->update_init_info_ballot_and_decree();\n    if (err != ERR_OK) {\n        derror_replica(\"update_init_info_ballot_and_decree failed, error={}\", err);\n        return;\n    }\n\n    ddebug_replica(\"learn parent states asynchronously succeed\");\n\n    tasking::enqueue(LPC_PARTITION_SPLIT,\n                     tracker(),\n                     std::bind(&replica_split_manager::child_catch_up_states, this),\n                     get_gpid().thread_hash());\n    _replica->_split_states.async_learn_task = nullptr;\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION_LONG\nerror_code\nreplica_split_manager::child_apply_private_logs(std::vector<std::string> plog_files,\n                                                std::vector<mutation_ptr> mutation_list,\n                                                uint64_t total_file_size,\n                                                decree last_committed_decree) // on child partition\n{\n    FAIL_POINT_INJECT_F(\"replica_child_apply_private_logs\", [](dsn::string_view arg) {\n        return error_code::try_get(arg.data(), ERR_OK);\n    });\n\n    if (status() != partition_status::PS_PARTITION_SPLIT) {\n        derror_replica(\"wrong status({})\", enum_to_string(status()));\n        return ERR_INVALID_STATE;\n    }\n\n    error_code ec;\n    int64_t offset;\n    // temp prepare_list used for apply states\n    prepare_list plist(_replica,\n                       _replica->_app->last_committed_decree(),\n                       _replica->_options->max_mutation_count_in_prepare_list,\n                       [this](mutation_ptr &mu) {\n                           if (mu->data.header.decree ==\n                               _replica->_app->last_committed_decree() + 1) {\n                               _replica->_app->apply_mutation(mu);\n                           }\n                       });\n\n    // replay private log\n    ec = mutation_log::replay(plog_files,\n                              
[&plist](int log_length, mutation_ptr &mu) {\n                                  decree d = mu->data.header.decree;\n                                  if (d <= plist.last_committed_decree()) {\n                                      return false;\n                                  }\n                                  mutation_ptr origin_mu = plist.get_mutation_by_decree(d);\n                                  if (origin_mu != nullptr &&\n                                      origin_mu->data.header.ballot >= mu->data.header.ballot) {\n                                      return false;\n                                  }\n                                  plist.prepare(mu, partition_status::PS_SECONDARY);\n                                  return true;\n                              },\n                              offset);\n    if (ec != ERR_OK) {\n        derror_replica(\n            \"replay private_log files failed, file count={}, app last_committed_decree={}\",\n            plog_files.size(),\n            _replica->_app->last_committed_decree());\n        return ec;\n    }\n\n    _replica->_split_states.splitting_copy_file_count += plog_files.size();\n    _replica->_split_states.splitting_copy_file_size += total_file_size;\n    _stub->_counter_replicas_splitting_recent_copy_file_count->add(plog_files.size());\n    _stub->_counter_replicas_splitting_recent_copy_file_size->add(total_file_size);\n\n    ddebug_replica(\"replay private_log files succeed, file count={}, app last_committed_decree={}\",\n                   plog_files.size(),\n                   _replica->_app->last_committed_decree());\n\n    // apply in-memory mutations if replay private logs succeed\n    int count = 0;\n    for (mutation_ptr &mu : mutation_list) {\n        decree d = mu->data.header.decree;\n        if (d <= plist.last_committed_decree()) {\n            continue;\n        }\n        mutation_ptr origin_mu = plist.get_mutation_by_decree(d);\n        if (origin_mu != nullptr && 
origin_mu->data.header.ballot >= mu->data.header.ballot) {\n            continue;\n        }\n        if (!mu->is_logged()) {\n            mu->set_logged();\n        }\n        plist.prepare(mu, partition_status::PS_SECONDARY);\n        ++count;\n    }\n    _replica->_split_states.splitting_copy_mutation_count += count;\n    _stub->_counter_replicas_splitting_recent_copy_mutation_count->add(count);\n    plist.commit(last_committed_decree, COMMIT_TO_DECREE_HARD);\n    ddebug_replica(\n        \"apply in-memory mutations succeed, mutation count={}, app last_committed_decree={}\",\n        count,\n        _replica->_app->last_committed_decree());\n\n    return ec;\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::child_catch_up_states() // on child partition\n{\n    FAIL_POINT_INJECT_F(\"replica_child_catch_up_states\", [](dsn::string_view) {});\n\n    if (status() != partition_status::PS_PARTITION_SPLIT) {\n        derror_replica(\"wrong status, status is {}\", enum_to_string(status()));\n        return;\n    }\n\n    // parent will copy mutations to child during async-learn, as a result:\n    // - child prepare_list last_committed_decree = parent prepare_list last_committed_decree, also\n    // is catch_up goal_decree\n    // - local_decree is child local last_committed_decree which is the last decree in async-learn.\n    decree goal_decree = _replica->_prepare_list->last_committed_decree();\n    decree local_decree = _replica->_app->last_committed_decree();\n\n    // there are mutations written to parent during async-learn\n    // child does not catch up parent, there are still some mutations child not learn\n    if (local_decree < goal_decree) {\n        if (local_decree >= _replica->_prepare_list->min_decree()) {\n            // all missing mutations are all in prepare list\n            dwarn_replica(\"there are some in-memory mutations should be learned, app \"\n                          \"last_committed_decree={}, \"\n                    
      \"goal decree={}, prepare_list min_decree={}\",\n                          local_decree,\n                          goal_decree,\n                          _replica->_prepare_list->min_decree());\n            for (decree d = local_decree + 1; d <= goal_decree; ++d) {\n                auto mu = _replica->_prepare_list->get_mutation_by_decree(d);\n                dassert(mu != nullptr, \"\");\n                error_code ec = _replica->_app->apply_mutation(mu);\n                if (ec != ERR_OK) {\n                    child_handle_split_error(\"child_catchup failed because apply mutation failed\");\n                    return;\n                }\n            }\n        } else {\n            // some missing mutations have already in private log\n            // should call `catch_up_with_private_logs` to catch up all missing mutations\n            dwarn_replica(\n                \"there are some private logs should be learned, app last_committed_decree=\"\n                \"{}, prepare_list min_decree={}, please wait\",\n                local_decree,\n                _replica->_prepare_list->min_decree());\n            _replica->_split_states.async_learn_task = tasking::enqueue(\n                LPC_CATCHUP_WITH_PRIVATE_LOGS,\n                tracker(),\n                [this]() {\n                    _replica->catch_up_with_private_logs(partition_status::PS_PARTITION_SPLIT);\n                    _replica->_split_states.async_learn_task = nullptr;\n                },\n                get_gpid().thread_hash());\n            return;\n        }\n    }\n\n    ddebug_replica(\"child catch up parent states, goal decree={}, local decree={}\",\n                   _replica->_prepare_list->last_committed_decree(),\n                   _replica->_app->last_committed_decree());\n    _replica->_split_states.is_caught_up = true;\n\n    child_notify_catch_up();\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::child_notify_catch_up() // on child 
partition\n{\n    FAIL_POINT_INJECT_F(\"replica_child_notify_catch_up\", [](dsn::string_view) {});\n\n    std::unique_ptr<notify_catch_up_request> request = make_unique<notify_catch_up_request>();\n    request->parent_gpid = _replica->_split_states.parent_gpid;\n    request->child_gpid = get_gpid();\n    request->child_ballot = get_ballot();\n    request->child_address = _stub->_primary_address;\n\n    ddebug_replica(\"send notification to primary parent[{}@{}], ballot={}\",\n                   _replica->_split_states.parent_gpid,\n                   _replica->_config.primary.to_string(),\n                   get_ballot());\n\n    notify_catch_up_rpc rpc(std::move(request),\n                            RPC_SPLIT_NOTIFY_CATCH_UP,\n                            /*never timeout*/ 0_ms,\n                            /*partition_hash*/ 0,\n                            _replica->_split_states.parent_gpid.thread_hash());\n    rpc.call(_replica->_config.primary, tracker(), [this, rpc](error_code ec) mutable {\n        auto response = rpc.response();\n        if (ec == ERR_TIMEOUT) {\n            dwarn_replica(\"notify primary catch up timeout, please wait and retry\");\n            tasking::enqueue(LPC_PARTITION_SPLIT,\n                             tracker(),\n                             std::bind(&replica_split_manager::child_notify_catch_up, this),\n                             get_gpid().thread_hash(),\n                             std::chrono::seconds(1));\n            return;\n        }\n        if (ec != ERR_OK || response.err != ERR_OK) {\n            error_code err = (ec == ERR_OK) ? 
response.err : ec;\n            derror_replica(\"failed to notify primary catch up, error={}\", err);\n            _stub->split_replica_error_handler(\n                _replica->_split_states.parent_gpid,\n                std::bind(&replica_split_manager::parent_cleanup_split_context,\n                          std::placeholders::_1));\n            child_handle_split_error(\"notify_primary_split_catch_up failed\");\n            return;\n        }\n        ddebug_replica(\"notify primary parent[{}@{}] catch up succeed\",\n                       _replica->_split_states.parent_gpid,\n                       _replica->_config.primary.to_string());\n    });\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::parent_handle_child_catch_up(\n    const notify_catch_up_request &request,\n    notify_cacth_up_response &response) // on primary parent\n{\n    if (status() != partition_status::PS_PRIMARY || _split_status != split_status::SPLITTING) {\n        derror_replica(\n            \"wrong partition status or wrong split status, partition_status={}, split_status={}\",\n            enum_to_string(status()),\n            enum_to_string(_split_status));\n\n        response.err = ERR_INVALID_STATE;\n        return;\n    }\n\n    if (request.child_ballot != get_ballot() || request.child_gpid != _child_gpid) {\n        derror_replica(\"receive out-date request, request ballot ({}) VS local ballot({}), request \"\n                       \"child_gpid({}) VS local child_gpid({})\",\n                       request.child_ballot,\n                       get_ballot(),\n                       request.child_gpid,\n                       _child_gpid);\n        response.err = ERR_INVALID_STATE;\n        return;\n    }\n\n    response.err = ERR_OK;\n    ddebug_replica(\"receive catch_up request from {}@{}, current ballot={}\",\n                   request.child_gpid,\n                   request.child_address.to_string(),\n                   request.child_ballot);\n\n    
_replica->_primary_states.caught_up_children.insert(request.child_address);\n    // _primary_states.statuses is a map structure: rpc address -> partition_status\n    // it stores replica's rpc address and partition_status of this replica group\n    for (auto &iter : _replica->_primary_states.statuses) {\n        if (_replica->_primary_states.caught_up_children.find(iter.first) ==\n            _replica->_primary_states.caught_up_children.end()) {\n            // there are child partitions not caught up its parent\n            return;\n        }\n    }\n\n    ddebug_replica(\"all child partitions catch up\");\n    _replica->_primary_states.caught_up_children.clear();\n    _replica->_primary_states.sync_send_write_request = true;\n\n    // sync_point is the first decree after parent send write request to child synchronously\n    // when sync_point commit, parent consider child has all data it should have during async-learn\n    decree sync_point = _replica->_prepare_list->max_decree() + 1;\n    if (!FLAGS_empty_write_disabled) {\n        // empty wirte here to commit sync_point\n        mutation_ptr mu = _replica->new_mutation(invalid_decree);\n        mu->add_client_request(RPC_REPLICATION_WRITE_EMPTY, nullptr);\n        _replica->init_prepare(mu, false);\n        dassert_replica(sync_point == mu->data.header.decree,\n                        \"sync_point should be equal to mutation's decree, {} vs {}\",\n                        sync_point,\n                        mu->data.header.decree);\n    };\n\n    // check if sync_point has been committed\n    tasking::enqueue(\n        LPC_PARTITION_SPLIT,\n        tracker(),\n        std::bind(&replica_split_manager::parent_check_sync_point_commit, this, sync_point),\n        get_gpid().thread_hash(),\n        std::chrono::seconds(1));\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::parent_check_sync_point_commit(decree sync_point) // on primary parent\n{\n    
FAIL_POINT_INJECT_F(\"replica_parent_check_sync_point_commit\", [](dsn::string_view) {});\n    if (status() != partition_status::PS_PRIMARY) {\n        derror_replica(\"wrong status({})\", enum_to_string(status()));\n        parent_handle_split_error(\"check_sync_point_commit failed, primary changed\", false);\n        return;\n    }\n\n    ddebug_replica(\"sync_point = {}, app last_committed_decree = {}\",\n                   sync_point,\n                   _replica->_app->last_committed_decree());\n    if (_replica->_app->last_committed_decree() >= sync_point) {\n        update_child_group_partition_count(_replica->_app_info.partition_count * 2);\n    } else {\n        dwarn_replica(\"sync_point has not been committed, please wait and retry\");\n        tasking::enqueue(\n            LPC_PARTITION_SPLIT,\n            tracker(),\n            std::bind(&replica_split_manager::parent_check_sync_point_commit, this, sync_point),\n            get_gpid().thread_hash(),\n            std::chrono::seconds(1));\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::update_child_group_partition_count(\n    int new_partition_count) // on primary parent\n{\n    if (status() != partition_status::PS_PRIMARY || _split_status != split_status::SPLITTING) {\n        derror_replica(\n            \"wrong partition status or wrong split status, partition_status={}, split_status={}\",\n            enum_to_string(status()),\n            enum_to_string(_split_status));\n        parent_handle_split_error(\n            \"update_child_group_partition_count failed, wrong partition status or split status\",\n            true);\n        return;\n    }\n\n    if (!_replica->_primary_states.learners.empty() ||\n        _replica->_primary_states.membership.secondaries.size() + 1 <\n            _replica->_primary_states.membership.max_replica_count) {\n        derror_replica(\"there are {} learners or not have enough secondaries(count is {})\",\n                       
_replica->_primary_states.learners.size(),\n                       _replica->_primary_states.membership.secondaries.size());\n        parent_handle_split_error(\n            \"update_child_group_partition_count failed, have learner or lack of secondary\", true);\n        return;\n    }\n\n    auto not_replied_addresses = std::make_shared<std::unordered_set<rpc_address>>();\n    // _primary_states.statuses is a map structure: rpc address -> partition_status\n    for (const auto &kv : _replica->_primary_states.statuses) {\n        not_replied_addresses->insert(kv.first);\n    }\n    for (const auto &iter : _replica->_primary_states.statuses) {\n        parent_send_update_partition_count_request(\n            iter.first, new_partition_count, not_replied_addresses);\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::parent_send_update_partition_count_request(\n    const rpc_address &address,\n    int32_t new_partition_count,\n    std::shared_ptr<std::unordered_set<rpc_address>> &not_replied_addresses) // on primary parent\n{\n    FAIL_POINT_INJECT_F(\"replica_parent_update_partition_count_request\", [](dsn::string_view) {});\n\n    dcheck_eq_replica(status(), partition_status::PS_PRIMARY);\n\n    auto request = make_unique<update_child_group_partition_count_request>();\n    request->new_partition_count = new_partition_count;\n    request->target_address = address;\n    request->child_pid = _child_gpid;\n    request->ballot = get_ballot();\n\n    ddebug_replica(\n        \"send update child group partition count request to node({}), new partition_count = {}\",\n        address.to_string(),\n        new_partition_count);\n    update_child_group_partition_count_rpc rpc(std::move(request),\n                                               RPC_SPLIT_UPDATE_CHILD_PARTITION_COUNT,\n                                               0_ms,\n                                               0,\n                                               
get_gpid().thread_hash());\n    rpc.call(address, tracker(), [this, rpc, not_replied_addresses](error_code ec) mutable {\n        on_update_child_group_partition_count_reply(\n            ec, rpc.request(), rpc.response(), not_replied_addresses);\n    });\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::on_update_child_group_partition_count(\n    const update_child_group_partition_count_request &request,\n    update_child_group_partition_count_response &response) // on child partition\n{\n    if (request.ballot != get_ballot() || !_replica->_split_states.is_caught_up) {\n        derror_replica(\n            \"receive outdated update child group_partition_count_request, request ballot={}, \"\n            \"local ballot={}, is_caught_up={}\",\n            request.ballot,\n            get_ballot(),\n            _replica->_split_states.is_caught_up);\n        response.err = ERR_VERSION_OUTDATED;\n        return;\n    }\n\n    if (_replica->_app_info.partition_count == request.new_partition_count &&\n        _partition_version.load() == request.new_partition_count - 1) {\n        dwarn_replica(\"receive repeated update child group_partition_count_request, \"\n                      \"partition_count = {}, ignore it\",\n                      request.new_partition_count);\n        response.err = ERR_OK;\n        return;\n    }\n\n    dcheck_eq_replica(_replica->_app_info.partition_count * 2, request.new_partition_count);\n    update_local_partition_count(request.new_partition_count);\n    response.err = ERR_OK;\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::update_local_partition_count(\n    int32_t new_partition_count) // on all partitions\n{\n    // update _app_info and partition_version\n    auto info = _replica->_app_info;\n    // if app has not been split before, init_partition_count = -1\n    // we should set init_partition_count to old_partition_count\n    if (info.init_partition_count < 0) {\n        
info.init_partition_count = info.partition_count;\n    }\n    auto old_partition_count = info.partition_count;\n    info.partition_count = new_partition_count;\n\n    const auto err = _replica->store_app_info(info);\n    if (err != ERR_OK) {\n        info.partition_count = old_partition_count;\n        dassert_replica(false, \"failed to save app_info, error = {}\", err);\n        return;\n    }\n\n    _replica->_app_info = info;\n    ddebug_replica(\"update partition_count from {} to {}\",\n                   old_partition_count,\n                   _replica->_app_info.partition_count);\n\n    _replica->_app->set_partition_version(_replica->_app_info.partition_count - 1);\n    _partition_version.store(_replica->_app_info.partition_count - 1);\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::on_update_child_group_partition_count_reply(\n    error_code ec,\n    const update_child_group_partition_count_request &request,\n    const update_child_group_partition_count_response &response,\n    std::shared_ptr<std::unordered_set<rpc_address>> &not_replied_addresses) // on primary parent\n{\n    _replica->_checker.only_one_thread_access();\n\n    if (status() != partition_status::PS_PRIMARY || _split_status != split_status::SPLITTING) {\n        derror_replica(\n            \"wrong partition status or wrong split status, partition_status={}, split_status={}\",\n            enum_to_string(status()),\n            enum_to_string(_split_status));\n        parent_handle_split_error(\"on_update_child_group_partition_count_reply failed, wrong \"\n                                  \"partition status or split status\",\n                                  true);\n        return;\n    }\n\n    if (request.ballot != get_ballot()) {\n        derror_replica(\n            \"ballot changed, request ballot = {}, local ballot = {}\", request.ballot, get_ballot());\n        parent_handle_split_error(\n            \"on_update_child_group_partition_count_reply failed, 
ballot changed\", true);\n        return;\n    }\n\n    error_code error = (ec == ERR_OK) ? response.err : ec;\n    if (error == ERR_TIMEOUT) {\n        dwarn_replica(\"failed to update child node({}) partition_count, error = {}, wait and retry\",\n                      request.target_address.to_string(),\n                      error);\n        tasking::enqueue(\n            LPC_PARTITION_SPLIT,\n            tracker(),\n            std::bind(&replica_split_manager::parent_send_update_partition_count_request,\n                      this,\n                      request.target_address,\n                      request.new_partition_count,\n                      not_replied_addresses),\n            get_gpid().thread_hash(),\n            std::chrono::seconds(1));\n        return;\n    }\n\n    if (error != ERR_OK) {\n        derror_replica(\"failed to update child node({}) partition_count({}), error = {}\",\n                       request.target_address.to_string(),\n                       request.new_partition_count,\n                       error);\n        parent_handle_split_error(\"on_update_child_group_partition_count_reply error\", true);\n        return;\n    }\n\n    ddebug_replica(\"update node({}) child({}) partition_count({}) succeed\",\n                   request.target_address.to_string(),\n                   request.child_pid,\n                   request.new_partition_count);\n\n    // update group partition_count succeed\n    not_replied_addresses->erase(request.target_address);\n    if (not_replied_addresses->empty()) {\n        ddebug_replica(\"update child({}) group partition_count, new_partition_count = {}\",\n                       request.child_pid,\n                       request.new_partition_count);\n        register_child_on_meta(get_ballot());\n    } else {\n        ddebug_replica(\"there are still {} replica not update partition count in child group\",\n                       not_replied_addresses->size());\n    }\n}\n\n// ThreadPool: 
THREAD_POOL_REPLICATION\nvoid replica_split_manager::register_child_on_meta(ballot b) // on primary parent\n{\n    FAIL_POINT_INJECT_F(\"replica_register_child_on_meta\", [](dsn::string_view) {});\n\n    if (status() != partition_status::PS_PRIMARY || _split_status != split_status::SPLITTING) {\n        derror_replica(\n            \"wrong partition status or wrong split status, partition_status={}, split_status={}\",\n            enum_to_string(status()),\n            enum_to_string(_split_status));\n        parent_handle_split_error(\"register child failed, wrong partition status or split status\",\n                                  true);\n        return;\n    }\n\n    if (_replica->_primary_states.reconfiguration_task != nullptr) {\n        dwarn_replica(\"under reconfiguration, delay and retry to register child\");\n        _replica->_primary_states.register_child_task =\n            tasking::enqueue(LPC_PARTITION_SPLIT,\n                             tracker(),\n                             std::bind(&replica_split_manager::register_child_on_meta, this, b),\n                             get_gpid().thread_hash(),\n                             std::chrono::seconds(1));\n        return;\n    }\n\n    partition_configuration child_config = _replica->_primary_states.membership;\n    child_config.ballot++;\n    child_config.last_committed_decree = 0;\n    child_config.last_drops.clear();\n    child_config.pid.set_partition_index(_replica->_app_info.partition_count +\n                                         get_gpid().get_partition_index());\n\n    register_child_request request;\n    request.app = _replica->_app_info;\n    request.child_config = child_config;\n    request.parent_config = _replica->_primary_states.membership;\n    request.primary_address = _stub->_primary_address;\n\n    // reject client request\n    _replica->update_local_configuration_with_no_ballot_change(partition_status::PS_INACTIVE);\n    _replica->set_inactive_state_transient(true);\n    
int32_t old_partition_version = _partition_version.exchange(-1);\n    ddebug_replica(\"update partition version from {} to {}\", old_partition_version, -1);\n\n    parent_send_register_request(request);\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::parent_send_register_request(\n    const register_child_request &request) // on primary parent\n{\n    FAIL_POINT_INJECT_F(\"replica_parent_send_register_request\", [](dsn::string_view) {});\n\n    dcheck_eq_replica(status(), partition_status::PS_INACTIVE);\n    ddebug_replica(\n        \"send register child({}) request to meta_server, current ballot = {}, child ballot = {}\",\n        request.child_config.pid,\n        request.parent_config.ballot,\n        request.child_config.ballot);\n\n    rpc_address meta_address(_stub->_failure_detector->get_servers());\n    std::unique_ptr<register_child_request> req = make_unique<register_child_request>(request);\n    register_child_rpc rpc(std::move(req),\n                           RPC_CM_REGISTER_CHILD_REPLICA,\n                           /*never timeout*/ 0_ms,\n                           /*partition hash*/ 0,\n                           get_gpid().thread_hash());\n\n    _replica->_primary_states.register_child_task =\n        rpc.call(meta_address, tracker(), [this, rpc](error_code ec) mutable {\n            on_register_child_on_meta_reply(ec, rpc.request(), rpc.response());\n        });\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::on_register_child_on_meta_reply(\n    error_code ec,\n    const register_child_request &request,\n    const register_child_response &response) // on primary parent\n{\n    FAIL_POINT_INJECT_F(\"replica_on_register_child_on_meta_reply\", [](dsn::string_view) {});\n\n    _replica->_checker.only_one_thread_access();\n\n    // primary parent is under reconfiguration, whose status should be PS_INACTIVE\n    if (partition_status::PS_INACTIVE != status() || !_stub->is_connected()) {\n        
derror_replica(\"status wrong or stub is not connected, status = {}\",\n                       enum_to_string(status()));\n        _replica->_primary_states.register_child_task = nullptr;\n        return;\n    }\n\n    error_code err = ec == ERR_OK ? response.err : ec;\n    if (err == ERR_INVALID_STATE || err == ERR_INVALID_VERSION || err == ERR_CHILD_REGISTERED) {\n        if (err == ERR_CHILD_REGISTERED) {\n            derror_replica(\n                \"register child({}) failed, error = {}, child has already been registered\",\n                request.child_config.pid,\n                err);\n        } else {\n            derror_replica(\"register child({}) failed, error = {}, request is out-of-dated\",\n                           request.child_config.pid,\n                           err);\n            _stub->split_replica_error_handler(\n                request.child_config.pid,\n                std::bind(&replica_split_manager::child_handle_split_error,\n                          std::placeholders::_1,\n                          \"register child failed, request is out-of-dated\"));\n        }\n        parent_cleanup_split_context();\n        _replica->_primary_states.register_child_task = nullptr;\n        _replica->_primary_states.sync_send_write_request = false;\n        if (response.parent_config.ballot >= get_ballot()) {\n            ddebug_replica(\"response ballot = {}, local ballot = {}, should update configuration\",\n                           response.parent_config.ballot,\n                           get_ballot());\n            _replica->update_configuration(response.parent_config);\n        }\n        return;\n    }\n\n    if (err != ERR_OK) {\n        dwarn_replica(\n            \"register child({}) failed, error = {}, wait and retry\", request.child_config.pid, err);\n        _replica->_primary_states.register_child_task = tasking::enqueue(\n            LPC_PARTITION_SPLIT,\n            tracker(),\n            
std::bind(&replica_split_manager::parent_send_register_request, this, request),\n            get_gpid().thread_hash(),\n            std::chrono::seconds(1));\n        return;\n    }\n\n    if (response.parent_config.ballot < get_ballot()) {\n        dwarn_replica(\n            \"register child({}) failed, parent ballot from response is {}, local ballot is {}\",\n            request.child_config.pid,\n            response.parent_config.ballot,\n            get_ballot());\n        _replica->_primary_states.register_child_task = tasking::enqueue(\n            LPC_PARTITION_SPLIT,\n            tracker(),\n            std::bind(&replica_split_manager::parent_send_register_request, this, request),\n            get_gpid().thread_hash(),\n            std::chrono::seconds(1));\n        return;\n    }\n\n    ddebug_replica(\"register child({}) succeed, response parent ballot = {}, local ballot = \"\n                   \"{}, local status = {}\",\n                   request.child_config.pid,\n                   response.parent_config.ballot,\n                   get_ballot(),\n                   enum_to_string(status()));\n\n    dcheck_ge_replica(response.parent_config.ballot, get_ballot());\n    dcheck_eq_replica(_replica->_app_info.partition_count * 2, response.app.partition_count);\n\n    _stub->split_replica_exec(LPC_PARTITION_SPLIT,\n                              response.child_config.pid,\n                              std::bind(&replica_split_manager::child_partition_active,\n                                        std::placeholders::_1,\n                                        response.child_config));\n\n    // update parent config\n    _replica->update_configuration(response.parent_config);\n    _replica->_primary_states.register_child_task = nullptr;\n    _replica->_primary_states.sync_send_write_request = false;\n\n    // update primary parent group partition_count\n    update_local_partition_count(_replica->_app_info.partition_count * 2);\n    _meta_split_status = 
split_status::NOT_SPLIT;\n    _replica->broadcast_group_check();\n\n    parent_cleanup_split_context();\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::child_partition_active(\n    const partition_configuration &config) // on child\n{\n    if (status() != partition_status::PS_PARTITION_SPLIT) {\n        dwarn_replica(\"child partition has been active, status={}\", enum_to_string(status()));\n        return;\n    }\n\n    _stub->_counter_replicas_splitting_recent_split_succ_count->increment();\n    _replica->_primary_states.last_prepare_decree_on_new_primary =\n        _replica->_prepare_list->max_decree();\n    _replica->update_configuration(config);\n    _stub->_counter_replicas_splitting_recent_split_succ_count->increment();\n    ddebug_replica(\"child partition is active, status={}\", enum_to_string(status()));\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::parent_cleanup_split_context() // on parent partition\n{\n    _child_gpid.set_app_id(0);\n    _child_init_ballot = 0;\n    _split_status = split_status::NOT_SPLIT;\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::child_handle_split_error(\n    const std::string &error_msg) // on child partition\n{\n    if (status() != partition_status::PS_ERROR) {\n        derror_replica(\"child partition split failed because {}, parent = {}, split_duration = \"\n                       \"{}ms, async_learn_duration = {}ms\",\n                       error_msg,\n                       _replica->_split_states.parent_gpid,\n                       _replica->_split_states.total_ms(),\n                       _replica->_split_states.async_learn_ms());\n        _stub->_counter_replicas_splitting_recent_split_fail_count->increment();\n        _replica->update_local_configuration_with_no_ballot_change(partition_status::PS_ERROR);\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION_LONG\nvoid replica_split_manager::child_handle_async_learn_error() // on child 
partition\n{\n    _stub->split_replica_error_handler(\n        _replica->_split_states.parent_gpid,\n        std::bind(&replica_split_manager::parent_cleanup_split_context, std::placeholders::_1));\n    child_handle_split_error(\"meet error when execute child_learn_states\");\n    _replica->_split_states.async_learn_task = nullptr;\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::parent_handle_split_error(const std::string &child_err_msg,\n                                                      bool parent_clear_sync)\n{\n    _stub->split_replica_error_handler(_child_gpid,\n                                       std::bind(&replica_split_manager::child_handle_split_error,\n                                                 std::placeholders::_1,\n                                                 child_err_msg));\n    if (parent_clear_sync) {\n        _replica->_primary_states.sync_send_write_request = false;\n    }\n    parent_cleanup_split_context();\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::trigger_primary_parent_split(\n    const int32_t meta_partition_count,\n    const split_status::type meta_split_status) // on primary parent partition\n{\n    dcheck_eq_replica(status(), partition_status::PS_PRIMARY);\n    dcheck_eq_replica(_replica->_app_info.partition_count * 2, meta_partition_count);\n    ddebug_replica(\"app({}) partition count changed, local({}) VS meta({}), split_status local({}) \"\n                   \"VS meta({})\",\n                   _replica->_app_info.app_name,\n                   _replica->_app_info.partition_count,\n                   meta_partition_count,\n                   enum_to_string(_split_status),\n                   enum_to_string(meta_split_status));\n\n    _meta_split_status = meta_split_status;\n    if (meta_split_status == split_status::SPLITTING) {\n        if (!_replica->_primary_states.learners.empty() ||\n            _replica->_primary_states.membership.secondaries.size() + 1 
<\n                _replica->_primary_states.membership.max_replica_count) {\n            dwarn_replica(\n                \"there are {} learners or not have enough secondaries(count is {}), wait for \"\n                \"next round\",\n                _replica->_primary_states.learners.size(),\n                _replica->_primary_states.membership.secondaries.size());\n            return;\n        }\n\n        group_check_request add_child_request;\n        add_child_request.app = _replica->_app_info;\n        _replica->_primary_states.get_replica_config(status(), add_child_request.config);\n        auto child_gpid =\n            gpid(get_gpid().get_app_id(),\n                 get_gpid().get_partition_index() + _replica->_app_info.partition_count);\n        add_child_request.__set_child_gpid(child_gpid);\n        parent_start_split(add_child_request);\n        // broadcast group check request to secondaries to start split\n        _replica->broadcast_group_check();\n        return;\n    }\n\n    if (meta_split_status == split_status::PAUSING ||\n        meta_split_status == split_status::CANCELING) {\n        parent_stop_split(meta_split_status);\n        return;\n    }\n\n    if (meta_split_status == split_status::PAUSED) {\n        dwarn_replica(\"split has been paused, ignore it\");\n        return;\n    }\n\n    // meta_split_status == split_status::NOT_SPLIT\n    // meta partition_count = replica partition_count * 2\n    // There will be two cases:\n    // - case1. when primary replica register child succeed, but replica server crashed.\n    //   meta server will consider this parent partition not splitting, but parent group\n    //   partition_count is not updated\n    //   in this case, child has been registered on meta server\n    // - case2. 
when this parent partition is canceled, but other partitions is still canceling.\n    //   in this case, child partition ballot is invalid_ballot\n    // As a result, primary should send query_child_state rpc to meta server\n    query_child_state();\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::trigger_secondary_parent_split(\n    const group_check_request &request,\n    /*out*/ group_check_response &response) // on secondary parent partition\n{\n    if (request.app.partition_count ==\n        _replica->_app_info.partition_count * 2) { // secondary update partition count\n        update_local_partition_count(request.app.partition_count);\n        parent_cleanup_split_context();\n        return;\n    }\n\n    if (!request.__isset.meta_split_status) {\n        return;\n    }\n\n    if (request.meta_split_status == split_status::SPLITTING &&\n        request.__isset.child_gpid) { // secondary create child replica\n        parent_start_split(request);\n        return;\n    }\n\n    if (request.meta_split_status == split_status::PAUSING ||\n        request.meta_split_status == split_status::CANCELING) { // secondary pause or cancel split\n        parent_stop_split(request.meta_split_status);\n        response.__set_is_split_stopped(true);\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::copy_mutation(mutation_ptr &mu) // on parent partition\n{\n    dassert_replica(_child_gpid.get_app_id() > 0, \"child_gpid({}) is invalid\", _child_gpid);\n\n    if (mu->is_sync_to_child()) {\n        mu->wait_child();\n    }\n\n    mutation_ptr new_mu = mutation::copy_no_reply(mu);\n    error_code ec = _stub->split_replica_exec(\n        LPC_PARTITION_SPLIT,\n        _child_gpid,\n        std::bind(&replica_split_manager::on_copy_mutation, std::placeholders::_1, new_mu));\n    if (ec != ERR_OK) {\n        parent_cleanup_split_context();\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid 
replica_split_manager::on_copy_mutation(mutation_ptr &mu) // on child partition\n{\n    if (status() != partition_status::PS_PARTITION_SPLIT) {\n        derror_replica(\n            \"wrong status({}), ignore this mutation({})\", enum_to_string(status()), mu->name());\n        _stub->split_replica_error_handler(\n            _replica->_split_states.parent_gpid, [mu](replica_split_manager *split_mgr) {\n                split_mgr->parent_cleanup_split_context();\n                split_mgr->on_copy_mutation_reply(\n                    ERR_OK, mu->data.header.ballot, mu->data.header.decree);\n            });\n        return;\n    }\n\n    // It is possible for child has not copied parent prepare list, because parent and child may\n    // execute in different thread. In this case, child should ignore this mutation.\n    if (!_replica->_split_states.is_prepare_list_copied) {\n        return;\n    }\n\n    if (mu->data.header.ballot > get_ballot()) {\n        derror_replica(\"ballot changed, mutation ballot({}) vs local ballot({}), ignore copy this \"\n                       \"mutation({})\",\n                       mu->data.header.ballot,\n                       get_ballot(),\n                       mu->name());\n        _stub->split_replica_error_handler(\n            _replica->_split_states.parent_gpid, [mu](replica_split_manager *split_mgr) {\n                split_mgr->parent_cleanup_split_context();\n                split_mgr->on_copy_mutation_reply(\n                    ERR_OK, mu->data.header.ballot, mu->data.header.decree);\n            });\n        child_handle_split_error(\"on_copy_mutation failed because ballot changed\");\n        return;\n    }\n\n    mu->data.header.pid = get_gpid();\n    _replica->_prepare_list->prepare(mu, partition_status::PS_SECONDARY);\n    if (!mu->is_sync_to_child()) { // child copy mutation asynchronously\n        if (!mu->is_logged()) {\n            mu->set_logged();\n        }\n        mu->log_task() = 
_replica->_private_log->append(\n            mu, LPC_WRITE_REPLICATION_LOG, tracker(), nullptr, get_gpid().thread_hash());\n    } else { // child sync copy mutation\n        mu->log_task() = _replica->_private_log->append(mu,\n                                                        LPC_WRITE_REPLICATION_LOG,\n                                                        tracker(),\n                                                        std::bind(&replica::on_append_log_completed,\n                                                                  _replica,\n                                                                  mu,\n                                                                  std::placeholders::_1,\n                                                                  std::placeholders::_2),\n                                                        get_gpid().thread_hash());\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::ack_parent(error_code ec, mutation_ptr &mu) // on child partition\n{\n    dassert_replica(mu->is_sync_to_child(), \"mutation({}) should be copied synchronously\");\n    _stub->split_replica_exec(LPC_PARTITION_SPLIT,\n                              _replica->_split_states.parent_gpid,\n                              std::bind(&replica_split_manager::on_copy_mutation_reply,\n                                        std::placeholders::_1,\n                                        ec,\n                                        mu->data.header.ballot,\n                                        mu->data.header.decree));\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::on_copy_mutation_reply(error_code ec,\n                                                   ballot b,\n                                                   decree d) // on parent partition\n{\n    _replica->_checker.only_one_thread_access();\n\n    auto mu = _replica->_prepare_list->get_mutation_by_decree(d);\n    if (mu == nullptr) {\n    
    derror_replica(\"failed to get mutation in prepare list, decree = {}\", d);\n        return;\n    }\n\n    if (mu->data.header.ballot != b) {\n        derror_replica(\"ballot not match, mutation ballot({}) vs child mutation ballot({})\",\n                       mu->data.header.ballot,\n                       b);\n        return;\n    }\n\n    // set child prepare mutation flag\n    if (ec == ERR_OK) {\n        mu->child_acked();\n    } else {\n        derror_replica(\"child({}) copy mutation({}) failed, ballot={}, decree={}, error={}\",\n                       _child_gpid,\n                       mu->name(),\n                       b,\n                       d,\n                       ec);\n    }\n\n    // handle child ack\n    if (mu->data.header.ballot >= get_ballot() && status() != partition_status::PS_INACTIVE) {\n        switch (status()) {\n        case partition_status::PS_PRIMARY:\n            if (ec != ERR_OK) {\n                _replica->handle_local_failure(ec);\n            } else {\n                _replica->do_possible_commit_on_primary(mu);\n            }\n            break;\n        case partition_status::PS_SECONDARY:\n        case partition_status::PS_POTENTIAL_SECONDARY:\n            if (ec != ERR_OK) {\n                _replica->handle_local_failure(ec);\n            }\n            _replica->ack_prepare_message(ec, mu);\n            break;\n        case partition_status::PS_ERROR:\n            break;\n        default:\n            dassert_replica(false, \"wrong status({})\", enum_to_string(status()));\n            break;\n        }\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::parent_stop_split(\n    split_status::type meta_split_status) // on parent partition\n{\n    dassert_replica(status() == partition_status::PS_PRIMARY ||\n                        status() == partition_status::PS_SECONDARY,\n                    \"wrong partition_status({})\",\n                    enum_to_string(status()));\n    
dassert_replica(_split_status == split_status::SPLITTING ||\n                        _split_status == split_status::NOT_SPLIT,\n                    \"wrong split_status({})\",\n                    enum_to_string(_split_status));\n\n    auto old_status = _split_status;\n    if (_split_status == split_status::SPLITTING) {\n        parent_handle_split_error(\"stop partition split\", false);\n    }\n    _partition_version.store(_replica->_app_info.partition_count - 1);\n\n    if (status() == partition_status::PS_PRIMARY) {\n        _replica->_primary_states.sync_send_write_request = false;\n        _replica->broadcast_group_check();\n    }\n    ddebug_replica(\n        \"{} split succeed, status = {}, old split_status = {}, child partition_index = {}\",\n        meta_split_status == split_status::PAUSING ? \"pause\" : \"cancel\",\n        enum_to_string(status()),\n        enum_to_string(old_status),\n        get_gpid().get_partition_index() + _replica->_app_info.partition_count);\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::primary_parent_handle_stop_split(\n    const std::shared_ptr<group_check_request> &req,\n    const std::shared_ptr<group_check_response> &resp) // on primary parent partition\n{\n    if (!req->__isset.meta_split_status || (req->meta_split_status != split_status::PAUSING &&\n                                            req->meta_split_status != split_status::CANCELING)) {\n        // partition is not executing split or not stopping split\n        return;\n    }\n\n    if (!resp->__isset.is_split_stopped || !resp->is_split_stopped) {\n        // secondary has not stopped split\n        return;\n    }\n\n    _replica->_primary_states.split_stopped_secondary.insert(req->node);\n    auto count = 0;\n    for (auto &iter : _replica->_primary_states.statuses) {\n        if (iter.second == partition_status::PS_SECONDARY &&\n            _replica->_primary_states.split_stopped_secondary.find(iter.first) !=\n                
_replica->_primary_states.split_stopped_secondary.end()) {\n            ++count;\n        }\n    }\n    // all secondaries have already stopped split successfully\n    if (count == _replica->_primary_states.membership.max_replica_count - 1) {\n        _replica->_primary_states.cleanup_split_states();\n        parent_send_notify_stop_request(req->meta_split_status);\n    }\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::parent_send_notify_stop_request(\n    split_status::type meta_split_status) // on primary parent\n{\n    FAIL_POINT_INJECT_F(\"replica_parent_send_notify_stop_request\", [](dsn::string_view) {});\n    rpc_address meta_address(_stub->_failure_detector->get_servers());\n    std::unique_ptr<notify_stop_split_request> req = make_unique<notify_stop_split_request>();\n    req->app_name = _replica->_app_info.app_name;\n    req->parent_gpid = get_gpid();\n    req->meta_split_status = meta_split_status;\n    req->partition_count = _replica->_app_info.partition_count;\n\n    ddebug_replica(\"group {} split succeed, send notify_stop_request to meta server({})\",\n                   meta_split_status == split_status::PAUSING ? \"pause\" : \"cancel\",\n                   meta_address.to_string());\n    notify_stop_split_rpc rpc(\n        std::move(req), RPC_CM_NOTIFY_STOP_SPLIT, 0_ms, 0, get_gpid().thread_hash());\n    rpc.call(meta_address, tracker(), [this, rpc](error_code ec) mutable {\n        error_code err = ec == ERR_OK ? rpc.response().err : ec;\n        const std::string type =\n            rpc.request().meta_split_status == split_status::PAUSING ? 
\"pause\" : \"cancel\";\n        if (err != ERR_OK) {\n            dwarn_replica(\"notify {} split failed, error = {}, wait for next round\", type, err);\n        }\n    });\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::query_child_state() // on primary parent\n{\n    auto request = make_unique<query_child_state_request>();\n    request->app_name = _replica->_app_info.app_name;\n    request->pid = get_gpid();\n    request->partition_count = _replica->_app_info.partition_count;\n\n    rpc_address meta_address(_stub->_failure_detector->get_servers());\n    ddebug_replica(\"send query child partition state request to meta server({})\",\n                   meta_address.to_string());\n    query_child_state_rpc rpc(\n        std::move(request), RPC_CM_QUERY_CHILD_STATE, 0_ms, 0, get_gpid().thread_hash());\n    _replica->_primary_states.query_child_task =\n        rpc.call(meta_address, tracker(), [this, rpc](error_code ec) mutable {\n            on_query_child_state_reply(ec, rpc.request(), rpc.response());\n        });\n}\n\n// ThreadPool: THREAD_POOL_REPLICATION\nvoid replica_split_manager::on_query_child_state_reply(\n    error_code ec,\n    const query_child_state_request &request,\n    const query_child_state_response &response) // on primary parent\n{\n    _replica->_checker.only_one_thread_access();\n\n    if (ec != ERR_OK) {\n        dwarn_replica(\"query child partition state failed, error = {}, retry it later\", ec);\n        _replica->_primary_states.query_child_task =\n            tasking::enqueue(LPC_PARTITION_SPLIT,\n                             tracker(),\n                             std::bind(&replica_split_manager::query_child_state, this),\n                             get_gpid().thread_hash(),\n                             std::chrono::seconds(1));\n        return;\n    }\n\n    if (response.err != ERR_OK) {\n        dwarn_replica(\"app({}) partition({}) split has been canceled, ignore it\",\n                      
request.app_name,\n                      request.pid);\n        return;\n    }\n\n    ddebug_replica(\"query child partition succeed, child partition[{}] has already been ready\",\n                   response.child_config.pid);\n    // make child partition active\n    _stub->split_replica_exec(LPC_PARTITION_SPLIT,\n                              response.child_config.pid,\n                              std::bind(&replica_split_manager::child_partition_active,\n                                        std::placeholders::_1,\n                                        response.child_config));\n    update_local_partition_count(response.partition_count);\n    _replica->_primary_states.cleanup_split_states();\n    parent_cleanup_split_context();\n    // update parent group partition_count\n    _replica->broadcast_group_check();\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/split/replica_split_manager.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"replica/replica.h\"\n#include \"replica/replica_context.h\"\n#include \"replica/replica_stub.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_split_manager : replica_base\n{\npublic:\n    explicit replica_split_manager(replica *r);\n    ~replica_split_manager();\n\n    int32_t get_partition_version() const { return _partition_version.load(); }\n    gpid get_child_gpid() const { return _child_gpid; }\n    void set_child_gpid(gpid pid) { _child_gpid = pid; }\n    bool is_splitting() const\n    {\n        return _child_gpid.get_app_id() > 0 && _child_init_ballot > 0 &&\n               _split_status == split_status::SPLITTING;\n    }\n    split_status::type get_meta_split_status() { return _meta_split_status; }\n\nprivate:\n    // parent partition start split\n    void parent_start_split(const group_check_request &request);\n\n    // child replica initialize config and state info\n    void child_init_replica(gpid parent_gpid, rpc_address primary_address, ballot init_ballot);\n\n    void parent_prepare_states(const std::string &dir);\n\n    // child copy parent prepare list and call child_learn_states\n   
 void child_copy_prepare_list(learn_state lstate,\n                                 std::vector<mutation_ptr> mutation_list,\n                                 std::vector<std::string> plog_files,\n                                 uint64_t total_file_size,\n                                 std::shared_ptr<prepare_list> plist);\n\n    // child learn states(including checkpoint, private logs, in-memory mutations)\n    void child_learn_states(learn_state lstate,\n                            std::vector<mutation_ptr> mutation_list,\n                            std::vector<std::string> plog_files,\n                            uint64_t total_file_size,\n                            decree last_committed_decree);\n\n    // TODO(heyuchen): total_file_size is used for split perf-counter in further pull request\n    // Applies mutation logs that were learned from the parent of this child.\n    // This stage follows after that child applies the checkpoint of parent, and begins to apply the\n    // mutations.\n    // \\param last_committed_decree: parent's last_committed_decree when the checkpoint was\n    // generated.\n    error_code child_apply_private_logs(std::vector<std::string> plog_files,\n                                        std::vector<mutation_ptr> mutation_list,\n                                        uint64_t total_file_size,\n                                        decree last_committed_decree);\n\n    // child catch up parent states while executing async learn task\n    void child_catch_up_states();\n\n    // child send notification to primary parent when it finish async learn\n    void child_notify_catch_up();\n\n    // primary parent handle child catch_up request\n    void parent_handle_child_catch_up(const notify_catch_up_request &request,\n                                      notify_cacth_up_response &response);\n\n    // primary parent check if sync_point has been committed\n    // sync_point is the first decree after parent send write request to child 
synchronously\n    void parent_check_sync_point_commit(decree sync_point);\n\n    // primary parent update child group partition count\n    void update_child_group_partition_count(int32_t new_partition_count);\n\n    void parent_send_update_partition_count_request(\n        const rpc_address &address,\n        int32_t new_partition_count,\n        std::shared_ptr<std::unordered_set<rpc_address>> &not_replied_addresses);\n\n    // child update its partition_count\n    void\n    on_update_child_group_partition_count(const update_child_group_partition_count_request &request,\n                                          update_child_group_partition_count_response &response);\n\n    void on_update_child_group_partition_count_reply(\n        error_code ec,\n        const update_child_group_partition_count_request &request,\n        const update_child_group_partition_count_response &response,\n        std::shared_ptr<std::unordered_set<rpc_address>> &not_replied_addresses);\n\n    // all replicas update partition_count in memory and disk\n    void update_local_partition_count(int32_t new_partition_count);\n\n    // primary parent register children on meta_server\n    void register_child_on_meta(ballot b);\n    void on_register_child_on_meta_reply(error_code ec,\n                                         const register_child_request &request,\n                                         const register_child_response &response);\n    // primary sends register request to meta_server\n    void parent_send_register_request(const register_child_request &request);\n\n    // child partition has been registered on meta_server, could be active\n    void child_partition_active(const partition_configuration &config);\n\n    // return true if parent status is valid\n    bool parent_check_states();\n    // check if child status is valid\n    void child_check_split_context();\n\n    // parent reset child information when partition split failed\n    void parent_cleanup_split_context();\n    // 
child suicide when partition split failed\n    void child_handle_split_error(const std::string &error_msg);\n    // child handle error while async learn parent states\n    void child_handle_async_learn_error();\n    // parent reset its split context and let child handle error\n    void parent_handle_split_error(const std::string &child_err_msg, bool parent_clear_sync);\n\n    // called by `on_config_sync` in `replica_config.cpp`\n    // primary parent start or stop split according to meta_split_status\n    void trigger_primary_parent_split(const int32_t meta_partition_count,\n                                      const split_status::type meta_split_status);\n\n    // called by `on_group_check` in `replica_check.cpp`\n    // secondary parent check whether should start or stop split\n    void trigger_secondary_parent_split(const group_check_request &request,\n                                        /*out*/ group_check_response &response);\n\n    // parent copy mutations to child during partition split\n    void copy_mutation(mutation_ptr &mu);\n\n    // child add mutation into prepare list and private log\n    // after child copy prepare list, before child replica become active\n    void on_copy_mutation(mutation_ptr &mu);\n\n    // when child copy mutation synchronously, child replica send ack to its parent\n    void ack_parent(dsn::error_code ec, mutation_ptr &mu);\n\n    // when child copy mutation synchronously, parent replica handle child ack\n    void on_copy_mutation_reply(dsn::error_code ec, ballot b, decree d);\n\n    // parent partition pause or cancel split\n    void parent_stop_split(split_status::type meta_split_status);\n\n    // called by `on_group_check_reply` in `replica_check.cpp`\n    // if group all replica pause/cancel split, send notify request to meta server\n    void primary_parent_handle_stop_split(const std::shared_ptr<group_check_request> &req,\n                                          const std::shared_ptr<group_check_response> &resp);\n  
  void parent_send_notify_stop_request(split_status::type meta_split_status);\n\n    // called by `trigger_primary_parent_split`, query child state on meta server\n    void query_child_state();\n    void on_query_child_state_reply(error_code ec,\n                                    const query_child_state_request &request,\n                                    const query_child_state_response &response);\n\n    //\n    // helper functions\n    //\n    partition_status::type status() const { return _replica->status(); }\n    ballot get_ballot() const { return _replica->get_ballot(); }\n    decree last_committed_decree() const { return _replica->last_committed_decree(); }\n    task_tracker *tracker() { return _replica->tracker(); }\n    bool should_reject_request() const { return get_partition_version() == -1; }\n    bool check_partition_hash(const uint64_t &partition_hash, const std::string &op) const\n    {\n        auto target_pidx = get_partition_version() & partition_hash;\n        if (dsn_unlikely(target_pidx != get_gpid().get_partition_index())) {\n            derror_replica(\n                \"receive {} request with wrong partition_hash({}), partition_version = {}, \"\n                \"target_pidx = {}\",\n                op,\n                partition_hash,\n                get_partition_version(),\n                target_pidx);\n            return false;\n        }\n        return true;\n    }\n\nprivate:\n    replica *_replica;\n    replica_stub *_stub;\n\n    friend class replica;\n    friend class replica_stub;\n    friend class replica_split_test;\n\n    split_status::type _split_status{split_status::NOT_SPLIT};\n\n    // _child_gpid = gpid({app_id},{pidx}+{old_partition_count}) for parent partition\n    // _child_gpid.app_id = 0 for parent partition not in partition split and child partition\n    gpid _child_gpid{0, 0};\n    // ballot when starting partition split and split will stop if ballot changed\n    // _child_init_ballot = 0 if partition not in 
partition split\n    ballot _child_init_ballot{0};\n    // in normal cases, _partition_version = partition_count-1\n    // when the replica rejects client read/write requests, _partition_version = -1\n    std::atomic<int32_t> _partition_version;\n\n    // Used for primary parent\n    // It is updated each time config is synced from meta\n    // TODO(heyuchen): clear it when the primary parent cleans up its status\n    split_status::type _meta_split_status{split_status::NOT_SPLIT};\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/split/test/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_replica_split_test)\n\nset(MY_PROJ_SRC \"\")\n\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS dsn_meta_server\n        dsn_replica_server\n        dsn_replication_common\n        dsn_runtime\n        hashtable\n        gtest\n)\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\nset(MY_BINPLACES\n        config-test.ini\n        run.sh\n)\n\ndsn_add_test()\n"
  },
  {
    "path": "src/replica/split/test/config-test.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n[apps.replica]\ntype = replica\nrun = true\ncount = 1\nports = 54321\npools = THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[core]\n;tool = simulator\ntool = nativerun\n\n;toollets = tracer, profiler\n;fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_DEBUG\nlogging_factory_name = dsn::tools::simple_logger\n\n\n[tools.simple_logger]\nfast_flush = true\nshort_header = false\nstderr_start_level = 
LOG_LEVEL_WARNING\n\n[tools.simulator]\nrandom_seed = 1465902258\n\n[tools.screen_logger]\nshort_header = false\n\n[network]\n; how many network threads for network library (used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 4\n\n[threadpool.THREAD_POOL_DEFAULT]\nname = default\npartitioned = false\nworker_priority = THREAD_xPRIORITY_NORMAL\nworker_count = 2\n\n[threadpool.THREAD_POOL_REPLICATION]\nname = replica\npartitioned = true\nworker_priority = THREAD_xPRIORITY_NORMAL\nworker_count = 3\n\n[threadpool.THREAD_POOL_REPLICATION_LONG]\nname = replica_long\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n"
  },
  {
    "path": "src/replica/split/test/main.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n\n#include <dsn/service_api_cpp.h>\n\nint g_test_count = 0;\nint g_test_ret = 0;\n\nclass gtest_app : public dsn::service_app\n{\npublic:\n    gtest_app(const dsn::service_app_info *info) : ::dsn::service_app(info) {}\n\n    dsn::error_code start(const std::vector<std::string> &args) override\n    {\n        g_test_ret = RUN_ALL_TESTS();\n        g_test_count = 1;\n        return dsn::ERR_OK;\n    }\n\n    dsn::error_code stop(bool) override { return dsn::ERR_OK; }\n};\n\nGTEST_API_ int main(int argc, char **argv)\n{\n    testing::InitGoogleTest(&argc, argv);\n\n    dsn::service_app::register_factory<gtest_app>(\"replica\");\n\n    dsn_run_config(\"config-test.ini\", false);\n    while (g_test_count == 0) {\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n    }\n\n    dsn_exit(g_test_ret);\n}\n"
  },
  {
    "path": "src/replica/split/test/replica_split_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"replica/split/replica_split_manager.h\"\n#include \"replica/test/replica_test_base.h\"\n\n#include <gtest/gtest.h>\n#include <dsn/utility/fail_point.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_split_test : public replica_test_base\n{\npublic:\n    replica_split_test()\n    {\n        mock_app_info();\n        _parent_replica = stub->generate_replica_ptr(\n            _app_info, PARENT_GPID, partition_status::PS_PRIMARY, INIT_BALLOT);\n        _parent_split_mgr = make_unique<replica_split_manager>(_parent_replica.get());\n        fail::setup();\n        fail::cfg(\"replica_update_local_configuration\", \"return()\");\n    }\n\n    ~replica_split_test() { fail::teardown(); }\n\n    /// mock functions\n\n    void mock_app_info()\n    {\n        _app_info.app_id = APP_ID;\n        _app_info.app_name = APP_NAME;\n        _app_info.app_type = \"replica\";\n        _app_info.is_stateful = true;\n        _app_info.max_replica_count = 3;\n        _app_info.partition_count = OLD_PARTITION_COUNT;\n    }\n\n    void generate_child()\n    {\n        _child_replica = stub->generate_replica_ptr(\n            _app_info, 
CHILD_GPID, partition_status::PS_PARTITION_SPLIT, INIT_BALLOT);\n        _child_split_mgr = make_unique<replica_split_manager>(_child_replica.get());\n    }\n\n    void generate_child(bool is_prepare_list_copied, bool is_caught_up)\n    {\n        generate_child();\n        _child_replica->_split_states.parent_gpid = PARENT_GPID;\n        _child_replica->_split_states.is_prepare_list_copied = is_prepare_list_copied;\n        _child_replica->_split_states.is_caught_up = is_caught_up;\n    }\n\n    void mock_child_split_context(bool is_prepare_list_copied, bool is_caught_up)\n    {\n        _child_replica->set_partition_status(partition_status::PS_PARTITION_SPLIT);\n        _child_replica->_split_states.parent_gpid = PARENT_GPID;\n        _child_replica->_split_states.is_prepare_list_copied = is_prepare_list_copied;\n        _child_replica->_split_states.is_caught_up = is_caught_up;\n    }\n\n    void mock_parent_split_context(partition_status::type status)\n    {\n        parent_set_split_status(split_status::SPLITTING);\n        _parent_split_mgr->_child_gpid = CHILD_GPID;\n        _parent_split_mgr->_child_init_ballot = INIT_BALLOT;\n        _parent_replica->set_partition_status(status);\n    }\n\n    void mock_primary_parent_split_context(bool sync_send_write_request,\n                                           bool will_all_caught_up = false)\n    {\n        mock_parent_split_context(partition_status::PS_PRIMARY);\n        _parent_replica->_primary_states.statuses.clear();\n        _parent_replica->_primary_states.statuses[PRIMARY] = partition_status::PS_PRIMARY;\n        _parent_replica->_primary_states.statuses[SECONDARY] = partition_status::PS_SECONDARY;\n        _parent_replica->_primary_states.statuses[SECONDARY2] = partition_status::PS_SECONDARY;\n        _parent_replica->_primary_states.sync_send_write_request = sync_send_write_request;\n        if (!sync_send_write_request) {\n            
_parent_replica->_primary_states.caught_up_children.insert(SECONDARY);\n            if (will_all_caught_up) {\n                _parent_replica->_primary_states.caught_up_children.insert(SECONDARY2);\n            }\n        }\n    }\n\n    void mock_shared_log()\n    {\n        mock_mutation_log_shared_ptr shared_log_mock = new mock_mutation_log_shared(\"./\");\n        stub->set_log(shared_log_mock);\n    }\n\n    void mock_private_log(gpid pid, mock_replica_ptr rep, bool mock_log_file_flag)\n    {\n        mock_mutation_log_private_ptr private_log_mock = new mock_mutation_log_private(pid, rep);\n        if (mock_log_file_flag) {\n            mock_log_file_ptr log_file_mock = new mock_log_file(\"log.1.0.txt\", 0);\n            log_file_mock->set_file_size(100);\n            private_log_mock->add_log_file(log_file_mock);\n        }\n        rep->_private_log = private_log_mock;\n    }\n\n    void mock_prepare_list(mock_replica_ptr rep, bool add_to_plog)\n    {\n        _mock_plist = new prepare_list(rep, 1, MAX_COUNT, [](mutation_ptr mu) {});\n        for (int i = 1; i < MAX_COUNT + 1; ++i) {\n            mutation_ptr mu = new mutation();\n            mu->data.header.decree = i;\n            mu->data.header.ballot = INIT_BALLOT;\n            _mock_plist->put(mu);\n            if (add_to_plog) {\n                rep->_private_log->append(mu, LPC_WRITE_REPLICATION_LOG_PRIVATE, nullptr, nullptr);\n                mu->set_logged();\n            }\n        }\n        rep->_prepare_list.reset(_mock_plist);\n    }\n\n    void mock_parent_states()\n    {\n        mock_shared_log();\n        mock_private_log(PARENT_GPID, _parent_replica, true);\n        mock_prepare_list(_parent_replica, true);\n    }\n\n    void mock_mutation_list(decree min_decree)\n    {\n        // mock mutation list\n        for (int d = 1; d < MAX_COUNT; ++d) {\n            mutation_ptr mu = _mock_plist->get_mutation_by_decree(d);\n            if (d > min_decree) {\n                
_mutation_list.push_back(mu);\n            }\n        }\n    }\n\n    void\n    mock_child_async_learn_states(mock_replica_ptr plist_rep, bool add_to_plog, decree min_decree)\n    {\n        mock_shared_log();\n        mock_private_log(CHILD_GPID, _child_replica, false);\n        mock_prepare_list(plist_rep, add_to_plog);\n        // mock_learn_state\n        _mock_learn_state.to_decree_included = DECREE;\n        _mock_learn_state.files.push_back(\"fake_file_name\");\n        // mock parent private log files\n        _private_log_files.push_back(\"log.1.0.txt\");\n        // mock mutation list\n        mock_mutation_list(min_decree);\n    }\n\n    void mock_parent_primary_configuration(bool lack_of_secondary = false)\n    {\n        partition_configuration config;\n        config.max_replica_count = 3;\n        config.pid = PARENT_GPID;\n        config.ballot = INIT_BALLOT;\n        config.primary = PRIMARY;\n        config.secondaries.emplace_back(SECONDARY);\n        if (!lack_of_secondary) {\n            config.secondaries.emplace_back(SECONDARY2);\n        }\n        _parent_replica->set_primary_partition_configuration(config);\n    }\n\n    void mock_update_child_partition_count_request(update_child_group_partition_count_request &req,\n                                                   ballot b)\n    {\n        req.child_pid = CHILD_GPID;\n        req.ballot = b;\n        req.target_address = PRIMARY;\n        req.new_partition_count = NEW_PARTITION_COUNT;\n    }\n\n    /// test functions\n    void test_parent_start_split(ballot b, gpid req_child_gpid, split_status::type status)\n    {\n        parent_set_split_status(status);\n\n        group_check_request req;\n        req.config.ballot = b;\n        req.config.status = partition_status::PS_PRIMARY;\n        req.__set_child_gpid(req_child_gpid);\n\n        _parent_split_mgr->parent_start_split(req);\n        _parent_replica->tracker()->wait_outstanding_tasks();\n    }\n\n    void test_child_init_replica()\n 
   {\n        _child_replica = stub->generate_replica_ptr(\n            _app_info, CHILD_GPID, partition_status::PS_INACTIVE, INIT_BALLOT);\n        _child_split_mgr = make_unique<replica_split_manager>(_child_replica.get());\n        _child_split_mgr->child_init_replica(PARENT_GPID, PRIMARY, INIT_BALLOT);\n        // check_state_task will cost 3 seconds, cancel it immediately\n        bool finished = false;\n        _child_replica->_split_states.check_state_task->cancel(false, &finished);\n        if (finished) {\n            _child_replica->_split_states.check_state_task = nullptr;\n        }\n        _child_replica->tracker()->wait_outstanding_tasks();\n    }\n\n    bool test_parent_check_states()\n    {\n        bool flag = _parent_split_mgr->parent_check_states();\n        _parent_replica->tracker()->wait_outstanding_tasks();\n        return flag;\n    }\n\n    void test_child_copy_prepare_list()\n    {\n        mock_child_async_learn_states(_parent_replica, false, DECREE);\n        std::shared_ptr<prepare_list> plist =\n            std::make_shared<prepare_list>(_parent_replica, *_mock_plist);\n        _child_split_mgr->child_copy_prepare_list(_mock_learn_state,\n                                                  _mutation_list,\n                                                  _private_log_files,\n                                                  TOTAL_FILE_SIZE,\n                                                  std::move(plist));\n        _child_replica->tracker()->wait_outstanding_tasks();\n    }\n\n    void test_child_learn_states()\n    {\n        mock_child_async_learn_states(_child_replica, true, DECREE);\n        _child_split_mgr->child_learn_states(\n            _mock_learn_state, _mutation_list, _private_log_files, TOTAL_FILE_SIZE, DECREE);\n        _child_replica->tracker()->wait_outstanding_tasks();\n    }\n\n    void test_child_apply_private_logs()\n    {\n        mock_child_async_learn_states(_child_replica, true, 0);\n        
_child_split_mgr->child_apply_private_logs(\n            _private_log_files, _mutation_list, TOTAL_FILE_SIZE, DECREE);\n        _child_replica->tracker()->wait_outstanding_tasks();\n    }\n\n    void test_child_catch_up_states(decree local_decree, decree goal_decree, decree min_decree)\n    {\n        mock_child_async_learn_states(_child_replica, true, 0);\n        _child_replica->set_app_last_committed_decree(local_decree);\n        if (local_decree < goal_decree) {\n            // set prepare_list's start_decree = {min_decree}\n            _child_replica->prepare_list_truncate(min_decree);\n            // set prepare_list's last_committed_decree = {goal_decree}\n            _child_replica->prepare_list_commit_hard(goal_decree);\n        }\n        _child_split_mgr->child_catch_up_states();\n        _child_replica->tracker()->wait_outstanding_tasks();\n    }\n\n    error_code test_parent_handle_child_catch_up(ballot child_ballot)\n    {\n        _parent_split_mgr->_child_gpid = CHILD_GPID;\n\n        notify_catch_up_request req;\n        req.child_gpid = CHILD_GPID;\n        req.parent_gpid = PARENT_GPID;\n        req.child_ballot = child_ballot;\n        req.child_address = PRIMARY;\n\n        notify_cacth_up_response resp;\n        _parent_split_mgr->parent_handle_child_catch_up(req, resp);\n        _parent_replica->tracker()->wait_outstanding_tasks();\n        return resp.err;\n    }\n\n    void test_update_child_group_partition_count()\n    {\n        _parent_split_mgr->update_child_group_partition_count(NEW_PARTITION_COUNT);\n        _parent_replica->tracker()->wait_outstanding_tasks();\n        _child_replica->tracker()->wait_outstanding_tasks();\n    }\n\n    error_code test_on_update_child_group_partition_count(ballot b)\n    {\n        update_child_group_partition_count_request req;\n        mock_update_child_partition_count_request(req, b);\n\n        update_child_group_partition_count_response resp;\n        
_child_split_mgr->on_update_child_group_partition_count(req, resp);\n        _child_replica->tracker()->wait_outstanding_tasks();\n        return resp.err;\n    }\n\n    error_code test_on_update_child_group_partition_count_reply(error_code resp_err)\n    {\n        update_child_group_partition_count_request req;\n        mock_update_child_partition_count_request(req, INIT_BALLOT);\n        update_child_group_partition_count_response resp;\n        resp.err = resp_err;\n        auto not_replied_addresses = std::make_shared<std::unordered_set<rpc_address>>();\n        not_replied_addresses->insert(PRIMARY);\n\n        _parent_split_mgr->on_update_child_group_partition_count_reply(\n            ERR_OK, req, resp, not_replied_addresses);\n        _parent_replica->tracker()->wait_outstanding_tasks();\n        _child_replica->tracker()->wait_outstanding_tasks();\n        return resp.err;\n    }\n\n    void test_register_child_on_meta()\n    {\n        parent_set_split_status(split_status::SPLITTING);\n        _parent_split_mgr->register_child_on_meta(INIT_BALLOT);\n        _parent_replica->tracker()->wait_outstanding_tasks();\n    }\n\n    void test_on_register_child_reply(partition_status::type status, dsn::error_code resp_err)\n    {\n        stub->set_state_connected();\n        stub->set_rpc_address(PRIMARY);\n        mock_parent_split_context(status);\n        _parent_replica->_primary_states.sync_send_write_request = true;\n        _parent_split_mgr->_partition_version = -1;\n        _parent_replica->_inactive_is_transient = true;\n\n        register_child_request req;\n        req.app = _app_info;\n        req.parent_config.pid = PARENT_GPID;\n        req.parent_config.ballot = INIT_BALLOT;\n        req.parent_config.last_committed_decree = DECREE;\n        req.parent_config.primary = PRIMARY;\n        req.child_config.pid = CHILD_GPID;\n        req.child_config.ballot = INIT_BALLOT + 1;\n        req.child_config.last_committed_decree = 0;\n        
req.primary_address = PRIMARY;\n\n        register_child_response resp;\n        resp.err = resp_err;\n        resp.app = req.app;\n        resp.app.partition_count *= 2;\n        resp.parent_config = req.parent_config;\n        resp.child_config = req.child_config;\n\n        _parent_split_mgr->on_register_child_on_meta_reply(ERR_OK, req, resp);\n        _parent_replica->tracker()->wait_outstanding_tasks();\n        _child_replica->tracker()->wait_outstanding_tasks();\n    }\n\n    void test_trigger_primary_parent_split(split_status::type meta_split_status,\n                                           split_status::type local_split_status,\n                                           int32_t old_partition_version)\n    {\n        parent_set_split_status(local_split_status);\n        _parent_split_mgr->_partition_version.store(old_partition_version);\n        _parent_split_mgr->trigger_primary_parent_split(NEW_PARTITION_COUNT, meta_split_status);\n        _parent_replica->tracker()->wait_outstanding_tasks();\n    }\n\n    group_check_response test_trigger_secondary_parent_split(split_status::type meta_split_status,\n                                                             split_status::type local_split_status)\n    {\n        _parent_replica->set_partition_status(partition_status::PS_SECONDARY);\n        parent_set_split_status(local_split_status);\n\n        group_check_request req;\n        req.app = _parent_replica->_app_info;\n        req.config.ballot = INIT_BALLOT;\n        req.config.status = partition_status::PS_SECONDARY;\n        req.node = SECONDARY;\n        if (meta_split_status == split_status::PAUSING ||\n            meta_split_status == split_status::CANCELING) {\n            req.__set_meta_split_status(meta_split_status);\n        }\n        if (meta_split_status == split_status::NOT_SPLIT) {\n            req.app.partition_count *= 2;\n        }\n\n        group_check_response resp;\n        _parent_split_mgr->trigger_secondary_parent_split(req, 
resp);\n        _parent_replica->tracker()->wait_outstanding_tasks();\n\n        return resp;\n    }\n\n    void test_primary_parent_handle_stop_split(split_status::type meta_split_status,\n                                               bool lack_of_secondary,\n                                               bool will_all_stop)\n    {\n        _parent_replica->set_partition_status(partition_status::PS_PRIMARY);\n        _parent_replica->_primary_states.statuses[PRIMARY] = partition_status::PS_PRIMARY;\n        _parent_replica->_primary_states.statuses[SECONDARY] = partition_status::PS_SECONDARY;\n        _parent_replica->_primary_states.statuses[SECONDARY2] =\n            lack_of_secondary ? partition_status::PS_POTENTIAL_SECONDARY\n                              : partition_status::PS_SECONDARY;\n        _parent_replica->_primary_states.sync_send_write_request = true;\n        _parent_replica->_primary_states.split_stopped_secondary.clear();\n        mock_parent_primary_configuration(lack_of_secondary);\n\n        std::shared_ptr<group_check_request> req = std::make_shared<group_check_request>();\n        std::shared_ptr<group_check_response> resp = std::make_shared<group_check_response>();\n        req->node = SECONDARY;\n        if (meta_split_status != split_status::NOT_SPLIT) {\n            req->__set_meta_split_status(meta_split_status);\n        }\n\n        if (meta_split_status == split_status::PAUSING ||\n            meta_split_status == split_status::CANCELING) {\n            resp->__set_is_split_stopped(true);\n            if (will_all_stop) {\n                _parent_replica->_primary_states.split_stopped_secondary.insert(SECONDARY2);\n            }\n        }\n\n        _parent_split_mgr->primary_parent_handle_stop_split(req, resp);\n        _parent_replica->tracker()->wait_outstanding_tasks();\n    }\n\n    void test_on_query_child_state_reply()\n    {\n        _parent_split_mgr->_partition_version.store(-1);\n\n        query_child_state_request req;\n 
       req.app_name = APP_NAME;\n        req.partition_count = OLD_PARTITION_COUNT;\n        req.pid = PARENT_GPID;\n\n        partition_configuration child_config;\n        child_config.pid = CHILD_GPID;\n        child_config.ballot = INIT_BALLOT + 1;\n        child_config.last_committed_decree = 0;\n\n        query_child_state_response resp;\n        resp.err = ERR_OK;\n        resp.__set_partition_count(NEW_PARTITION_COUNT);\n        resp.__set_child_config(child_config);\n\n        _parent_split_mgr->on_query_child_state_reply(ERR_OK, req, resp);\n        _parent_split_mgr->tracker()->wait_outstanding_tasks();\n        _child_split_mgr->tracker()->wait_outstanding_tasks();\n    }\n\n    bool test_check_partition_hash(const int32_t &partition_version, const uint64_t &partition_hash)\n    {\n        _parent_split_mgr->_partition_version.store(partition_version);\n        return _parent_split_mgr->check_partition_hash(partition_hash, \"write\");\n    }\n\n    /// helper functions\n    void cleanup_prepare_list(mock_replica_ptr rep) { rep->_prepare_list->reset(0); }\n    void cleanup_child_split_context()\n    {\n        _child_replica->_split_states.cleanup(true);\n        _child_replica->tracker()->wait_outstanding_tasks();\n    }\n\n    int32_t child_get_prepare_list_count() { return _child_replica->get_plist()->count(); }\n    bool child_is_prepare_list_copied()\n    {\n        return _child_replica->_split_states.is_prepare_list_copied;\n    }\n    bool child_is_caught_up() { return _child_replica->_split_states.is_caught_up; }\n\n    split_status::type parent_get_split_status() { return _parent_split_mgr->_split_status; }\n    void parent_set_split_status(split_status::type status)\n    {\n        _parent_split_mgr->_split_status = status;\n    }\n\n    primary_context get_replica_primary_context(mock_replica_ptr rep)\n    {\n        return rep->_primary_states;\n    }\n    bool parent_sync_send_write_request()\n    {\n        return 
_parent_replica->_primary_states.sync_send_write_request;\n    }\n    int32_t parent_stopped_split_size()\n    {\n        return _parent_replica->_primary_states.split_stopped_secondary.size();\n    }\n    bool is_parent_not_in_split()\n    {\n        return _parent_split_mgr->_child_gpid.get_app_id() == 0 &&\n               _parent_split_mgr->_child_init_ballot == 0 &&\n               _parent_split_mgr->_split_status == split_status::NOT_SPLIT;\n    }\n    bool primary_parent_not_in_split()\n    {\n        auto context = _parent_replica->_primary_states;\n        return context.caught_up_children.size() == 0 && context.register_child_task == nullptr &&\n               context.sync_send_write_request == false && context.query_child_task == nullptr &&\n               context.split_stopped_secondary.size() == 0 && is_parent_not_in_split();\n    }\n\npublic:\n    const std::string APP_NAME = \"split_table\";\n    const int32_t APP_ID = 2;\n    const int32_t OLD_PARTITION_COUNT = 8;\n    const int32_t NEW_PARTITION_COUNT = 16;\n    const rpc_address PRIMARY = rpc_address(\"127.0.0.1\", 18230);\n    const rpc_address SECONDARY = rpc_address(\"127.0.0.2\", 10058);\n    const rpc_address SECONDARY2 = rpc_address(\"127.0.0.3\", 10805);\n    const gpid PARENT_GPID = gpid(APP_ID, 1);\n    const gpid CHILD_GPID = gpid(APP_ID, 9);\n    const ballot INIT_BALLOT = 3;\n    const decree DECREE = 5;\n    const int32_t MAX_COUNT = 10;\n    const uint64_t TOTAL_FILE_SIZE = 100;\n\n    mock_replica_ptr _parent_replica;\n    mock_replica_ptr _child_replica;\n    std::unique_ptr<replica_split_manager> _parent_split_mgr;\n    std::unique_ptr<replica_split_manager> _child_split_mgr;\n\n    app_info _app_info;\n    std::vector<std::string> _private_log_files;\n    std::vector<mutation_ptr> _mutation_list;\n    prepare_list *_mock_plist;\n    learn_state _mock_learn_state;\n};\n\n// parent_start_split tests\nTEST_F(replica_split_test, parent_start_split_tests)\n{\n    
fail::cfg(\"replica_stub_create_child_replica_if_not_found\", \"return()\");\n    fail::cfg(\"replica_child_init_replica\", \"return()\");\n\n    ballot WRONG_BALLOT = 2;\n\n    // Test cases:\n    // - wrong ballot\n    // - partition has already executing splitting\n    // - old add child request\n    // - start succeed\n    struct start_split_test\n    {\n        ballot req_ballot;\n        gpid req_child_gpid;\n        split_status::type local_split_status;\n        split_status::type expected_split_status;\n        bool start_split_succeed;\n    } tests[] = {\n        {WRONG_BALLOT, CHILD_GPID, split_status::NOT_SPLIT, split_status::NOT_SPLIT, false},\n        {INIT_BALLOT, CHILD_GPID, split_status::SPLITTING, split_status::SPLITTING, false},\n        {INIT_BALLOT, PARENT_GPID, split_status::NOT_SPLIT, split_status::NOT_SPLIT, false},\n        {INIT_BALLOT, CHILD_GPID, split_status::NOT_SPLIT, split_status::SPLITTING, true}};\n    for (auto test : tests) {\n        test_parent_start_split(test.req_ballot, test.req_child_gpid, test.local_split_status);\n        ASSERT_EQ(parent_get_split_status(), test.expected_split_status);\n        if (test.start_split_succeed) {\n            ASSERT_EQ(_parent_split_mgr->get_partition_version(), OLD_PARTITION_COUNT - 1);\n            stub->get_replica(CHILD_GPID)->tracker()->wait_outstanding_tasks();\n            ASSERT_EQ(stub->get_replica(CHILD_GPID)->status(), partition_status::PS_INACTIVE);\n        }\n    }\n}\n\n// child_init_replica test\nTEST_F(replica_split_test, child_init_replica_test)\n{\n    fail::cfg(\"replica_stub_split_replica_exec\", \"return()\");\n    test_child_init_replica();\n    ASSERT_EQ(_child_replica->status(), partition_status::PS_PARTITION_SPLIT);\n    ASSERT_FALSE(child_is_prepare_list_copied());\n    ASSERT_FALSE(child_is_caught_up());\n}\n\n// parent_check_states tests\nTEST_F(replica_split_test, parent_check_states_tests)\n{\n    fail::cfg(\"replica_stub_split_replica_exec\", 
\"return()\");\n\n    // Test cases:\n    // - wrong parent partition status\n    // - check parent states succeed\n    struct parent_check_state_test\n    {\n        partition_status::type parent_status;\n        bool expected_flag;\n    } tests[] = {{partition_status::PS_POTENTIAL_SECONDARY, false},\n                 {partition_status::PS_SECONDARY, true}};\n    for (auto test : tests) {\n        mock_parent_split_context(test.parent_status);\n        ASSERT_EQ(test_parent_check_states(), test.expected_flag);\n    }\n}\n\n// child_copy_prepare_list test\nTEST_F(replica_split_test, copy_prepare_list_succeed)\n{\n    fail::cfg(\"replica_stub_split_replica_exec\", \"return()\");\n    fail::cfg(\"replica_child_learn_states\", \"return()\");\n\n    generate_child(false, false);\n    ASSERT_FALSE(child_is_prepare_list_copied());\n    test_child_copy_prepare_list();\n    ASSERT_TRUE(child_is_prepare_list_copied());\n    ASSERT_EQ(child_get_prepare_list_count(), MAX_COUNT);\n\n    cleanup_prepare_list(_parent_replica);\n    cleanup_prepare_list(_child_replica);\n    cleanup_child_split_context();\n}\n\n// child_learn_states tests\nTEST_F(replica_split_test, child_learn_states_tests)\n{\n    generate_child();\n\n    // Test cases:\n    // - mock replay private log error\n    // - child learn states succeed\n    struct child_learn_state_test\n    {\n        bool mock_replay_log_error;\n        partition_status::type expected_child_status;\n    } tests[] = {{true, partition_status::PS_ERROR}, {false, partition_status::PS_PARTITION_SPLIT}};\n    for (auto test : tests) {\n        fail::setup();\n        fail::cfg(\"replica_child_catch_up_states\", \"return()\");\n        fail::cfg(\"replica_stub_split_replica_exec\", \"return()\");\n        if (test.mock_replay_log_error) {\n            fail::cfg(\"replica_child_apply_private_logs\", \"return(ERR_INVALID_STATE)\");\n        } else {\n            fail::cfg(\"replica_child_apply_private_logs\", \"return()\");\n        }\n      
  mock_child_split_context(true, false);\n        test_child_learn_states();\n        ASSERT_EQ(_child_replica->status(), test.expected_child_status);\n\n        cleanup_prepare_list(_child_replica);\n        cleanup_child_split_context();\n        fail::teardown();\n    }\n}\n\n// child_apply_private_logs test\nTEST_F(replica_split_test, child_apply_private_logs_succeed)\n{\n    fail::cfg(\"mutation_log_replay_succeed\", \"return()\");\n    fail::cfg(\"replication_app_base_apply_mutation\", \"return()\");\n\n    generate_child(true, false);\n    test_child_apply_private_logs();\n    ASSERT_EQ(child_get_prepare_list_count(), MAX_COUNT);\n\n    cleanup_prepare_list(_child_replica);\n    cleanup_child_split_context();\n}\n\n// child_catch_up_states tests\nTEST_F(replica_split_test, child_catch_up_states_tests)\n{\n    fail::cfg(\"replica_child_notify_catch_up\", \"return()\");\n    fail::cfg(\"replication_app_base_apply_mutation\", \"return()\");\n    generate_child();\n\n    // Test cases:\n    // - child catch up with all states learned\n    // - child catch up with in-memory-mutations learned\n    struct child_catch_up_state_test\n    {\n        decree goal_decree;\n        decree min_decree;\n    } tests[] = {{DECREE, DECREE}, {MAX_COUNT - 1, 1}};\n    for (auto test : tests) {\n        mock_child_split_context(true, false);\n        test_child_catch_up_states(DECREE, test.goal_decree, test.min_decree);\n        ASSERT_TRUE(child_is_caught_up());\n\n        cleanup_prepare_list(_child_replica);\n        cleanup_child_split_context();\n    }\n}\n\n// parent_handle_child_catch_up tests\nTEST_F(replica_split_test, parent_handle_catch_up_test)\n{\n    fail::cfg(\"replica_parent_check_sync_point_commit\", \"return()\");\n    ballot WRONG_BALLOT = 1;\n\n    // Test cases:\n    // - request has wrong ballot\n    // - not all child caught up\n    // - all child caught up\n    struct parent_handle_catch_up_test\n    {\n        ballot req_ballot;\n        bool 
will_all_caught_up;\n        error_code expected_err;\n        bool sync_send_write_request;\n    } tests[] = {{WRONG_BALLOT, false, ERR_INVALID_STATE, false},\n                 {INIT_BALLOT, false, ERR_OK, false},\n                 {INIT_BALLOT, true, ERR_OK, true}};\n    for (auto test : tests) {\n        mock_primary_parent_split_context(false, test.will_all_caught_up);\n        ASSERT_EQ(test_parent_handle_child_catch_up(test.req_ballot), test.expected_err);\n        ASSERT_EQ(parent_sync_send_write_request(), test.sync_send_write_request);\n    }\n}\n\n// update_child_group_partition_count tests\nTEST_F(replica_split_test, update_child_group_partition_count_test)\n{\n    fail::cfg(\"replica_parent_update_partition_count_request\", \"return()\");\n    generate_child();\n\n    // Test cases:\n    // - wrong split status\n    // - primary has learner\n    // - update child group partition count succeed\n    struct update_child_group_partition_count_test\n    {\n        split_status::type parent_split_status;\n        bool parent_has_learner;\n        partition_status::type expected_child_status;\n        bool expected_sync_send_write_request;\n        bool is_parent_not_in_split;\n\n    } tests[] = {\n        {split_status::NOT_SPLIT, false, partition_status::PS_ERROR, false, true},\n        {split_status::SPLITTING, true, partition_status::PS_ERROR, false, true},\n        {split_status::SPLITTING, false, partition_status::PS_PARTITION_SPLIT, true, false},\n    };\n    for (auto test : tests) {\n        mock_child_split_context(true, true);\n        mock_parent_primary_configuration(test.parent_has_learner);\n        mock_primary_parent_split_context(true);\n        parent_set_split_status(test.parent_split_status);\n\n        test_update_child_group_partition_count();\n        ASSERT_EQ(_child_replica->status(), test.expected_child_status);\n        ASSERT_EQ(parent_sync_send_write_request(), test.expected_sync_send_write_request);\n        
ASSERT_EQ(is_parent_not_in_split(), test.is_parent_not_in_split);\n    }\n}\n\n// on_update_child_group_partition_count tests\nTEST_F(replica_split_test, child_update_partition_count_test)\n{\n    ballot WRONG_BALLOT = INIT_BALLOT + 1;\n    generate_child();\n\n    // Test cases:\n    // - request has wrong ballot\n    // - child not caught up\n    // - child update partition count succeed\n    struct on_update_child_partition_count_test\n    {\n        ballot req_ballot;\n        bool caught_up;\n        error_code expected_err;\n        int32_t expected_partition_version;\n    } tests[] = {{WRONG_BALLOT, true, ERR_VERSION_OUTDATED, OLD_PARTITION_COUNT - 1},\n                 {INIT_BALLOT, false, ERR_VERSION_OUTDATED, OLD_PARTITION_COUNT - 1},\n                 {INIT_BALLOT, true, ERR_OK, NEW_PARTITION_COUNT - 1}};\n    for (auto test : tests) {\n        mock_child_split_context(true, test.caught_up);\n        ASSERT_EQ(_child_split_mgr->get_partition_version(), OLD_PARTITION_COUNT - 1);\n        ASSERT_EQ(test_on_update_child_group_partition_count(test.req_ballot), test.expected_err);\n        ASSERT_EQ(_child_split_mgr->get_partition_version(), test.expected_partition_version);\n    }\n}\n\n// on_update_child_group_partition_count_reply tests\nTEST_F(replica_split_test, parent_on_update_partition_reply_test)\n{\n    fail::cfg(\"replica_register_child_on_meta\", \"return()\");\n    generate_child();\n\n    // Test cases:\n    // - wrong split status\n    // - child update partition_count failed\n    // - child update partition_count succeed\n    struct on_update_child_partition_count_reply_test\n    {\n        split_status::type parent_split_status;\n        error_code resp_err;\n        partition_status::type expected_child_status;\n        bool expected_sync_send_write_request;\n        bool is_parent_not_in_split;\n    } tests[] = {\n        {split_status::NOT_SPLIT, ERR_OK, partition_status::PS_ERROR, false, true},\n        {split_status::SPLITTING, 
ERR_VERSION_OUTDATED, partition_status::PS_ERROR, false, true},\n        {split_status::SPLITTING, ERR_OK, partition_status::PS_PARTITION_SPLIT, true, false},\n    };\n    for (auto test : tests) {\n        mock_primary_parent_split_context(true);\n        parent_set_split_status(test.parent_split_status);\n        mock_child_split_context(true, true);\n\n        test_on_update_child_group_partition_count_reply(test.resp_err);\n        ASSERT_EQ(_child_replica->status(), test.expected_child_status);\n        ASSERT_EQ(parent_sync_send_write_request(), test.expected_sync_send_write_request);\n        ASSERT_EQ(is_parent_not_in_split(), test.is_parent_not_in_split);\n    }\n}\n\n// register_child test\nTEST_F(replica_split_test, register_child_test)\n{\n    fail::cfg(\"replica_parent_send_register_request\", \"return()\");\n    test_register_child_on_meta();\n    ASSERT_EQ(_parent_replica->status(), partition_status::PS_INACTIVE);\n    ASSERT_EQ(_parent_split_mgr->get_partition_version(), -1);\n}\n\n// register_child_reply tests\nTEST_F(replica_split_test, register_child_reply_test)\n{\n    fail::cfg(\"replica_init_group_check\", \"return()\");\n    fail::cfg(\"replica_broadcast_group_check\", \"return()\");\n    generate_child();\n\n    // Test cases:\n    // - wrong partition status\n    // - response error = INVALID_STATE\n    // - response error = CHILD_REGISTERED\n    // - response error = OK\n    struct register_child_reply_test\n    {\n        partition_status::type parent_partition_status;\n        error_code resp_err;\n        int32_t expected_parent_partition_version;\n    } tests[] = {{partition_status::PS_PRIMARY, ERR_OK, -1},\n                 {partition_status::PS_INACTIVE, ERR_INVALID_STATE, -1},\n                 {partition_status::PS_INACTIVE, ERR_CHILD_REGISTERED, -1},\n                 {partition_status::PS_INACTIVE, ERR_OK, NEW_PARTITION_COUNT - 1}};\n    for (auto test : tests) {\n        mock_child_split_context(true, true);\n        
test_on_register_child_reply(test.parent_partition_status, test.resp_err);\n        ASSERT_EQ(_parent_replica->status(), partition_status::PS_PRIMARY);\n        if (test.parent_partition_status == partition_status::PS_INACTIVE) {\n            ASSERT_TRUE(primary_parent_not_in_split());\n            ASSERT_EQ(_parent_split_mgr->get_partition_version(),\n                      test.expected_parent_partition_version);\n        }\n    }\n}\n\n// trigger_primary_parent_split unit test\nTEST_F(replica_split_test, trigger_primary_parent_split_test)\n{\n    fail::cfg(\"replica_broadcast_group_check\", \"return()\");\n    generate_child();\n\n    // Test cases:\n    // - meta splitting with lack of secondary\n    // - meta splitting with local not_split(See parent_start_split_tests)\n    // - meta splitting with local splitting(See parent_start_split_tests)\n    // - meta pausing with local not_split\n    // - meta pausing with local splitting\n    // - meta canceling with local not_split\n    // - meta canceling with local splitting\n    // - meta paused with local not_split\n    // - meta not_split with local splitting(See query_child_tests)\n    struct primary_parent_test\n    {\n        bool lack_of_secondary;\n        split_status::type meta_split_status;\n        int32_t old_partition_version;\n        split_status::type old_split_status;\n    } tests[]{{true, split_status::SPLITTING, OLD_PARTITION_COUNT - 1, split_status::NOT_SPLIT},\n              {false, split_status::PAUSING, -1, split_status::NOT_SPLIT},\n              {false, split_status::PAUSING, OLD_PARTITION_COUNT - 1, split_status::SPLITTING},\n              {false, split_status::CANCELING, OLD_PARTITION_COUNT - 1, split_status::NOT_SPLIT},\n              {false, split_status::CANCELING, -1, split_status::SPLITTING},\n              {false, split_status::PAUSED, OLD_PARTITION_COUNT - 1, split_status::NOT_SPLIT}};\n    for (const auto &test : tests) {\n        
mock_parent_primary_configuration(test.lack_of_secondary);\n        if (test.old_split_status == split_status::SPLITTING) {\n            mock_child_split_context(true, true);\n            mock_primary_parent_split_context(true);\n        }\n        test_trigger_primary_parent_split(\n            test.meta_split_status, test.old_split_status, test.old_partition_version);\n        ASSERT_EQ(_parent_split_mgr->get_partition_version(), OLD_PARTITION_COUNT - 1);\n        ASSERT_FALSE(parent_sync_send_write_request());\n        if (test.old_split_status == split_status::SPLITTING) {\n            _child_replica->tracker()->wait_outstanding_tasks();\n            ASSERT_EQ(_child_replica->status(), partition_status::PS_ERROR);\n        }\n    }\n}\n\n// trigger_secondary_parent_split unit test\nTEST_F(replica_split_test, secondary_handle_split_test)\n{\n    generate_child();\n\n    // Test cases:\n    // - secondary parent update partition_count\n    // - meta splitting with local not_split(See parent_start_split_tests)\n    // - meta splitting with local splitting(See parent_start_split_tests)\n    // - meta pausing with local splitting\n    // - meta canceling with local not_split\n    // - meta canceling with local splitting\n    // - meta paused with local not_split\n    struct trigger_secondary_parent_split_test\n    {\n        split_status::type meta_split_status;\n        split_status::type local_split_status;\n        int32_t expected_partition_version;\n    } tests[]{\n        {split_status::PAUSING, split_status::NOT_SPLIT, OLD_PARTITION_COUNT - 1},\n        {split_status::PAUSING, split_status::SPLITTING, OLD_PARTITION_COUNT - 1},\n        {split_status::CANCELING, split_status::NOT_SPLIT, OLD_PARTITION_COUNT - 1},\n        {split_status::CANCELING, split_status::SPLITTING, OLD_PARTITION_COUNT - 1},\n        {split_status::NOT_SPLIT, split_status::SPLITTING, NEW_PARTITION_COUNT - 1},\n    };\n\n    for (auto test : tests) {\n        if (test.local_split_status == 
split_status::SPLITTING) {\n            mock_child_split_context(true, true);\n            mock_parent_split_context(partition_status::PS_SECONDARY);\n        }\n        auto resp =\n            test_trigger_secondary_parent_split(test.meta_split_status, test.local_split_status);\n        ASSERT_EQ(resp.err, ERR_OK);\n        ASSERT_TRUE(is_parent_not_in_split());\n        ASSERT_EQ(_parent_split_mgr->get_partition_version(), test.expected_partition_version);\n        if (test.meta_split_status == split_status::PAUSING ||\n            test.meta_split_status == split_status::CANCELING) {\n            ASSERT_TRUE(resp.__isset.is_split_stopped);\n            ASSERT_TRUE(resp.is_split_stopped);\n            if (test.local_split_status == split_status::SPLITTING) {\n                _child_replica->tracker()->wait_outstanding_tasks();\n                ASSERT_EQ(_child_replica->status(), partition_status::PS_ERROR);\n            }\n        }\n    }\n}\n\nTEST_F(replica_split_test, primary_parent_handle_stop_test)\n{\n    fail::cfg(\"replica_parent_send_notify_stop_request\", \"return()\");\n    // Test cases:\n    // - not_splitting request\n    // - splitting request\n    // - pausing request with lack of secondary\n    // - canceling request with not all secondary\n    // - group all paused\n    // - group all canceled\n    struct primary_parent_handle_stop_test\n    {\n        split_status::type meta_split_status;\n        bool lack_of_secondary;\n        bool will_all_stop;\n        int32_t expected_size;\n        bool expected_all_stopped;\n    } tests[]{{split_status::NOT_SPLIT, false, false, 0, false},\n              {split_status::SPLITTING, false, false, 0, false},\n              {split_status::PAUSING, true, false, 1, false},\n              {split_status::CANCELING, false, false, 1, false},\n              {split_status::PAUSING, false, true, 0, true},\n              {split_status::CANCELING, false, true, 0, true}};\n\n    for (auto test : tests) {\n        
test_primary_parent_handle_stop_split(\n            test.meta_split_status, test.lack_of_secondary, test.will_all_stop);\n        ASSERT_EQ(parent_stopped_split_size(), test.expected_size);\n        ASSERT_EQ(primary_parent_not_in_split(), test.expected_all_stopped);\n    }\n}\n\nTEST_F(replica_split_test, query_child_state_reply_test)\n{\n    fail::cfg(\"replica_init_group_check\", \"return()\");\n    fail::cfg(\"replica_broadcast_group_check\", \"return()\");\n    generate_child(true, true);\n    mock_primary_parent_split_context(true);\n\n    test_on_query_child_state_reply();\n    ASSERT_EQ(_parent_split_mgr->get_partition_version(), NEW_PARTITION_COUNT - 1);\n    ASSERT_TRUE(primary_parent_not_in_split());\n}\n\nTEST_F(replica_split_test, check_partition_hash_test)\n{\n    uint64_t send_to_parent_after_split = 1;\n    uint64_t send_to_child_after_split = 9;\n\n    struct check_partition_hash_test\n    {\n        int32_t partition_version;\n        uint64_t partition_hash;\n        bool expected_result;\n    } tests[]{{OLD_PARTITION_COUNT - 1, send_to_parent_after_split, true},\n              {OLD_PARTITION_COUNT - 1, send_to_child_after_split, true},\n              {NEW_PARTITION_COUNT - 1, send_to_parent_after_split, true},\n              {NEW_PARTITION_COUNT - 1, send_to_child_after_split, false}};\n\n    for (const auto &test : tests) {\n        ASSERT_EQ(test_check_partition_hash(test.partition_version, test.partition_hash),\n                  test.expected_result);\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/split/test/run.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\n./dsn_replica_split_test\n\nif [ $? -ne 0 ]; then\n    tail -n 100 data/log/log.1.txt\n    if [ -f core ]; then\n        gdb ./dsn_replica_split_test core -ex \"bt\"\n    fi\n    exit 1\nfi\n"
  },
  {
    "path": "src/replica/storage/CMakeLists.txt",
    "content": "add_subdirectory(simple_kv)\n"
  },
  {
    "path": "src/replica/storage/simple_kv/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn.replication.simple_kv)\n\nthrift_generate_cpp(\n    SIMPLE_KV_THRIFT_SRCS\n    SIMPLE_KV_THRIFT_HDRS\n    ${CMAKE_CURRENT_SOURCE_DIR}/simple_kv.thrift\n)\n\nset(MY_PROJ_SRC ${SIMPLE_KV_THRIFT_SRCS})\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS dsn_replica_server dsn_meta_server dsn_client dsn_runtime hashtable)\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\nset(INI_FILES \"\")\nfile(GLOB\n    RES_FILES\n    \"${CMAKE_CURRENT_SOURCE_DIR}/*.ini\"\n    \"${CMAKE_CURRENT_SOURCE_DIR}/*.sh\"\n    )\n\n# Extra files that will be installed\nset(MY_BINPLACES ${RES_FILES})\n\ndsn_add_test()\n\nadd_subdirectory(test)\n"
  },
  {
    "path": "src/replica/storage/simple_kv/clear.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nrm -rf data core* out\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/config.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.meta]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.replica]\ntype = replica\narguments = \nports = 34801\nrun = true\ncount = 3\npools = THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.client]\ntype 
= client\narguments = mycluster localhost:34601 simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n[core]\ntool = simulator\n;tool = nativerun\n;toollets = tracer\n;toollets = fault_injector\n;toollets = tracer, fault_injector\ntoollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\n;logging_start_level = LOG_LEVEL_WARNING\n;logging_factory_name = dsn::tools::screen_logger\n;logging_factory_name = dsn::tools::hpc_logger\n\n[tools.simulator]\nrandom_seed = 0\n;min_message_delay_microseconds = 0\n;max_message_delay_microseconds = 0\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\nname = default\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\nname = replication\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\nrpc_message_delay_ms_min = 1\nrpc_message_delay_ms_max = 1000\n\ndisk_write_fail_ratio = 0.0\ndisk_read_fail_ratio = 0.00001\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.LPC_CHECKPOINT_REPLICA]\n;execution_extra_delay_us_max = 10000000\n\n[task.LPC_LEARN_REMOTE_DELTA_FILES]\n;execution_extra_delay_us_max = 10000000\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\nrpc_call_channel = RPC_CHANNEL_UDP\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\nrpc_call_channel = RPC_CHANNEL_UDP\n\n[task.LPC_BEACON_CHECK]\nis_trace = 
false\n\n[task.RPC_PREPARE]\nrpc_request_resend_timeout_milliseconds = 8000\n\n[task.LPC_DAEMON_APPS_CHECK_TIMER]\nis_trace = false\n\n[meta_server]\nserver_list = localhost:34601\nmin_live_node_count_for_unfreeze = 1\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\nstateful = true\n\n[replication]\nprepare_timeout_ms_for_secondaries = 10000\nprepare_timeout_ms_for_potential_secondaries = 20000\n\nlearn_timeout_ms = 30000\nstaleness_for_commit = 20\nstaleness_for_start_prepare_for_potential_secondary = 110\nmutation_max_size_mb = 15\nmutation_max_pending_time_ms = 20\nmutation_2pc_min_replica_count = 2\n\nprepare_list_max_size_mb = 250\nrequest_batch_disabled = false\ngroup_check_internal_ms = 100000\ngroup_check_disabled = false\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 14\nfd_grace_seconds = 15\nworking_dir = .\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = true\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = false\n\nconfig_sync_interval_ms = 60000\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/run.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nif [ ! -f dsn.replication.simple_kv ]; then\n    echo \"dsn.replication.simple_kv not exist\"\n    exit 1\nfi\n\n./clear.sh\n\necho \"running dsn.replication.simple_kv for 20 seconds ...\"\n./dsn.replication.simple_kv config.ini &>out &\nPID=$!\nsleep 20\nkill $PID\n\nif [ -f core ] || ! 
grep ERR_OK out > /dev/null ; then\n    echo \"run dsn.replication.simple_kv failed\"\n    echo \"---- ls ----\"\n    ls -l\n    echo \"---- head -n 100 out ----\"\n    head -n 100 out\n    if [ -f data/logs/log.1.txt ]; then\n        echo \"---- tail -n 100 log.1.txt ----\"\n        tail -n 100 data/logs/log.1.txt\n    fi\n    if [ -f core ]; then\n        echo \"---- gdb ./dsn.replication.simple_kv core ----\"\n        gdb ./dsn.replication.simple_kv core -ex \"thread apply all bt\" -ex \"set pagination 0\" -batch\n    fi\n    exit 1\nfi\n\necho \"run dsn.replication.simple_kv succeed\"\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/simple_kv.app.example.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n#include \"simple_kv.client.h\"\n#include \"simple_kv.server.h\"\n\nnamespace dsn {\nnamespace replication {\nnamespace application {\n// client app example\nclass simple_kv_client_app : public ::dsn::service_app\n{\npublic:\n    simple_kv_client_app(const service_app_info *info) : ::dsn::service_app(info) {}\n\n    virtual ~simple_kv_client_app() override { stop(); }\n\n    virtual ::dsn::error_code start(const std::vector<std::string> &args)\n    {\n        if (args.size() < 2)\n            return ::dsn::ERR_INVALID_PARAMETERS;\n\n        
printf(\"%s %s %s\\n\", args[1].c_str(), args[2].c_str(), args[3].c_str());\n        dsn::rpc_address meta;\n        meta.from_string_ipv4(args[2].c_str());\n        _simple_kv_client.reset(new simple_kv_client(args[1].c_str(), {meta}, args[3].c_str()));\n\n        _timer = ::dsn::tasking::enqueue_timer(LPC_SIMPLE_KV_TEST_TIMER,\n                                               &_tracker,\n                                               [this] { on_test_timer(); },\n                                               std::chrono::seconds(1));\n        return ::dsn::ERR_OK;\n    }\n\n    virtual ::dsn::error_code stop(bool cleanup = false)\n    {\n        _tracker.cancel_outstanding_tasks();\n\n        _simple_kv_client.reset();\n\n        return ::dsn::ERR_OK;\n    }\n\n    void on_test_timer()\n    {\n        // test for service simple_kv        using namespace svc_simple_kv;\n        {\n            std::string req = \"hello\";\n            // sync:\n            error_code err;\n            std::string resp;\n            std::tie(err, resp) = _simple_kv_client->read_sync(req);\n            std::cout << \"call RPC_SIMPLE_KV_SIMPLE_KV_READ end, return \" << err.to_string();\n            if (ERR_OK == err)\n                std::cout << \", read result: \" << resp;\n            std::cout << std::endl;\n            // async:\n            //_simple_kv_client->read(req, empty_rpc_handler);\n        }\n        {\n            kv_pair req;\n            req.key = \"hello\";\n            req.value = \"world\";\n            // sync:\n            error_code err;\n            int32_t resp;\n            std::tie(err, resp) = _simple_kv_client->write_sync(req);\n            std::cout << \"call RPC_SIMPLE_KV_SIMPLE_KV_WRITE end, return \" << err.to_string()\n                      << std::endl;\n            // async:\n            //_simple_kv_client->write(req, empty_rpc_handler);\n        }\n        {\n            kv_pair req;\n            req.key = \"hello\";\n            req.value = 
\"world\";\n            // sync:\n            error_code err;\n            int32_t resp;\n            std::tie(err, resp) = _simple_kv_client->append_sync(req);\n            std::cout << \"call RPC_SIMPLE_KV_SIMPLE_KV_APPEND end, return \" << err.to_string()\n                      << std::endl;\n\n            // async:\n            //_simple_kv_client->append(req, empty_rpc_handler);\n        }\n    }\n\nprivate:\n    ::dsn::task_ptr _timer;\n    ::dsn::rpc_address _server;\n    std::unique_ptr<simple_kv_client> _simple_kv_client;\n    dsn::task_tracker _tracker;\n};\n} // namespace application\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/storage/simple_kv/simple_kv.client.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n#include <iostream>\n#include <dsn/utility/optional.h>\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/dist/replication/partition_resolver.h>\n#include \"simple_kv.code.definition.h\"\n#include \"simple_kv_types.h\"\n\nnamespace dsn {\nnamespace replication {\nnamespace application {\nclass simple_kv_client\n{\npublic:\n    simple_kv_client(const char *cluster_name,\n                     const std::vector<dsn::rpc_address> &meta_list,\n                     const char *app_name)\n    {\n        _resolver = partition_resolver::get_resolver(cluster_name, meta_list, app_name);\n    }\n\n    simple_kv_client() {}\n\n    virtual ~simple_kv_client() {}\n\n    // ---------- call RPC_SIMPLE_KV_SIMPLE_KV_READ ------------\n 
   // - synchronous\n    std::pair<::dsn::error_code, std::string>\n    read_sync(const std::string &key,\n              std::chrono::milliseconds timeout = std::chrono::milliseconds(0),\n              uint64_t partition_hash = 0)\n    {\n        return ::dsn::rpc::wait_and_unwrap<std::string>(\n            _resolver->call_op(RPC_SIMPLE_KV_SIMPLE_KV_READ,\n                               key,\n                               nullptr,\n                               empty_rpc_handler,\n                               timeout,\n                               partition_hash,\n                               0));\n    }\n\n    // - asynchronous with on-stack std::string and std::string\n    template <typename TCallback>\n    ::dsn::task_ptr read(const std::string &key,\n                         TCallback &&callback,\n                         std::chrono::milliseconds timeout = std::chrono::milliseconds(0),\n                         uint64_t partition_hash = 0,\n                         int reply_thread_hash = 0)\n    {\n        return _resolver->call_op(RPC_SIMPLE_KV_SIMPLE_KV_READ,\n                                  key,\n                                  nullptr,\n                                  std::forward<TCallback>(callback),\n                                  timeout,\n                                  partition_hash,\n                                  reply_thread_hash);\n    }\n\n    // ---------- call RPC_SIMPLE_KV_SIMPLE_KV_WRITE ------------\n    // - synchronous\n    std::pair<::dsn::error_code, int32_t>\n    write_sync(const kv_pair &pr,\n               std::chrono::milliseconds timeout = std::chrono::milliseconds(0),\n               uint64_t partition_hash = 0)\n    {\n        return dsn::rpc::wait_and_unwrap<int32_t>(_resolver->call_op(RPC_SIMPLE_KV_SIMPLE_KV_WRITE,\n                                                                     pr,\n                                                                     nullptr,\n                                       
                              empty_rpc_handler,\n                                                                     timeout,\n                                                                     partition_hash,\n                                                                     0));\n    }\n\n    // - asynchronous with on-stack kv_pair and int32_t\n    template <typename TCallback>\n    ::dsn::task_ptr write(const kv_pair &pr,\n                          TCallback &&callback,\n                          std::chrono::milliseconds timeout = std::chrono::milliseconds(0),\n                          uint64_t partition_hash = 0,\n                          int reply_thread_hash = 0)\n    {\n        return _resolver->call_op(RPC_SIMPLE_KV_SIMPLE_KV_WRITE,\n                                  pr,\n                                  nullptr,\n                                  std::forward<TCallback>(callback),\n                                  timeout,\n                                  partition_hash,\n                                  reply_thread_hash);\n    }\n\n    // ---------- call RPC_SIMPLE_KV_SIMPLE_KV_APPEND ------------\n    // - synchronous\n    std::pair<::dsn::error_code, int32_t>\n    append_sync(const kv_pair &pr,\n                std::chrono::milliseconds timeout = std::chrono::milliseconds(0),\n                uint64_t partition_hash = 0)\n    {\n        return ::dsn::rpc::wait_and_unwrap<int32_t>(\n            _resolver->call_op(RPC_SIMPLE_KV_SIMPLE_KV_APPEND,\n                               pr,\n                               nullptr,\n                               empty_rpc_handler,\n                               timeout,\n                               partition_hash,\n                               0));\n    }\n\n    // - asynchronous with on-stack kv_pair and int32_t\n    template <typename TCallback>\n    ::dsn::task_ptr append(const kv_pair &pr,\n                           TCallback &&callback,\n                           
std::chrono::milliseconds timeout = std::chrono::milliseconds(0),\n                           uint64_t partition_hash = 0,\n                           int reply_thread_hash = 0)\n    {\n        return _resolver->call_op(RPC_SIMPLE_KV_SIMPLE_KV_APPEND,\n                                  pr,\n                                  nullptr,\n                                  std::forward<TCallback>(callback),\n                                  timeout,\n                                  partition_hash,\n                                  reply_thread_hash);\n    }\n\nprivate:\n    dsn::replication::partition_resolver_ptr _resolver;\n};\n} // namespace application\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/storage/simple_kv/simple_kv.code.definition.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n#include <dsn/dist/replication/replication.codes.h>\n\nnamespace dsn {\nnamespace replication {\nnamespace application {\n\nDEFINE_STORAGE_READ_RPC_CODE(RPC_SIMPLE_KV_SIMPLE_KV_READ)\nDEFINE_STORAGE_WRITE_RPC_CODE(RPC_SIMPLE_KV_SIMPLE_KV_WRITE, ALLOW_BATCH, IS_IDEMPOTENT)\nDEFINE_STORAGE_WRITE_RPC_CODE(RPC_SIMPLE_KV_SIMPLE_KV_APPEND, ALLOW_BATCH, NOT_IDEMPOTENT)\n\n// test timer task code\nDEFINE_TASK_CODE(LPC_SIMPLE_KV_TEST_TIMER, TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT)\n}\n}\n}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/simple_kv.main.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n// apps\n#include \"simple_kv.app.example.h\"\n#include \"simple_kv.server.impl.h\"\n\n// framework specific tools\n#include <dsn/dist/replication/meta_service_app.h>\n#include <dsn/dist/replication/replication_service_app.h>\n#include <dsn/http/http_server.h>\n\nstatic void dsn_app_registration_simple_kv()\n{\n    dsn::FLAGS_enable_http_server = false; // disable http server\n\n    dsn::replication::application::simple_kv_service_impl::register_service();\n\n    dsn::service::meta_service_app::register_all();\n    
dsn::replication::replication_service_app::register_all();\n\n    dsn::service_app::register_factory<dsn::replication::application::simple_kv_client_app>(\n        \"client\");\n}\n\nint main(int argc, char **argv)\n{\n    dsn_app_registration_simple_kv();\n    dsn_run(argc, argv, true);\n    return 0;\n}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/simple_kv.server.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n#include <iostream>\n\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/dist/replication/storage_serverlet.h>\n\n#include \"simple_kv.code.definition.h\"\n#include \"simple_kv_types.h\"\n\nnamespace dsn {\nnamespace replication {\nnamespace application {\nclass simple_kv_service : public replication_app_base, public storage_serverlet<simple_kv_service>\n{\npublic:\n    simple_kv_service(replica *r) : replication_app_base(r) {}\n    virtual ~simple_kv_service() {}\n\n    virtual int on_request(dsn::message_ex *request) override 
{ return handle_request(request); }\nprotected:\n    // all service handlers to be implemented further\n    // RPC_SIMPLE_KV_SIMPLE_KV_READ\n    virtual void on_read(const std::string &key, ::dsn::rpc_replier<std::string> &reply)\n    {\n        std::cout << \"... exec RPC_SIMPLE_KV_SIMPLE_KV_READ ... (not implemented) \" << std::endl;\n        std::string resp;\n        reply(resp);\n    }\n    // RPC_SIMPLE_KV_SIMPLE_KV_WRITE\n    virtual void on_write(const kv_pair &pr, ::dsn::rpc_replier<int32_t> &reply)\n    {\n        std::cout << \"... exec RPC_SIMPLE_KV_SIMPLE_KV_WRITE ... (not implemented) \" << std::endl;\n        int32_t resp = 0;\n        reply(resp);\n    }\n    // RPC_SIMPLE_KV_SIMPLE_KV_APPEND\n    virtual void on_append(const kv_pair &pr, ::dsn::rpc_replier<int32_t> &reply)\n    {\n        std::cout << \"... exec RPC_SIMPLE_KV_SIMPLE_KV_APPEND ... (not implemented) \" << std::endl;\n        int32_t resp = 0;\n        reply(resp);\n    }\n\n    static void register_rpc_handlers()\n    {\n        register_async_rpc_handler(RPC_SIMPLE_KV_SIMPLE_KV_READ, \"read\", on_read);\n        register_async_rpc_handler(RPC_SIMPLE_KV_SIMPLE_KV_WRITE, \"write\", on_write);\n        register_async_rpc_handler(RPC_SIMPLE_KV_SIMPLE_KV_APPEND, \"append\", on_append);\n    }\n\nprivate:\n    static void\n    on_read(simple_kv_service *svc, const std::string &key, dsn::rpc_replier<std::string> &reply)\n    {\n        svc->on_read(key, reply);\n    }\n    static void\n    on_write(simple_kv_service *svc, const kv_pair &pr, dsn::rpc_replier<int32_t> &reply)\n    {\n        svc->on_write(pr, reply);\n    }\n    static void\n    on_append(simple_kv_service *svc, const kv_pair &pr, dsn::rpc_replier<int32_t> &reply)\n    {\n        svc->on_append(pr, reply);\n    }\n};\n}\n}\n}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/simple_kv.server.impl.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"simple_kv.server.impl.h\"\n#include <fstream>\n#include <sstream>\n#include <dsn/utility/filesystem.h>\n\nnamespace dsn {\nnamespace replication {\nnamespace application {\n\nsimple_kv_service_impl::simple_kv_service_impl(replica *r) : simple_kv_service(r), _lock(true)\n{\n    reset_state();\n    ddebug(\"simple_kv_service_impl inited\");\n}\n\nvoid simple_kv_service_impl::reset_state()\n{\n    _test_file_learning = dsn_config_get_value_bool(\"test\", \"test_file_learning\", true, \"\");\n    _last_durable_decree = 0;\n}\n\n// RPC_SIMPLE_KV_READ\nvoid 
simple_kv_service_impl::on_read(const std::string &key, ::dsn::rpc_replier<std::string> &reply)\n{\n    std::string r;\n    {\n        zauto_lock l(_lock);\n\n        auto it = _store.find(key);\n        if (it != _store.end()) {\n            r = it->second;\n        }\n    }\n\n    dinfo(\"read %s\", r.c_str());\n    reply(r);\n}\n\n// RPC_SIMPLE_KV_WRITE\nvoid simple_kv_service_impl::on_write(const kv_pair &pr, ::dsn::rpc_replier<int32_t> &reply)\n{\n    {\n        zauto_lock l(_lock);\n        _store[pr.key] = pr.value;\n    }\n\n    dinfo(\"write %s\", pr.key.c_str());\n    reply(0);\n}\n\n// RPC_SIMPLE_KV_APPEND\nvoid simple_kv_service_impl::on_append(const kv_pair &pr, ::dsn::rpc_replier<int32_t> &reply)\n{\n    {\n        zauto_lock l(_lock);\n        auto it = _store.find(pr.key);\n        if (it != _store.end())\n            it->second.append(pr.value);\n        else\n            _store[pr.key] = pr.value;\n    }\n\n    dinfo(\"append %s\", pr.key.c_str());\n    reply(0);\n}\n\n::dsn::error_code simple_kv_service_impl::start(int argc, char **argv)\n{\n    {\n        zauto_lock l(_lock);\n        set_last_durable_decree(0);\n        recover();\n    }\n    return ERR_OK;\n}\n\n::dsn::error_code simple_kv_service_impl::stop(bool clear_state)\n{\n    {\n        zauto_lock l(_lock);\n        if (clear_state) {\n            if (!dsn::utils::filesystem::remove_path(_dir_data)) {\n                dassert(false, \"Fail to delete directory %s.\", _dir_data.c_str());\n            }\n            reset_state();\n        }\n    }\n\n    return ERR_OK;\n}\n\n// checkpoint related\nvoid simple_kv_service_impl::recover()\n{\n    zauto_lock l(_lock);\n\n    _store.clear();\n\n    int64_t maxVersion = 0;\n    std::string name;\n\n    std::vector<std::string> sub_list;\n    std::string path = _dir_data;\n    if (!dsn::utils::filesystem::get_subfiles(path, sub_list, false)) {\n        dassert(false, \"Fail to get subfiles in %s.\", path.c_str());\n    }\n    for (auto &fpath : 
sub_list) {\n        auto &&s = dsn::utils::filesystem::get_file_name(fpath);\n        if (s.substr(0, strlen(\"checkpoint.\")) != std::string(\"checkpoint.\"))\n            continue;\n\n        int64_t version = static_cast<int64_t>(atoll(s.substr(strlen(\"checkpoint.\")).c_str()));\n        if (version > maxVersion) {\n            maxVersion = version;\n            name = std::string(data_dir()) + \"/\" + s;\n        }\n    }\n    sub_list.clear();\n\n    if (maxVersion > 0) {\n        recover(name, maxVersion);\n        set_last_durable_decree(maxVersion);\n    }\n}\n\nvoid simple_kv_service_impl::recover(const std::string &name, int64_t version)\n{\n    zauto_lock l(_lock);\n\n    std::ifstream is(name.c_str(), std::ios::binary);\n    if (!is.is_open())\n        return;\n\n    _store.clear();\n\n    uint64_t count;\n    int magic;\n\n    is.read((char *)&count, sizeof(count));\n    is.read((char *)&magic, sizeof(magic));\n    dassert(magic == 0xdeadbeef, \"invalid checkpoint\");\n\n    for (uint64_t i = 0; i < count; i++) {\n        std::string key;\n        std::string value;\n\n        uint32_t sz;\n        is.read((char *)&sz, (uint32_t)sizeof(sz));\n        key.resize(sz);\n\n        is.read((char *)&key[0], sz);\n\n        is.read((char *)&sz, (uint32_t)sizeof(sz));\n        value.resize(sz);\n\n        is.read((char *)&value[0], sz);\n\n        _store[key] = value;\n    }\n    is.close();\n}\n\n::dsn::error_code simple_kv_service_impl::sync_checkpoint()\n{\n    char name[256];\n    int64_t last_commit = _last_committed_decree.load();\n    sprintf(name, \"%s/checkpoint.%\" PRId64, _dir_data.c_str(), last_commit);\n\n    zauto_lock l(_lock);\n\n    if (last_commit == last_durable_decree()) {\n        dassert(utils::filesystem::file_exists(name), \"checkpoint file %s is missing!\", name);\n        return ERR_OK;\n    }\n\n    std::ofstream os(name, std::ios::binary);\n\n    uint64_t count = (uint64_t)_store.size();\n    int magic = 0xdeadbeef;\n\n    
os.write((const char *)&count, (uint32_t)sizeof(count));\n    os.write((const char *)&magic, (uint32_t)sizeof(magic));\n\n    for (auto it = _store.begin(); it != _store.end(); ++it) {\n        const std::string &k = it->first;\n        uint32_t sz = (uint32_t)k.length();\n\n        os.write((const char *)&sz, (uint32_t)sizeof(sz));\n        os.write((const char *)&k[0], sz);\n\n        const std::string &v = it->second;\n        sz = (uint32_t)v.length();\n\n        os.write((const char *)&sz, (uint32_t)sizeof(sz));\n        os.write((const char *)&v[0], sz);\n    }\n\n    os.close();\n\n    // TODO: gc checkpoints\n    set_last_durable_decree(last_commit);\n    return ERR_OK;\n}\n\n::dsn::error_code simple_kv_service_impl::async_checkpoint(bool flush_memtable)\n{\n    return sync_checkpoint();\n}\n\n// helper routines to accelerate learning\n::dsn::error_code simple_kv_service_impl::get_checkpoint(int64_t learn_start,\n                                                         const dsn::blob &learn_request,\n                                                         /*out*/ learn_state &state)\n{\n    if (last_durable_decree() > 0) {\n        char name[256];\n        sprintf(name, \"%s/checkpoint.%\" PRId64, _dir_data.c_str(), last_durable_decree());\n\n        state.from_decree_excluded = 0;\n        state.to_decree_included = last_durable_decree();\n        state.files.push_back(std::string(name));\n        return ERR_OK;\n    } else {\n        state.from_decree_excluded = 0;\n        state.to_decree_included = 0;\n        return ERR_OBJECT_NOT_FOUND;\n    }\n}\n\n::dsn::error_code simple_kv_service_impl::storage_apply_checkpoint(chkpt_apply_mode mode,\n                                                                   const learn_state &state)\n{\n    if (mode == chkpt_apply_mode::learn) {\n        recover(state.files[0], state.to_decree_included);\n        return ERR_OK;\n    } else {\n        dassert(chkpt_apply_mode::copy == mode, \"invalid mode %d\", 
(int)mode);\n        dassert(state.to_decree_included > last_durable_decree(),\n                \"checkpoint's decree is smaller than current\");\n\n        char name[256];\n        sprintf(name, \"%s/checkpoint.%\" PRId64, _dir_data.c_str(), state.to_decree_included);\n        std::string lname(name);\n\n        if (!utils::filesystem::rename_path(state.files[0], lname))\n            return ERR_CHECKPOINT_FAILED;\n        else {\n            set_last_durable_decree(state.to_decree_included);\n            return ERR_OK;\n        }\n    }\n}\n}\n}\n} // namespace\n"
  },
  {
    "path": "src/replica/storage/simple_kv/simple_kv.server.impl.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include \"simple_kv.server.h\"\n#include \"replica/replica.h\"\n\nnamespace dsn {\nnamespace replication {\nnamespace application {\nclass simple_kv_service_impl : public simple_kv_service\n{\npublic:\n    static void register_service()\n    {\n        replication_app_base::register_storage_engine(\n            \"simple_kv\", replication_app_base::create<simple_kv_service_impl>);\n        simple_kv_service::register_rpc_handlers();\n    }\n    simple_kv_service_impl(replica *r);\n\n    // RPC_SIMPLE_KV_READ\n    virtual void on_read(const 
std::string &key, ::dsn::rpc_replier<std::string> &reply);\n    // RPC_SIMPLE_KV_WRITE\n    virtual void on_write(const kv_pair &pr, ::dsn::rpc_replier<int32_t> &reply);\n    // RPC_SIMPLE_KV_APPEND\n    virtual void on_append(const kv_pair &pr, ::dsn::rpc_replier<int32_t> &reply);\n\n    virtual ::dsn::error_code start(int argc, char **argv) override;\n\n    virtual ::dsn::error_code stop(bool cleanup = false) override;\n\n    virtual int64_t last_durable_decree() const override { return _last_durable_decree; }\n\n    virtual ::dsn::error_code sync_checkpoint() override;\n\n    virtual ::dsn::error_code async_checkpoint(bool flush_memtable) override;\n\n    virtual ::dsn::error_code copy_checkpoint_to_dir(const char *checkpoint_dir,\n                                                     int64_t *last_decree,\n                                                     bool flush_memtable = false) override\n    {\n        return ERR_NOT_IMPLEMENTED;\n    }\n\n    virtual ::dsn::error_code prepare_get_checkpoint(blob &learn_req) { return dsn::ERR_OK; }\n\n    virtual ::dsn::error_code get_checkpoint(int64_t learn_start,\n                                             const dsn::blob &learn_request,\n                                             /*out*/ learn_state &state) override;\n\n    virtual ::dsn::error_code storage_apply_checkpoint(chkpt_apply_mode mode,\n                                                       const learn_state &state) override;\n\n    std::string query_compact_state() const override { return \"\"; }\n\n    virtual void update_app_envs(const std::map<std::string, std::string> &envs) {}\n\n    virtual void query_app_envs(/*out*/ std::map<std::string, std::string> &envs) {}\n\n    virtual uint32_t query_data_version() const override { return 0; }\n\n    virtual ::dsn::replication::manual_compaction_status::type query_compact_status() const override\n    {\n        return dsn::replication::manual_compaction_status::IDLE;\n    }\n\nprivate:\n    void 
recover();\n    void recover(const std::string &name, int64_t version);\n    void set_last_durable_decree(int64_t d) { _last_durable_decree = d; }\n\n    void reset_state();\n\nprivate:\n    typedef std::map<std::string, std::string> simple_kv;\n    zlock _lock;\n    simple_kv _store;\n    bool _test_file_learning;\n    int64_t _last_durable_decree;\n};\n}\n}\n} // namespace\n"
  },
  {
    "path": "src/replica/storage/simple_kv/simple_kv.thrift",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\nnamespace cpp dsn.replication.application\n\nstruct kv_pair\n{\n    1:string key;\n    2:string value;\n}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn.rep_tests.simple_kv)\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS dsn_replica_server\n                 dsn_meta_server\n                 dsn_replication_common\n                 dsn_client\n                 dsn.failure_detector\n                 dsn.replication.zookeeper_provider\n                 dsn_runtime\n                 zookeeper\n                 hashtable\n                 gtest\n                 )\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\n# Extra files that will be installed\nFILE(GLOB CASE_FILES \"case-*\")\nset(MY_BINPLACES\n    \"run.sh\"\n    \"clear.sh\"\n    \"addcase.sh\"\n    \"${CASE_FILES}\"\n)\n\ndsn_add_test()\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/README.md",
    "content": "<!--\n  ~ The MIT License (MIT)\n  ~\n  ~ Copyright (c) 2015 Microsoft Corporation\n  ~\n  ~ -=- Robust Distributed System Nucleus (rDSN) -=-\n  ~\n  ~ Permission is hereby granted, free of charge, to any person obtaining a copy\n  ~ of this software and associated documentation files (the \"Software\"), to deal\n  ~ in the Software without restriction, including without limitation the rights\n  ~ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n  ~ copies of the Software, and to permit persons to whom the Software is\n  ~ furnished to do so, subject to the following conditions:\n  ~\n  ~ The above copyright notice and this permission notice shall be included in\n  ~ all copies or substantial portions of the Software.\n  ~\n  ~ THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n  ~ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n  ~ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n  ~ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n  ~ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n  ~ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n  ~ THE SOFTWARE.\n-->\n\nCase Classification:\n  000~099: nomal operation\n  100~199: RPC fault/failure between replica servers\n  200~299: AIO fault/failure in replica servers\n  300~399: RPC fault/failure between replica server and meta server\n  400~499: mixed fault/failure of above\n  500~599: test cases of failure detector module\n  600~699: regression testing\n\nRun case:\n  ./run.sh <case-id>\nfor example:\n  ./run.sh 000\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/addcase.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nif [ $# -ne 2 ]; then\n    echo \"USAGE: $0 <new-case-id> <from-case-id>\"\n    echo \" e.g.: $0 106 100\"\n    exit 1\nfi\n\nid=$1\n\nif [ -f case-${id}.act ]; then\n    echo \"case ${id} already exists\"\n    exit 1\nfi\n\nold=$2\ncp case-${old}.act case-${id}.act\ncp case-${old}.ini case-${id}.ini\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-000.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n# - normal case\n# - no error injected\n# - just do write and read\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# wait for server ready\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,0},{r2,sec,3,0},{r3,sec,3,0}}\n\n# begin write 1\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\n\n# wait for commit\nstate:{{r1,pri,3,1},{r2,sec,3,0},{r3,sec,3,0}}\n\n# end write 1\nclient:end_write:id=1,err=err_ok,resp=0\n\n# begin read 1\nclient:begin_read:id=1,key=k1,timeout=0\n\n# end read 1\nclient:end_read:id=1,err=err_ok,resp=v1\n\n# begin write 2\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\n\n# wait for commit\nstate:{{r1,pri,3,2},{r2,sec,3,1},{r3,sec,3,1}}\n\n# end write 2\nclient:end_write:id=2,err=err_ok,resp=0\n\n# begin read 
2\nclient:begin_read:id=2,key=k2,timeout=0\n\n# end read 2\nclient:end_read:id=2,err=err_ok,resp=v2\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-000.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-001.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learning\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# check config is not enough but check state instead\n# to ensure primary is setup on replica but only only on meta\n# server.\n# the reason is that the next only RPC_CONFIG_PROPOSAL (due to\n# set:disable_load_balance=1 later) can be skipped if the\n# replica is not in primary status yet, which leads to failure\n# of r2 becomes potential secondary.\n#\n# config:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\n#\n# on_rpc_request_enqueue may happen before the above state instruction, so\n#\n# 
wait:on_rpc_request_enqueue:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nwait:on_task_begin:node=r1,task_code=RPC_CONFIG_PROPOSAL\n\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\n\nstate:{{r1,pri,2,11},{r2,sec,2,11}}\n\nclient:begin_read:id=1,key=k1,timeout=0\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\n\nset:disable_load_balance=0\n\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,11},{r2,sec,3,11},{r3,sec,3,11}}\n\nset:disable_load_balance=1\n\n# kick 
r2\nclient:begin_write:id=21,key=k21,value=v21,timeout=0\ninject:on_rpc_call:rpc_name=rpc_prepare,from=r1,to=r2\nconfig:{4,r1,[r3]}\nclient:end_write:id=21,err=ERR_OK,resp=0\n\n# kick r1\nclient:begin_write:id=22,key=k22,value=v22,timeout=0\ninject:on_aio_call:node=r1,task_code=LPC_WRITE_REPLICATION_LOG_PRIVATE\nconfig:{5,-,[r3]}\nclient:end_write:id=22,err=ERR_TIMEOUT,resp=0\n\n# make r3 as primary\nset:disable_load_balance=0\nconfig:{6,r3,[]}\nconfig:{7,r3,[r1]}\nconfig:{8,r3,[r1,r2]}\nstate:{{r1,sec,8,13},{r2,sec,8,13},{r3,pri,8,13}}\n\n# check data\nclient:begin_read:id=1,key=k1,timeout=0\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\nclient:begin_read:id=21,key=k21,timeout=0\nclient:end_read:id=21,err=err_ok,resp=v21\nclient:begin_read:id=22,key=k22,timeout=0\nclient:end_read:id=22,err=err_ok,resp=v22\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-001.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-002.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learning\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# check config is not enough but check state instead\n# to ensure primary is setup on replica but only only on meta\n# server.\n# the reason is that the next only RPC_CONFIG_PROPOSAL (due to\n# set:disable_load_balance=1 later) can be skipped if the\n# replica is not in primary status yet, which leads to failure\n# of r2 becomes potential secondary.\n#\n# config:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\n#\n# on_rpc_request_enqueue may happen before the above state instruction, so\n#\n# 
wait:on_rpc_request_enqueue:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nwait:on_task_begin:node=r1,task_code=RPC_CONFIG_PROPOSAL\n\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\n\nstate:{{r1,pri,2,11},{r2,sec,2,11}}\n\nclient:begin_read:id=1,key=k1,timeout=0\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\n\nset:disable_load_balance=0\n\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,11},{r2,sec,3,11},{r3,sec,3,11}}\n\nset:disable_load_balance=1\n\n# kick 
r2\nclient:begin_write:id=21,key=k21,value=v21,timeout=0\ninject:on_rpc_call:rpc_name=rpc_prepare,from=r1,to=r2\nconfig:{4,r1,[r3]}\nclient:end_write:id=21,err=ERR_OK,resp=0\n\n# kick r1\nclient:begin_write:id=22,key=k22,value=v22,timeout=0\ninject:on_aio_call:node=r1,task_code=LPC_WRITE_REPLICATION_LOG_PRIVATE\nconfig:{5,-,[r3]}\nclient:end_write:id=22,err=ERR_TIMEOUT,resp=0\n\n# make r3 as primary\nset:disable_load_balance=0\nconfig:{6,r3,[]}\nconfig:{7,r3,[r1]}\nconfig:{8,r3,[r1,r2]}\nstate:{{r1,sec,8,13},{r2,sec,8,13},{r3,pri,8,13}}\n\n# check data\nclient:begin_read:id=1,key=k1,timeout=0\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\nclient:begin_read:id=21,key=k21,timeout=0\nclient:end_read:id=21,err=err_ok,resp=v21\nclient:begin_read:id=22,key=k22,timeout=0\nclient:end_read:id=22,err=err_ok,resp=v22\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-002.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = true\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-003.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learning\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# check config is not enough but check state instead\n# to ensure primary is setup on replica but only only on meta\n# server.\n# the reason is that the next only RPC_CONFIG_PROPOSAL (due to\n# set:disable_load_balance=1 later) can be skipped if the\n# replica is not in primary status yet, which leads to failure\n# of r2 becomes potential secondary.\n#\n# config:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\n#\n# on_rpc_request_enqueue may happen before the above state instruction, so\n#\n# 
wait:on_rpc_request_enqueue:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nwait:on_task_begin:node=r1,task_code=RPC_CONFIG_PROPOSAL\n\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\n\nstate:{{r1,pri,2,11},{r2,sec,2,11}}\n\nclient:begin_read:id=1,key=k1,timeout=0\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\n\nset:disable_load_balance=0\n\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,11},{r2,sec,3,11},{r3,sec,3,11}}\n\nset:disable_load_balance=1\n\n# kick 
r2\nclient:begin_write:id=21,key=k21,value=v21,timeout=0\ninject:on_rpc_call:rpc_name=rpc_prepare,from=r1,to=r2\nconfig:{4,r1,[r3]}\nclient:end_write:id=21,err=ERR_OK,resp=0\n\n# kick r1\nclient:begin_write:id=22,key=k22,value=v22,timeout=0\ninject:on_aio_call:node=r1,task_code=LPC_WRITE_REPLICATION_LOG_PRIVATE\nconfig:{5,-,[r3]}\nclient:end_write:id=22,err=ERR_TIMEOUT,resp=0\n\n# make r3 as primary\nset:disable_load_balance=0\nconfig:{6,r3,[]}\nconfig:{7,r3,[r1]}\nconfig:{8,r3,[r1,r2]}\nstate:{{r1,sec,8,13},{r2,sec,8,13},{r3,pri,8,13}}\n\n# check data\nclient:begin_read:id=1,key=k1,timeout=0\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\nclient:begin_read:id=21,key=k21,timeout=0\nclient:end_read:id=21,err=err_ok,resp=v21\nclient:begin_read:id=22,key=k22,timeout=0\nclient:end_read:id=22,err=err_ok,resp=v22\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-003.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n[test]\ntest_file_learning = true\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-004.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learning\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# check config is not enough but check state instead\n# to ensure primary is setup on replica but only only on meta\n# server.\n# the reason is that the next only RPC_CONFIG_PROPOSAL (due to\n# set:disable_load_balance=1 later) can be skipped if the\n# replica is not in primary status yet, which leads to failure\n# of r2 becomes potential secondary.\n#\n# config:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\n#\n# on_rpc_request_enqueue may happen before the above state instruction, so\n#\n# 
wait:on_rpc_request_enqueue:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nwait:on_task_begin:node=r1,task_code=RPC_CONFIG_PROPOSAL\n\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\n\nstate:{{r1,pri,2,11},{r2,sec,2,11}}\n\nclient:begin_read:id=1,key=k1,timeout=0\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\n\nset:disable_load_balance=0\n\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,11},{r2,sec,3,11},{r3,sec,3,11}}\n\nset:disable_load_balance=1\n\n# kick 
r2\nclient:begin_write:id=21,key=k21,value=v21,timeout=0\ninject:on_rpc_call:rpc_name=rpc_prepare,from=r1,to=r2\nconfig:{4,r1,[r3]}\nclient:end_write:id=21,err=ERR_OK,resp=0\n\n# kick r1\nclient:begin_write:id=22,key=k22,value=v22,timeout=0\ninject:on_aio_call:node=r1,task_code=LPC_WRITE_REPLICATION_LOG_PRIVATE\nconfig:{5,-,[r3]}\nclient:end_write:id=22,err=ERR_TIMEOUT,resp=0\n\n# make r3 as primary\nset:disable_load_balance=0\nconfig:{6,r3,[]}\nconfig:{7,r3,[r1]}\nconfig:{8,r3,[r1,r2]}\nstate:{{r1,sec,8,13},{r2,sec,8,13},{r3,pri,8,13}}\n\n# check data\nclient:begin_read:id=1,key=k1,timeout=0\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\nclient:begin_read:id=21,key=k21,timeout=0\nclient:end_read:id=21,err=err_ok,resp=v21\nclient:begin_read:id=22,key=k22,timeout=0\nclient:end_read:id=22,err=err_ok,resp=v22\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-004.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n[test]\ntest_file_learning = true\ndelta_state_learning_supported = true\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-005.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n#   regression testing for issue #175\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# wait for server ready\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,0},{r2,sec,3,0},{r3,sec,3,0}}\n\n# begin write 1\n# delay callback of writing shared log for #1\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nwait:on_task_begin:node=r1,task_code=RPC_SIMPLE_KV_SIMPLE_KV_WRITE\nmodify:on_aio_call:node=r1,task_code=LPC_WRITE_REPLICATION_LOG_PRIVATE,type=WRITE,delay=0,modify_delay=2000\nwait:on_task_end:node=r1,task_code=RPC_SIMPLE_KV_SIMPLE_KV_WRITE\n\n# begin write 2\n# writing shared log should be 
pending\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nwait:on_task_begin:node=r1,task_code=RPC_SIMPLE_KV_SIMPLE_KV_WRITE\nwait:on_task_end:node=r1,task_code=RPC_SIMPLE_KV_SIMPLE_KV_WRITE\n\n# after delay time, wait callback of writing shared log for #1 and #2\nwait:on_task_begin:node=r1,task_code=LPC_WRITE_REPLICATION_LOG_PRIVATE\nwait:on_task_begin:node=r1,task_code=LPC_WRITE_REPLICATION_LOG_PRIVATE\n\n# end write 2\nclient:end_write:id=2,err=err_ok,resp=0\n\n# check data correctness\nclient:begin_read:id=1,key=k1,timeout=0\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-005.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = false\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-006.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test reject 2pc when there aren't enough members\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# check config is not enough but check state instead\n# to ensure primary is setup on replica but only only on meta\n# server.\n# the reason is that the next only RPC_CONFIG_PROPOSAL (due to\n# set:disable_load_balance=1 later) can be skipped if the\n# replica is not in primary status yet, which leads to failure\n# of r2 becomes potential secondary.\n#\n# config:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\n#\n# on_rpc_request_enqueue may happen before the above state instruction, so\n#\n# 
wait:on_rpc_request_enqueue:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nwait:on_task_begin:node=r1,task_code=RPC_CONFIG_PROPOSAL\n\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\n\ninject:on_rpc_call:rpc_name=rpc_prepare,from=r1,to=r2\n\n# primary will remove r2 from config\nconfig:{3,r1,[]}\n\n# part of requests will commit in primary\nstate:{{r1,pri,3,5}}\n\n# enable load balancer again:\nset:disable_load_balance=0\n\nconfig:{5,r1,[r2,r3]}\n\n# some requests are in prepare list, after add secondary, then will be prepared again.\nstate:{{r1,pri,5,5,5},{r2,sec,5,5,5},{r3,sec,5,5,5}}\n\n# write some new requests\nclient:begin_write:id=12,key=k12,value=v12,timeout=0\nclient:begin_write:id=13,key=k13,value=v13,timeout=0\nclient:begin_write:id=14,key=k14,value=v14,timeout=0\nclient:begin_write:id=15,key=k15,value=v15,timeout=0\n\n# all write will be succeed\nclient:end_write:id=15,err=ERR_OK,resp=0\n\n# all requests will be committed\nstate:{{r1,pri,5,9,9},{r2,sec,5,9,9},{r3,sec,5,9,9}}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-006.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-100.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# inject on_rpc_call of prepare\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# wait until server ready\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,0},{r2,sec,3,0},{r3,sec,3,0}}\n\nset:disable_load_balance=1\n\n# begin write\nclient:begin_write:id=1,key=aaa,value=bbb,timeout=0\n\n# inject prepare\ninject:on_rpc_call:rpc_name=rpc_prepare,from=r1,to=r2\n\n# wait until r2 kicked\nconfig:{4,r1,[r3]}\nstate:{{r1,pri,4,0},{r2,sec,3,0},{r3,sec,4,0}}\n\n# end write\nstate:{{r1,pri,4,1},{r2,sec,3,0},{r3,sec,4,0}}\nclient:end_write:id=1,err=err_ok,resp=0\n\n# begin read 1\nclient:begin_read:id=1,key=aaa,timeout=0\n\n# end read 1\nclient:end_read:id=1,err=err_ok,resp=bbb\n\nset:disable_load_balance=0\n\n# wait until recover 
done\nconfig:{5,r1,[r2,r3]}\nstate:{{r1,pri,5,1},{r2,sec,5,1},{r3,sec,5,1}}\n\n# begin read 2\nclient:begin_read:id=2,key=aaa,timeout=0\n\n# end read 2\nclient:end_read:id=2,err=err_ok,resp=bbb\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-100.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-101.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# inject on_rpc_reply of prepare\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# wait until server ready\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,0},{r2,sec,3,0},{r3,sec,3,0}}\n\nset:disable_load_balance=1\n\n# begin write\nclient:begin_write:id=1,key=aaa,value=bbb,timeout=0\n\n# inject prepare fail\ninject:on_rpc_reply:rpc_name=rpc_prepare_ack,from=r2,to=r1\n\n# kick r2\nconfig:{4,r1,[r3]}\nstate:{{r1,pri,4,0},{r2,sec,3,0},{r3,sec,4,0}}\n\n# end write\nstate:{{r1,pri,4,1},{r2,sec,3,0},{r3,sec,4,0}}\nclient:end_write:id=1,err=err_ok,resp=0\n\n# begin read 1\nclient:begin_read:id=1,key=aaa,timeout=0\n\n# end read 1\nclient:end_read:id=1,err=err_ok,resp=bbb\n\nset:disable_load_balance=0\n\n# wait until recover 
done\nconfig:{5,r1,[r2,r3]}\nstate:{{r1,pri,5,1},{r2,sec,5,1},{r3,sec,5,1}}\n\n# begin read 2\nclient:begin_read:id=2,key=aaa,timeout=0\n\n# end read 2\nclient:end_read:id=2,err=err_ok,resp=bbb\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-101.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-102.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# inject on_rpc_call of add_learner\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# inject add learner r2 \ninject:on_rpc_call:rpc_name=RPC_LEARN_ADD_LEARNER,from=r1,to=r2\n\n####\n# the following group check will still init learning\n####\nwait:on_rpc_call:rpc_name=RPC_GROUP_CHECK,from=r1,to=r2\n\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r2,to=r1\n\n# wait until server ready\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-102.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 5000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-103.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# inject on_rpc_call of group check\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait add learner r2 \ninject:on_rpc_call:rpc_name=RPC_LEARN_ADD_LEARNER,from=r1,to=r2\n\n# inject the following group check\n# the failure of group_check will cause erase of learner on primary\ninject:on_rpc_call:rpc_name=RPC_GROUP_CHECK,from=r1,to=r2\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until start 
learn\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r2,to=r1\n\n# wait until server ready\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-103.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 5000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nlb_interval_ms = 2000\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-104.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# inject on_rpc_call of add_learner\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# inject add_learner\ninject:on_rpc_call:rpc_name=RPC_LEARN_ADD_LEARNER,from=r1,to=r2\n\nset:disable_load_balance=0\n####\n# after all, r2 will start to learn\n####\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r2,to=r1\n\n# wait until server ready\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-104.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-105.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# inject on_rpc_call of learn\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait add learner r2 \nwait:on_rpc_call:rpc_name=RPC_LEARN_ADD_LEARNER,from=r1,to=r2\n\n# inject start learn\ninject:on_rpc_call:rpc_name=RPC_LEARN,from=r2,to=r1\n\n# r2 changes to inactive due to rpc_learn timeout\nstate:{{r1,pri,1,0},{r2,ina,1,0}}\n\n###\n# with group_check, r2 will be learner again\n###\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r2,to=r1\n\n# wait until server 
ready\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-105.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-106.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# inject on_rpc_reply of learn ack\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait add learner r2 \nwait:on_rpc_call:rpc_name=RPC_LEARN_ADD_LEARNER,from=r1,to=r2\n\n# wait start learn\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r2,to=r1\n\n# inject learn ack\ninject:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r2\n\n# r2 changes to inactive due to rpc_learn timeout\nstate:{{r1,pri,1,0},{r2,ina,1,0}}\n\n###\n# with group_check, r2 will be learner 
again\n###\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r2,to=r1\n\n# wait until server ready\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-106.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-107.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# inject on_rpc_call of notify learn completion\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait add learner r2 \nwait:on_rpc_call:rpc_name=RPC_LEARN_ADD_LEARNER,from=r1,to=r2\n\n# wait start learn\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r2,to=r1\n\n# wait learn ack\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r2\n\n# inject notify learn completion (which is one way 
rpc)\ninject:on_rpc_call:rpc_name=RPC_LEARN_COMPLETION_NOTIFY,from=r2,to=r1\n\nset:disable_load_balance=0\n\n###\n# whether triggered by rebalance or group_check, r2 will become a learner again\n###\nwait:on_rpc_call:rpc_name=RPC_LEARN_COMPLETION_NOTIFY,from=r2,to=r1\n\n# wait until server ready\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-107.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-108.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# inject on_rpc_call of notify learn completion\n# and set group_check_internal_ms=5000\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nwait:on_rpc_call:rpc_name=RPC_LEARN_ADD_LEARNER,from=r1,to=r2\n\n# wait start learn\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r2,to=r1\n\n# wait learn ack\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r2\n\n# inject notify learn completion\ninject:on_rpc_call:rpc_name=RPC_LEARN_COMPLETION_NOTIFY,from=r2,to=r1\n\n###\n# after some time, 
r1 will do group check to r2,\n# which will find learning succeed\n###\nwait:on_rpc_request_enqueue:rpc_name=RPC_GROUP_CHECK,from=r1,to=r2\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-108.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-109.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# inject on_rpc_reply of RPC_LEARN_ACK\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\n# add secondary r2\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nwait:on_rpc_call:rpc_name=RPC_LEARN_ADD_LEARNER,from=r1,to=r2\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\n# write data 
\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\n\n# wait write data done\nstate:{{r1,pri,2,11},{r2,sec,2,11}}\n\n# add secondary r3\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# r3 start to learn\nwait:on_rpc_call:rpc_name=RPC_LEARN_ADD_LEARNER,from=r1,to=r3\nstate:{{r1,pri,2,11},{r2,sec,2,11},{r3,pot,2,0}}\n\n# learn LT_APP\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r3,to=r1\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r3\nstate:{{r1,pri,2,11},{r2,sec,2,11},{r3,pot,2,11}}\n\n# start another round of learn\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r3,to=r1\n\n# inject LEARN_ACK\ninject:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r3\n\n# wait LEARN_ACK timeout, change to inactive\nstate:{{r1,pri,2,11},{r2,sec,2,11},{r3,ina,2,11}}\n\n# downgrade r2 to inactive\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r2\nconfig:{3,r1,[]}\nstate:{{r1,pri,3,11},{r2,sec,2,11},{r3,ina,2,11}}\ninject:on_rpc_call:rpc_name=RPC_GROUP_CHECK,from=r1,to=r3\n\n# downgrade r1 to inactive\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r1\nconfig:{4,-,[]}\nstate:{{r1,ina,4,11},{r2,sec,2,11},{r3,ina,2,11}}\n\n# assign primary to r3\nclient:replica_config:receiver=r3,type=assign_primary,node=r3\nconfig:{5,r3,[]}\nstate:{{r1,ina,4,11},{r2,sec,2,11},{r3,pri,5,11}}\n\n# add secondary 
r1\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r3\nset:disable_load_balance=1\n\nwait:on_rpc_call:rpc_name=RPC_LEARN_ADD_LEARNER,from=r3,to=r1\nconfig:{6,r3,[r1]}\nstate:{{r1,sec,6,11},{r2,sec,2,11},{r3,pri,6,11}}\n\n# start write again\nclient:begin_write:id=12,key=k12,value=v12,timeout=0\nclient:end_write:id=12,err=ERR_OK,resp=0\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-109.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-200.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# inject on_aio_call of secondary log write\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# wait for server ready\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,0},{r2,sec,3,0},{r3,sec,3,0}}\nset:disable_load_balance=1\n\n# begin write 1\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\n\n# inject aio error on secondary (shared mutation log write error)\nwait:on_rpc_call:rpc_name=RPC_PREPARE,from=r1,to=r2\ninject:on_aio_call:node=r2,task_code=LPC_WRITE_REPLICATION_LOG_PRIVATE\n\n# error on r2\nstate:{{r1,pri,3,0},{r2,err,3,0},{r3,sec,3,0}}\n\n# m update config to kick r2\nconfig:{4,r1,[r3]}\nstate:{{r1,pri,4,0},{r3,sec,4,0}}\n\n# end write 1\nstate:{{r1,pri,4,1},{r3,sec,4,0}}\nclient:end_write:id=1,err=err_ok,resp=0\n\nset:disable_load_balance=0\n\n# r2 
become potential\nstate:{{r1,pri,4,1},{r2,pot,4,0},{r3,sec,4,0}}\n\n# r2 learn done\nstate:{{r1,pri,5,1},{r2,sec,5,1},{r3,sec,5,1}}\n\n# begin read 1\nclient:begin_read:id=1,key=k1,timeout=0\n\n# end read 1\nclient:end_read:id=1,err=err_ok,resp=v1\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-200.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-201.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# inject on_aio_call of primary log write\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# wait for server ready\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,0},{r2,sec,3,0},{r3,sec,3,0}}\n\n# begin write 1\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\n\n# inject aio error on primary\ninject:on_aio_call:node=r1,task_code=LPC_WRITE_REPLICATION_LOG_PRIVATE\n\n# error on r2\nstate:{{r1,err,3,0},{r2,sec,3,0},{r3,sec,3,0}}\n\n# r1 drop it self\nstate:{{r2,sec,3,0},{r3,sec,3,0}}\nconfig:{4,-,[r2,r3]}\n\n# r2 become primary\nstate:{{r2,ina,4,0},{r3,sec,3,0}}\nconfig:{5,r2,[r3]}\nset:disable_load_balance=1\n\n# commit write\nstate:{{r2,pri,5,1},{r3,sec,5,0}}\n\n# client receive timeout (but data is already 
committed)\nclient:end_write:id=1,err=ERR_TIMEOUT,resp=0\n\nset:disable_load_balance=0\n\n# r1 recover\nstate:{{r1,sec,6,1},{r2,pri,6,1},{r3,sec,6,1}}\n\n# begin read 1\nclient:begin_read:id=1,key=k1,timeout=0\n\n# end read 1\nclient:end_read:id=1,err=err_ok,resp=v1\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-201.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-202-0.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test recover\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes 
primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\n\nstate:{{r1,pri,2,11},{r2,sec,2,11}}\n\nclient:begin_read:id=1,key=k1,timeout=0\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\n\nset:disable_load_balance=0\n\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,11},{r2,sec,3,11},{r3,sec,3,11}}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-202-0.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\nlog_private_batch_buffer_kb = 4\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n[test]\ntest_file_learning = true\ndelta_state_learning_supported = true\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-202-1.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test recover\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nset:disable_load_balance=1\n\n# inject open_read of local log file log.1.0 at r2\ninject:on_aio_call:node=r2,type=READ,file_offset=0\n\n# recover ok, recovered primary will force to inc ballot\nconfig:{5,r1,[r3]}\nstate:{{r1,pri,5,11},{r3,sec,5,11}}\n\nset:disable_load_balance=0\n\n# r2 catch up\nconfig:{6,r1,[r2,r3]}\nstate:{{r1,pri,6,11},{r2,sec,6,11},{r3,sec,6,11}}\n\nclient:begin_write:id=12,key=k12,value=v12,timeout=0\nclient:end_write:id=12,key=k12,err=err_ok,resp=0\n\nstate:{{r1,pri,6,12},{r2,sec,6,12},{r3,sec,6,12}}\n\nclient:begin_read:id=12,key=k12,timeout=0\nclient:end_read:id=12,key=k12,err=err_ok,resp=v12\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-202-1.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\nlog_private_batch_buffer_kb = 4\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n[test]\ntest_file_learning = true\ndelta_state_learning_supported = true\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-203-0.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test recover\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes 
primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\n\nstate:{{r1,pri,2,11},{r2,sec,2,11}}\n\nclient:begin_read:id=1,key=k1,timeout=0\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\n\nset:disable_load_balance=0\n\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,11},{r2,sec,3,11},{r3,sec,3,11}}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-203-0.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\nlog_private_batch_buffer_kb = 0\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n[test]\ntest_file_learning = true\ndelta_state_learning_supported = true\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-204.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learn\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes 
primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\n\n# wait r2 checkpoint done\nstate:{{r1,pri,2,10,10},{r2,sec,2,10,10}}\n\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r1\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\n\n# remove secondary r2\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r2\nconfig:{3,r1,[]}\nstate:{{r1,pri,3,10}}\n\n# add secondary r2\n# will trigger learning in on_learn():\n#   _prepare_list.count() {=10} > 0 \n#   
learn_start_decree {=11} > _prepare_list->min_decree() {=1}\n#   to-be-learn state is covered by prepare list, learn by CACHE, learn_mutation_count = 0\n#\n#                      (1)                 (10)\n#   prepare_list :      |-------------------|  \n#   learn        :                           |-->\n#                                           (11)\n#\nclient:replica_config:receiver=r1,type=add_secondary,node=r2\nconfig:{4,r1,[r2]}\nstate:{{r1,pri,4,10},{r2,sec,4,10}}\n\n# change primary from r1 to r2\nclient:replica_config:receiver=r1,type=downgrade_to_secondary,node=r1\nconfig:{5,-,[r1,r2]}\nstate:{{r1,sec,5,10},{r2,sec,5,10}}\nclient:replica_config:receiver=r2,type=upgrade_to_primary,node=r2\nconfig:{6,r2,[r1]}\nstate:{{r1,sec,6,10},{r2,pri,6,10}}\n\n# check r2 data correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r2\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-204.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\nmutation_2pc_min_replica_count = 2\n\ngroup_check_disabled = false\ngroup_check_interval_ms = 100000\n\ncheckpoint_disabled = false\ncheckpoint_interval_seconds = 60\n\ngc_disabled = false\ngc_interval_ms = 30000\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nlog_private_disabled = false\nlog_file_size_mb = 32\nlog_shared_batch_buffer_kb = 0\nlog_private_batch_buffer_kb = 4\n\nconfig_sync_disabled = false\nconfig_sync_interval_ms = 30000\n\nlb_interval_ms = 10000\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-205.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learn\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes 
primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\n\n# wait r2 checkpoint done\nstate:{{r1,pri,2,10,10},{r2,sec,2,10,10}}\n\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r1\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\n\n# remove secondary r2\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r2\nconfig:{3,r1,[]}\nstate:{{r1,pri,3,10}}\n\n# add secondary r3 (new replica)\n# will trigger learning in on_learn():\n#   _prepare_list.count() {=10} > 0 \n#   
learn_start_decree {=1} == _prepare_list->min_decree() {=1}\n#   to-be-learn state is covered by prepare list, learn by CACHE, learn_mutation_count = 10\n#\n#                      (1)                 (10)\n#   prepare_list :      |-------------------|  \n#   learn        :      |-->\n#                      (1)\n#\nclient:replica_config:receiver=r1,type=add_secondary,node=r3\nconfig:{4,r1,[r3]}\nstate:{{r1,pri,4,10},{r3,sec,4,10}}\n\n# change primary from r1 to r3\nclient:replica_config:receiver=r1,type=downgrade_to_secondary,node=r1\nconfig:{5,-,[r1,r3]}\nstate:{{r1,sec,5,10},{r3,sec,5,10}}\nclient:replica_config:receiver=r3,type=upgrade_to_primary,node=r3\nconfig:{6,r3,[r1]}\nstate:{{r1,sec,6,10},{r3,pri,6,10}}\n\n# check r3 data correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r3\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-205.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\nmutation_2pc_min_replica_count = 2\n\ngroup_check_disabled = false\ngroup_check_interval_ms = 100000\n\ncheckpoint_disabled = false\ncheckpoint_interval_seconds = 60\n\ngc_disabled = false\ngc_interval_ms = 30000\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nlog_private_disabled = false\nlog_file_size_mb = 32\nlog_shared_batch_buffer_kb = 0\nlog_private_batch_buffer_kb = 4\n\nconfig_sync_disabled = false\nconfig_sync_interval_ms = 30000\n\nlb_interval_ms = 10000\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-206.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learn\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes 
primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\n\n# wait r2 checkpoint done\nstate:{{r1,pri,2,10,10},{r2,sec,2,10,10}}\n\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r1\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\n\n# remove secondary r2\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r2\nconfig:{3,r1,[]}\nstate:{{r1,pri,3,10}}\n\n# add secondary r3 (new replica)\n# will trigger learning in on_learn():\n#   _prepare_list.count() {=10} > 0 \n#   
learn_start_decree {=1} == _prepare_list->min_decree() {=1}\n#   to-be-learn state is covered by prepare list, learn by CACHE, learn_mutation_count = 10\n#\n#                      (1)                 (10)\n#   prepare_list :      |-------------------|  \n#   learn        :      |-->\n#                      (1)\n#\nclient:replica_config:receiver=r1,type=add_secondary,node=r3\nconfig:{4,r1,[r3]}\nstate:{{r1,pri,4,10},{r3,sec,4,10}}\n\n# write #11 and #12\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\nclient:end_write:id=11,key=k11,err=err_ok,resp=0\nclient:begin_write:id=12,key=k12,value=v12,timeout=0\nclient:end_write:id=12,key=k12,err=err_ok,resp=0\n\n# add secondary r2 (old replica commit to 10)\n# will trigger learning in on_learn():\n#   _prepare_list.count() {=10} > 0 \n#   learn_start_decree {=11} > _prepare_list->min_decree() {=3}\n#   to-be-learn state is covered by prepare list, learn by CACHE, learn_mutation_count = 2\n#\n#                      (3)                 (12)\n#   prepare_list :      |-------------------|  \n#   learn        :                         |-->\n#                                         (11)\n#\nclient:replica_config:receiver=r1,type=add_secondary,node=r2\nconfig:{5,r1,[r2,r3]}\nstate:{{r1,pri,5,12},{r2,sec,5,12},{r3,sec,5,12}}\n\n# change primary from r1 to r2\nclient:replica_config:receiver=r1,type=downgrade_to_secondary,node=r1\nconfig:{6,-,[r1,r2,r3]}\nstate:{{r1,sec,6,12},{r2,sec,6,12},{r3,sec,6,12}}\nclient:replica_config:receiver=r2,type=upgrade_to_primary,node=r2\nconfig:{7,r2,[r1,r3]}\nstate:{{r1,sec,7,12},{r2,pri,7,12},{r3,sec,7,12}}\n\n# check r2 data 
correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r2\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\nclient:begin_read:id=12,key=k12,timeout=0\nclient:end_read:id=12,err=err_ok,resp=v12\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-206.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\nmutation_2pc_min_replica_count = 2\n\ngroup_check_disabled = false\ngroup_check_interval_ms = 100000\n\ncheckpoint_disabled = false\ncheckpoint_interval_seconds = 60\n\ngc_disabled = false\ngc_interval_ms = 30000\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nlog_private_disabled = false\nlog_file_size_mb = 32\nlog_shared_batch_buffer_kb = 0\nlog_private_batch_buffer_kb = 4\n\nconfig_sync_disabled = false\nconfig_sync_interval_ms = 30000\n\nlb_interval_ms = 10000\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-207.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learn\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes 
primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\n\n# wait r2 checkpoint done\nstate:{{r1,pri,2,10,10},{r2,sec,2,10,10}}\n\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r1\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\n\n# remove secondary r2\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r2\nconfig:{3,r1,[]}\nstate:{{r1,pri,3,10}}\n\n# add secondary r3 (new 
replica)\nclient:replica_config:receiver=r1,type=add_secondary,node=r3\nconfig:{4,r1,[r3]}\nstate:{{r1,pri,4,10},{r3,sec,4,10}}\n\n# write data #11 - #15\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\nclient:end_write:id=11,key=k11,err=err_ok,resp=0\nclient:begin_write:id=12,key=k12,value=v12,timeout=0\nclient:end_write:id=12,key=k12,err=err_ok,resp=0\nclient:begin_write:id=13,key=k13,value=v13,timeout=0\nclient:end_write:id=13,key=k13,err=err_ok,resp=0\nclient:begin_write:id=14,key=k14,value=v14,timeout=0\nclient:end_write:id=14,key=k14,err=err_ok,resp=0\nclient:begin_write:id=15,key=k15,value=v15,timeout=0\nclient:end_write:id=15,key=k15,err=err_ok,resp=0\n\nstate:{{r1,pri,4,15,15},{r3,sec,4,15,15}}\n\n# remove secondary r3\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r3\nconfig:{5,r1,[]}\nstate:{{r1,pri,5,15}}\n\n# remove primary r1\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r1\nconfig:{6,-,[]}\nstate:{}\n\n# add primary r2\nclient:replica_config:receiver=r2,type=assign_primary,node=r2\nconfig:{7,r2,[]}\nstate:{{r2,pri,7,10}}\n\n# check r2 data 
corrent\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r2\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\n\n# add secondary r3\n# will trigger learning in on_learn():\n#   _prepare_list.count() {=10} > 0 \n#   learn_start_decree {=15} > last_prepared_decree(10)\n#   learner state is newer than learnee, learn from scratch\n#\n#                      (1)                 (10)\n#   prepare_list :      |-------------------| \n#   learn        :                                   |-->\n#                                                    (15)\nclient:replica_config:receiver=r2,type=add_secondary,node=r3\nstate:{{r3,pot,7,15,15},{r2,pri,7,10,10}}\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r3,to=r2\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r2,to=r3\nstate:{{r3,pot,7,15,0},{r2,pri,7,10,10}}\nstate:{{r3,pot,7,0},{r2,pri,7,10}}\nstate:{{r2,pri,8,10}, {r3,sec,8,10}}\n\n# check r2 data 
corrent\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r2\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=<<not-exist>>\nclient:begin_read:id=12,key=k12,timeout=0\nclient:end_read:id=12,err=err_ok,resp=<<not-exist>>\nclient:begin_read:id=13,key=k13,timeout=0\nclient:end_read:id=13,err=err_ok,resp=<<not-exist>>\nclient:begin_read:id=14,key=k14,timeout=0\nclient:end_read:id=14,err=err_ok,resp=<<not-exist>>\nclient:begin_read:id=15,key=k15,timeout=0\nclient:end_read:id=15,err=err_ok,resp=<<not-exist>>\n\n# change primary from r2 to r3\nclient:replica_config:receiver=r2,type=downgrade_to_secondary,node=r2\nconfig:{9,-,[r2,r3]}\nstate:{{r2,sec,9,10},{r3,sec,9,10}}\nclient:replica_config:receiver=r3,type=upgrade_to_primary,node=r3\nconfig:{10,r3,[r2]}\nstate:{{r3,pri,10,10},{r2,sec,10,10}}\n\n# check r3 data 
correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r3\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=<<not-exist>>\nclient:begin_read:id=12,key=k12,timeout=0\nclient:end_read:id=12,err=err_ok,resp=<<not-exist>>\nclient:begin_read:id=13,key=k13,timeout=0\nclient:end_read:id=13,err=err_ok,resp=<<not-exist>>\nclient:begin_read:id=14,key=k14,timeout=0\nclient:end_read:id=14,err=err_ok,resp=<<not-exist>>\nclient:begin_read:id=15,key=k15,timeout=0\nclient:end_read:id=15,err=err_ok,resp=<<not-exist>>\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-207.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\nmutation_2pc_min_replica_count = 2\n\ngroup_check_disabled = false\ngroup_check_interval_ms = 100000\n\ncheckpoint_disabled = false\ncheckpoint_interval_seconds = 60\n\ngc_disabled = false\ngc_interval_ms = 30000\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nlog_private_disabled = false\nlog_file_size_mb = 32\nlog_shared_batch_buffer_kb = 0\nlog_private_batch_buffer_kb = 4\n\nconfig_sync_disabled = false\nconfig_sync_interval_ms = 30000\n\nlb_interval_ms = 10000\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-208.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learn\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes 
primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\n\n# wait r2 checkpoint done\nstate:{{r1,pri,2,10,10},{r2,sec,2,10,10}}\n\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r1\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\n\n# remove secondary r2\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r2\nconfig:{3,r1,[]}\nstate:{{r1,pri,3,10}}\n\n# add secondary r3 (new 
replica)\nclient:replica_config:receiver=r1,type=add_secondary,node=r3\nconfig:{4,r1,[r3]}\nstate:{{r1,pri,4,10},{r3,sec,4,10}}\n\n# write #11 - #15\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\nclient:end_write:id=11,key=k11,err=err_ok,resp=0\nclient:begin_write:id=12,key=k12,value=v12,timeout=0\nclient:end_write:id=12,key=k12,err=err_ok,resp=0\nclient:begin_write:id=13,key=k13,value=v13,timeout=0\nclient:end_write:id=13,key=k13,err=err_ok,resp=0\nclient:begin_write:id=14,key=k14,value=v14,timeout=0\nclient:end_write:id=14,key=k14,err=err_ok,resp=0\nclient:begin_write:id=15,key=k15,value=v15,timeout=0\nclient:end_write:id=15,key=k15,err=err_ok,resp=0\n\nstate:{{r1,pri,4,15,15},{r3,sec,4,15,15}}\n\n# remove secondary r3\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r3\nconfig:{5,r1,[]}\nstate:{{r1,pri,5,15}}\n\n# remove primary r1\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r1\nconfig:{6,-,[]}\nstate:{}\n\n# assign primary r2\nclient:replica_config:receiver=r2,type=assign_primary,node=r2\nconfig:{7,r2,[]}\nstate:{{r2,pri,7,10,10}}\n\n# add secondary r1\n# will trigger learning in on_learn():\n#   _prepare_list.count() {=10} > 0 \n#   learn_start_decree {=15} > last_prepared_decree(10)\n#   learner state is newer than learnee, learn from scratch\n#\n#                      (1)                 (10)\n#   prepare_list :      |-------------------| \n#   learn        :                                   |-->\n#                                                    (15)\n#\n#   set:simple_kv_open_fail=1 to inject failure when reopen the app\nclient:replica_config:receiver=r2,type=add_secondary,node=r1\nstate:{{r1,pot,7,15,15},{r2,pri,7,10,10}}\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r1,to=r2\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r2,to=r1\nset:simple_kv_open_fail=1\nstate:{{r1,pot,7,15,0},{r2,pri,7,10,10}}\nstate:{{r1,err,7,0},{r2,pri,7,10}}\nstate:{{r2,pri,7,10}}\nset:simple_kv_open_fail=0\n\n# the following 
group_check will trigger init_learn again\n# learn succeed\nwait:on_task_begin:node=r1,task_code=RPC_GROUP_CHECK\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r1,to=r2\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r2,to=r1\nconfig:{8,r2,[r1]}\nstate:{{r1,sec,8,10},{r2,pri,8,10}}\n\n# change primary from r2 to r1\nclient:replica_config:receiver=r2,type=downgrade_to_secondary,node=r2\nconfig:{9,-,[r1,r2]}\nstate:{{r1,sec,9,10},{r2,sec,9,10}}\nclient:replica_config:receiver=r1,type=upgrade_to_primary,node=r1\nconfig:{10,r1,[r2]}\nstate:{{r2,sec,10,10},{r1,pri,10,10}}\n\n# check r1 data correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r1\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=<<not-exist>>\nclient:begin_read:id=12,key=k12,timeout=0\nclient:end_read:id=12,err=err_ok,resp=<<not-exist>>\nclient:begin_read:id=13,key=k13,timeout=0\nclient:end_read:id=13,err=err_ok,resp=<<not-exist>>\nclient:begin_read:id=14,key=k14,timeout=0\nclient:end_read:id=14,err=err_ok,resp=<<not-exist>>\nclient:begin_read:id=15,key=k15,timeout=0\nclient:end_read:id=15,err=err_ok,resp=<<not-exist>>\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-208.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\nmutation_2pc_min_replica_count = 2\n\ngroup_check_disabled = false\ngroup_check_interval_ms = 100000\n\ncheckpoint_disabled = false\ncheckpoint_interval_seconds = 60\n\ngc_disabled = false\ngc_interval_ms = 30000\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nlog_private_disabled = false\nlog_file_size_mb = 32\nlog_shared_batch_buffer_kb = 0\nlog_private_batch_buffer_kb = 4\n\nconfig_sync_disabled = false\nconfig_sync_interval_ms = 30000\n\nlb_interval_ms = 10000\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-209.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learn\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes 
primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\n\n# wait r2 checkpoint done\nstate:{{r1,pri,2,10,10},{r2,sec,2,10,10}}\n\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r1\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\n\n# remove secondary r2\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r2\nconfig:{3,r1,[]}\nstate:{{r1,pri,3,10}}\n\n# add secondary r3 (new 
replica)\nclient:replica_config:receiver=r1,type=add_secondary,node=r3\nconfig:{4,r1,[r3]}\nstate:{{r1,pri,4,10},{r3,sec,4,10}}\n\n# write #11 - #15\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\nclient:end_write:id=11,key=k11,err=err_ok,resp=0\nclient:begin_write:id=12,key=k12,value=v12,timeout=0\nclient:end_write:id=12,key=k12,err=err_ok,resp=0\nclient:begin_write:id=13,key=k13,value=v13,timeout=0\nclient:end_write:id=13,key=k13,err=err_ok,resp=0\nclient:begin_write:id=14,key=k14,value=v14,timeout=0\nclient:end_write:id=14,key=k14,err=err_ok,resp=0\nclient:begin_write:id=15,key=k15,value=v15,timeout=0\nclient:end_write:id=15,key=k15,err=err_ok,resp=0\n\nstate:{{r1,pri,4,15,15},{r3,sec,4,15,15}}\n\n# remove secondary r3\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r3\nconfig:{5,r1,[]}\nstate:{{r1,pri,5,15}}\n\n# remove primary r1\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r1\nconfig:{6,-,[]}\nstate:{}\n\n# assign primary r2\nclient:replica_config:receiver=r2,type=assign_primary,node=r2\nconfig:{7,r2,[]}\nstate:{{r2,pri,7,10,10}}\n\n# add secondary r1\n# will trigger learning in on_learn():\n#   _prepare_list.count() {=10} > 0 \n#   learn_start_decree {=15} > last_prepared_decree(10)\n#   learner state is newer than learnee, learn from scratch\n#\n#                      (1)                 (10)\n#   prepare_list :      |-------------------| \n#   learn        :                                   |-->\n#                                                    (15)\n#\n#   set:simple_kv_close_fail=1 to inject failure when reopen the app\nclient:replica_config:receiver=r2,type=add_secondary,node=r1\nstate:{{r1,pot,7,15,15},{r2,pri,7,10,10}}\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r1,to=r2\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r2,to=r1\nset:simple_kv_close_fail=1\nstate:{{r1,err,7,15,15},{r2,pri,7,10,10}}\nstate:{{r2,pri,7,10}}\nset:simple_kv_close_fail=0\n\n# the following group_check will trigger init_learn 
again\n# learn succeed\nwait:on_task_begin:node=r1,task_code=RPC_GROUP_CHECK\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r1,to=r2\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r2,to=r1\nconfig:{8,r2,[r1]}\nstate:{{r1,sec,8,10},{r2,pri,8,10}}\n\n# change primary from r2 to r1\nclient:replica_config:receiver=r2,type=downgrade_to_secondary,node=r2\nconfig:{9,-,[r1,r2]}\nstate:{{r1,sec,9,10},{r2,sec,9,10}}\nclient:replica_config:receiver=r1,type=upgrade_to_primary,node=r1\nconfig:{10,r1,[r2]}\nstate:{{r2,sec,10,10},{r1,pri,10,10}}\n\n# check r1 data correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r1\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=<<not-exist>>\nclient:begin_read:id=12,key=k12,timeout=0\nclient:end_read:id=12,err=err_ok,resp=<<not-exist>>\nclient:begin_read:id=13,key=k13,timeout=0\nclient:end_read:id=13,err=err_ok,resp=<<not-exist>>\nclient:begin_read:id=14,key=k14,timeout=0\nclient:end_read:id=14,err=err_ok,resp=<<not-exist>>\nclient:begin_read:id=15,key=k15,timeout=0\nclient:end_read:id=15,err=err_ok,resp=<<not-exist>>\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-209.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\nmutation_2pc_min_replica_count = 2\n\ngroup_check_disabled = false\ngroup_check_interval_ms = 100000\n\ncheckpoint_disabled = false\ncheckpoint_interval_seconds = 60\n\ngc_disabled = false\ngc_interval_ms = 30000\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nlog_private_disabled = false\nlog_file_size_mb = 32\nlog_shared_batch_buffer_kb = 0\nlog_private_batch_buffer_kb = 4\n\nconfig_sync_disabled = false\nconfig_sync_interval_ms = 30000\n\nlb_interval_ms = 10000\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-210.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learn\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\n\n# wait r2 checkpoint 
done\nstate:{{r1,pri,2,5,5},{r2,sec,2,5,5}}\n\n# write more\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\nclient:begin_write:id=12,key=k12,value=v12,timeout=0\nclient:begin_write:id=13,key=k13,value=v13,timeout=0\nclient:begin_write:id=14,key=k14,value=v14,timeout=0\nclient:begin_write:id=15,key=k15,value=v15,timeout=0\n\nstate:{{r1,pri,2,15,5},{r2,sec,2,15,5}}\n\n# add secondary r3\n# will trigger learning in on_learn():\n#   learn_start_decree {=1} < _prepare_list->min_decree() {=6}\n#   learn_start_decree {=1} < _app->last_durable_decree() {=5}\n#   learn checkpoint + cache\n#\n#                         (5)\n#   check_point  :      -->|\n#                          (6)                 (15)\n#   prepare_list :          |-------------------|  \n#                    (1)\n#   learn        :    |-->\n#\nclient:replica_config:receiver=r1,type=add_secondary,node=r3\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,15},{r2,sec,3,15},{r3,sec,3,15}}\n\n# check r1 data 
correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r1\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\nclient:begin_read:id=12,key=k12,timeout=0\nclient:end_read:id=12,err=err_ok,resp=v12\nclient:begin_read:id=13,key=k13,timeout=0\nclient:end_read:id=13,err=err_ok,resp=v13\nclient:begin_read:id=14,key=k14,timeout=0\nclient:end_read:id=14,err=err_ok,resp=v14\nclient:begin_read:id=15,key=k15,timeout=0\nclient:end_read:id=15,err=err_ok,resp=v15\n\n# change primary from r1 to r3\nclient:replica_config:receiver=r1,type=downgrade_to_secondary,node=r1\nconfig:{4,-,[r1,r2,r3]}\nstate:{{r1,sec,4,15},{r2,sec,4,15},{r3,sec,4,15}}\nclient:replica_config:receiver=r3,type=upgrade_to_primary,node=r3\nconfig:{5,r3,[r1,r2]}\nstate:{{r1,sec,5,15},{r2,sec,5,15},{r3,pri,5,15}}\n\n# check r3 data 
correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r3\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\nclient:begin_read:id=12,key=k12,timeout=0\nclient:end_read:id=12,err=err_ok,resp=v12\nclient:begin_read:id=13,key=k13,timeout=0\nclient:end_read:id=13,err=err_ok,resp=v13\nclient:begin_read:id=14,key=k14,timeout=0\nclient:end_read:id=14,err=err_ok,resp=v14\nclient:begin_read:id=15,key=k15,timeout=0\nclient:end_read:id=15,err=err_ok,resp=v15\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-210.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 11\nmutation_2pc_min_replica_count = 2\n\ngroup_check_disabled = false\ngroup_check_interval_ms = 5000\n\ncheckpoint_disabled = false\ncheckpoint_interval_seconds = 60\n\ngc_disabled = false\ngc_interval_ms = 30000\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nlog_private_disabled = false\nlog_file_size_mb = 32\nlog_shared_batch_buffer_kb = 0\nlog_private_batch_buffer_kb = 4\n\nconfig_sync_disabled = false\nconfig_sync_interval_ms = 30000\n\nlb_interval_ms = 10000\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-211.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learn\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\n\n# wait r2 checkpoint 
done\nstate:{{r1,pri,2,5,5},{r2,sec,2,5,5}}\n\n# write more\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\nclient:begin_write:id=12,key=k12,value=v12,timeout=0\nclient:begin_write:id=13,key=k13,value=v13,timeout=0\nclient:begin_write:id=14,key=k14,value=v14,timeout=0\nclient:begin_write:id=15,key=k15,value=v15,timeout=0\n\nstate:{{r1,pri,2,15,5},{r2,sec,2,15,5}}\n\n# add secondary r3\n# will trigger learning in on_learn():\n#   learn_start_decree {=1} < _prepare_list->min_decree() {=6}\n#   learn_start_decree {=1} < _app->last_durable_decree() {=5}\n#   learn checkpoint + cache\n#\n#                         (5)\n#   check_point  :      -->|\n#                          (6)                 (15)\n#   prepare_list :          |-------------------|  \n#                    (1)\n#   learn        :    |-->\n#\n#   set simple_kv_get_checkpoint_fail=1\nclient:replica_config:receiver=r1,type=add_secondary,node=r3\nset:simple_kv_get_checkpoint_fail=1\nstate:{{r1,pri,2,15},{r2,sec,2,15},{r3,pot,2,0}}\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r3,to=r1\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r3\nstate:{{r1,pri,2,15},{r2,sec,2,15},{r3,err,2,0}}\nstate:{{r1,pri,2,15},{r2,sec,2,15}}\nset:simple_kv_get_checkpoint_fail=0\n\n# the following group_check will trigger init_learn again\n# inject learn_ack\nwait:on_task_begin:node=r3,task_code=RPC_GROUP_CHECK\nstate:{{r1,pri,2,15},{r2,sec,2,15},{r3,pot,2,0}}\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r3,to=r1\ninject:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r3\n# rpc_learn timeout will make r3 inactive\nstate:{{r1,pri,2,15},{r2,sec,2,15},{r3,ina,2,0}}\n# and further group check will make r3 potential secondary 
again\nstate:{{r1,pri,2,15},{r2,sec,2,15},{r3,pot,2,0}}\n\nset:simple_kv_apply_checkpoint_fail=1\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r3,to=r1\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r3\nstate:{{r1,pri,2,15},{r2,sec,2,15},{r3,err,2,0}}\nstate:{{r1,pri,2,15},{r2,sec,2,15}}\nset:simple_kv_apply_checkpoint_fail=0\n\n# the following group_check will trigger init_learn again\n# learn succeed\nwait:on_task_begin:node=r3,task_code=RPC_GROUP_CHECK\nstate:{{r1,pri,2,15},{r2,sec,2,15},{r3,pot,2,0}}\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r3,to=r1\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r3\nstate:{{r1,pri,3,15},{r2,sec,3,15},{r3,sec,3,15}}\n\n# check r1 data correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r1\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\nclient:begin_read:id=12,key=k12,timeout=0\nclient:end_read:id=12,err=err_ok,resp=v12\nclient:begin_read:id=13,key=k13,timeout=0\nclient:end_read:id=13,err=err_ok,resp=v13\nclient:begin_read:id=14,key=k14,timeout=0\nclient:end_read:id=14,err=err_ok,resp=v14\nclient:begin_read:id=15,key=k15,timeout=0\nclient:end_read:id=15,
err=err_ok,resp=v15\n\n# change primary from r1 to r3\nclient:replica_config:receiver=r1,type=downgrade_to_secondary,node=r1\nconfig:{4,-,[r1,r2,r3]}\nstate:{{r1,sec,4,15},{r2,sec,4,15},{r3,sec,4,15}}\nclient:replica_config:receiver=r3,type=upgrade_to_primary,node=r3\nconfig:{5,r3,[r1,r2]}\nstate:{{r1,sec,5,15},{r2,sec,5,15},{r3,pri,5,15}}\n\n# check r3 data correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r3\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\nclient:begin_read:id=12,key=k12,timeout=0\nclient:end_read:id=12,err=err_ok,resp=v12\nclient:begin_read:id=13,key=k13,timeout=0\nclient:end_read:id=13,err=err_ok,resp=v13\nclient:begin_read:id=14,key=k14,timeout=0\nclient:end_read:id=14,err=err_ok,resp=v14\nclient:begin_read:id=15,key=k15,timeout=0\nclient:end_read:id=15,err=err_ok,resp=v15\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-211.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 11\nmutation_2pc_min_replica_count = 2\n\ngroup_check_disabled = false\ngroup_check_interval_ms = 5000\n\ncheckpoint_disabled = false\ncheckpoint_interval_seconds = 60\n\ngc_disabled = false\ngc_interval_ms = 30000\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nlog_private_disabled = false\nlog_file_size_mb = 32\nlog_shared_batch_buffer_kb = 0\nlog_private_batch_buffer_kb = 4\n\nconfig_sync_disabled = false\nconfig_sync_interval_ms = 30000\n\nlb_interval_ms = 10000\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-212.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learn\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\n\n# wait r2 checkpoint 
done\nstate:{{r1,pri,2,5,5},{r2,sec,2,5,5}}\n\n# write more\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\nclient:begin_write:id=12,key=k12,value=v12,timeout=0\nclient:begin_write:id=13,key=k13,value=v13,timeout=0\nclient:begin_write:id=14,key=k14,value=v14,timeout=0\nclient:begin_write:id=15,key=k15,value=v15,timeout=0\nclient:begin_write:id=16,key=k16,value=v16,timeout=0\nclient:begin_write:id=17,key=k17,value=v17,timeout=0\nclient:begin_write:id=18,key=k18,value=v18,timeout=0\nclient:begin_write:id=19,key=k19,value=v19,timeout=0\nclient:begin_write:id=20,key=k20,value=v20,timeout=0\n\nstate:{{r1,pri,2,20,5},{r2,sec,2,20,5}}\n\n# add secondary r3\n# will trigger learning in on_learn():\n#   learn_start_decree {=1} < _prepare_list->min_decree() {=11}\n#   learn_start_decree {=1} < _app->last_durable_decree() {=5}\n#   learn checkpoint + log + cache\n#\n#                         (5)\n#   check_point  :      -->|\n#                          (6)   (10)\n#   private_log  :          |~~~~~|\n#                                 (11)                (20)\n#   prepare_list :                 |-------------------|  \n#                    (1)\n#   learn        :    |-->\n#\n#   private_log is in memory because log_private_batch_buffer_kb=4\nclient:replica_config:receiver=r1,type=add_secondary,node=r3\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,20},{r2,sec,3,20},{r3,sec,3,20}}\n\n# check r1 data 
correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r1\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\nclient:begin_read:id=12,key=k12,timeout=0\nclient:end_read:id=12,err=err_ok,resp=v12\nclient:begin_read:id=13,key=k13,timeout=0\nclient:end_read:id=13,err=err_ok,resp=v13\nclient:begin_read:id=14,key=k14,timeout=0\nclient:end_read:id=14,err=err_ok,resp=v14\nclient:begin_read:id=15,key=k15,timeout=0\nclient:end_read:id=15,err=err_ok,resp=v15\nclient:begin_read:id=16,key=k16,timeout=0\nclient:end_read:id=16,err=err_ok,resp=v16\nclient:begin_read:id=17,key=k17,timeout=0\nclient:end_read:id=17,err=err_ok,resp=v17\nclient:begin_read:id=18,key=k18,timeout=0\nclient:end_read:id=18,err=err_ok,resp=v18\nclient:begin_read:id=19,key=k19,timeout=0\nclient:end_read:id=19,err=err_ok,resp=v19\nclient:begin_read:id=20,key=k20,timeout=0\nclient:end_read:id=20,err=err_ok,resp=v20\n\n# change primary from r1 to 
r3\nclient:replica_config:receiver=r1,type=downgrade_to_secondary,node=r1\nconfig:{4,-,[r1,r2,r3]}\nstate:{{r1,sec,4,20},{r2,sec,4,20},{r3,sec,4,20}}\nclient:replica_config:receiver=r3,type=upgrade_to_primary,node=r3\nconfig:{5,r3,[r1,r2]}\nstate:{{r1,sec,5,20},{r2,sec,5,20},{r3,pri,5,20}}\n\n# check r3 data correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r3\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\nclient:begin_read:id=12,key=k12,timeout=0\nclient:end_read:id=12,err=err_ok,resp=v12\nclient:begin_read:id=13,key=k13,timeout=0\nclient:end_read:id=13,err=err_ok,resp=v13\nclient:begin_read:id=14,key=k14,timeout=0\nclient:end_read:id=14,err=err_ok,resp=v14\nclient:begin_read:id=15,key=k15,timeout=0\nclient:end_read:id=15,err=err_ok,resp=v15\nclient:begin_read:id=16,key=k16,timeout=0\nclient:end_read:id=16,err=err_ok,resp=v16\nclient:begin_read:id=17,key=k17,timeout=0\nclient:end_read:id=17,err=err_ok,resp=v17\nclient:begin_read:id=18,key=k18,timeout=0\nclient:end_read:id=18,err=err_ok,resp=v18\nclient:begin_read:id=19,key=k19,timeout=0\nclient:end_read:id=19,err=err_ok,resp=v19\nclient:begin_r
ead:id=20,key=k20,timeout=0\nclient:end_read:id=20,err=err_ok,resp=v20\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-212.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmutation_2pc_min_replica_count = 2\n\ngroup_check_disabled = false\ngroup_check_interval_ms = 5000\n\ncheckpoint_disabled = false\ncheckpoint_interval_seconds = 60\n\ngc_disabled = false\ngc_interval_ms = 30000\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nlog_private_disabled = false\nlog_file_size_mb = 32\nlog_shared_batch_buffer_kb = 0\nlog_private_batch_buffer_kb = 4\n\nconfig_sync_disabled = false\nconfig_sync_interval_ms = 30000\n\nlb_interval_ms = 10000\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-213.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learn\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\n\n# wait r2 checkpoint 
done\nstate:{{r1,pri,2,5,5},{r2,sec,2,5,5}}\n\n# write more\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\nclient:begin_write:id=12,key=k12,value=v12,timeout=0\nclient:begin_write:id=13,key=k13,value=v13,timeout=0\nclient:begin_write:id=14,key=k14,value=v14,timeout=0\nclient:begin_write:id=15,key=k15,value=v15,timeout=0\nclient:begin_write:id=16,key=k16,value=v16,timeout=0\nclient:begin_write:id=17,key=k17,value=v17,timeout=0\nclient:begin_write:id=18,key=k18,value=v18,timeout=0\nclient:begin_write:id=19,key=k19,value=v19,timeout=0\nclient:begin_write:id=20,key=k20,value=v20,timeout=0\n\nstate:{{r1,pri,2,20,5},{r2,sec,2,20,5}}\n\n# add secondary r3\n# will trigger learning in on_learn():\n#   learn_start_decree {=1} < _prepare_list->min_decree() {=11}\n#   learn_start_decree {=1} < _app->last_durable_decree() {=5}\n#   learn checkpoint + log + cache\n#\n#                         (5)\n#   check_point  :      -->|\n#                          (6)   (10)\n#   private_log  :          |-----|\n#                                 (11)                (20)\n#   prepare_list :                 |-------------------|  \n#                    (1)\n#   learn        :    |-->\n#\n#   private_log is in file because log_private_batch_buffer_kb=0\nclient:replica_config:receiver=r1,type=add_secondary,node=r3\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,20},{r2,sec,3,20},{r3,sec,3,20}}\n\n# check r1 data 
correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r1\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\nclient:begin_read:id=12,key=k12,timeout=0\nclient:end_read:id=12,err=err_ok,resp=v12\nclient:begin_read:id=13,key=k13,timeout=0\nclient:end_read:id=13,err=err_ok,resp=v13\nclient:begin_read:id=14,key=k14,timeout=0\nclient:end_read:id=14,err=err_ok,resp=v14\nclient:begin_read:id=15,key=k15,timeout=0\nclient:end_read:id=15,err=err_ok,resp=v15\nclient:begin_read:id=16,key=k16,timeout=0\nclient:end_read:id=16,err=err_ok,resp=v16\nclient:begin_read:id=17,key=k17,timeout=0\nclient:end_read:id=17,err=err_ok,resp=v17\nclient:begin_read:id=18,key=k18,timeout=0\nclient:end_read:id=18,err=err_ok,resp=v18\nclient:begin_read:id=19,key=k19,timeout=0\nclient:end_read:id=19,err=err_ok,resp=v19\nclient:begin_read:id=20,key=k20,timeout=0\nclient:end_read:id=20,err=err_ok,resp=v20\n\n# change primary from r1 to 
r3\nclient:replica_config:receiver=r1,type=downgrade_to_secondary,node=r1\nconfig:{4,-,[r1,r2,r3]}\nstate:{{r1,sec,4,20},{r2,sec,4,20},{r3,sec,4,20}}\nclient:replica_config:receiver=r3,type=upgrade_to_primary,node=r3\nconfig:{5,r3,[r1,r2]}\nstate:{{r1,sec,5,20},{r2,sec,5,20},{r3,pri,5,20}}\n\n# check r3 data correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r3\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\nclient:begin_read:id=12,key=k12,timeout=0\nclient:end_read:id=12,err=err_ok,resp=v12\nclient:begin_read:id=13,key=k13,timeout=0\nclient:end_read:id=13,err=err_ok,resp=v13\nclient:begin_read:id=14,key=k14,timeout=0\nclient:end_read:id=14,err=err_ok,resp=v14\nclient:begin_read:id=15,key=k15,timeout=0\nclient:end_read:id=15,err=err_ok,resp=v15\nclient:begin_read:id=16,key=k16,timeout=0\nclient:end_read:id=16,err=err_ok,resp=v16\nclient:begin_read:id=17,key=k17,timeout=0\nclient:end_read:id=17,err=err_ok,resp=v17\nclient:begin_read:id=18,key=k18,timeout=0\nclient:end_read:id=18,err=err_ok,resp=v18\nclient:begin_read:id=19,key=k19,timeout=0\nclient:end_read:id=19,err=err_ok,resp=v19\nclient:begin_r
ead:id=20,key=k20,timeout=0\nclient:end_read:id=20,err=err_ok,resp=v20\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-213.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 110\nmutation_2pc_min_replica_count = 2\n\ngroup_check_disabled = false\ngroup_check_interval_ms = 5000\n\ncheckpoint_disabled = false\ncheckpoint_interval_seconds = 60\n\ngc_disabled = false\ngc_interval_ms = 30000\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nlog_private_disabled = false\nlog_file_size_mb = 32\nlog_shared_batch_buffer_kb = 0\nlog_private_batch_buffer_kb = 0\n\nconfig_sync_disabled = false\nconfig_sync_interval_ms = 30000\n\nlb_interval_ms = 10000\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-214.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learn\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes 
primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\n\n# wait r2 checkpoint done\nstate:{{r1,pri,2,10,10},{r2,sec,2,10,10}}\n\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r1\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\n\n# add secondary r3 (new replica)\n# will trigger learning in on_learn():\n#   _prepare_list.count() {=10} > 0 \n#   learn_start_decree {=1} == _prepare_list->min_decree() {=1}\n#   to-be-learn state is covered by prepare list, learn by CACHE, 
learn_mutation_count = 10\n#\n#                      (1)                 (10)\n#   prepare_list :      |-------------------|  \n#   learn        :      |-->\n#                      (1)\n#\n# change primary to non-primary when someone is learning\n#\n# time +0   (learn@c)\nclient:replica_config:receiver=r1,type=add_secondary,node=r3\n# time +10  (learn@m)   (downgrade@c)\nwait:on_task_begin:node=m,task_code=RPC_CM_PROPOSE_BALANCER\nclient:replica_config:receiver=r1,type=downgrade_to_secondary,node=r1\n# time +20  (learn@r1)  (downgrade@m)\nwait:on_task_begin:node=m,task_code=RPC_CM_PROPOSE_BALANCER\n# time +30  (learn@r3)  (downgrade@r1)\nwait:on_rpc_call:rpc_name=RPC_CM_UPDATE_PARTITION_CONFIGURATION,from=r1,to=m\nstate:{{r1,ina,2,10},{r2,sec,2,10},{r3,pot,2,0}}\n# time +40  (learn@r1)  (downgrade@m)\nwait:on_task_begin:node=r1,task_code=RPC_LEARN\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r3\n# time +50  (learn@r3)  (downgrade@r1)\n# this will trigger LPC_DELAY_LEARN\nwait:on_task_begin:node=r3,task_code=RPC_LEARN_ACK\nwait:on_task_enqueue:node=r3,task_code=LPC_DELAY_LEARN\n# after some delay, start another learn\nwait:on_task_begin:node=r3,task_code=LPC_DELAY_LEARN\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r3,to=r1\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r3\nstate:{{r1,sec,3,10},{r2,sec,3,10},{r3,err,2,0}}\nstate:{{r1,sec,3,10},{r2,sec,3,10}}\n\n# change primary from r1 to r2\nclient:replica_config:receiver=r2,type=upgrade_to_primary,node=r2\nconfig:{4,r2,[r1]}\nstate:{{r1,sec,4,10},{r2,pri,4,10}}\n\n# check r2 data 
correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r2\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-214.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\nmutation_2pc_min_replica_count = 2\n\ngroup_check_disabled = false\ngroup_check_interval_ms = 100000\n\ncheckpoint_disabled = false\ncheckpoint_interval_seconds = 60\n\ngc_disabled = false\ngc_interval_ms = 30000\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nlog_private_disabled = false\nlog_file_size_mb = 32\nlog_shared_batch_buffer_kb = 0\nlog_private_batch_buffer_kb = 4\n\nconfig_sync_disabled = false\nconfig_sync_interval_ms = 30000\n\nlb_interval_ms = 10000\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-215.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learn\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes 
primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\n\n# wait r2 checkpoint done\nstate:{{r1,pri,2,10,10},{r2,sec,2,10,10}}\n\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r1\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\n\n# add secondary r3 (new replica)\n# will trigger learning in on_learn():\n#   _prepare_list.count() {=10} > 0 \n#   learn_start_decree {=1} == _prepare_list->min_decree() {=1}\n#   to-be-learn state is covered by prepare list, learn by CACHE, 
learn_mutation_count = 10\n#\n#                      (1)                 (10)\n#   prepare_list :      |-------------------|  \n#   learn        :      |-->\n#                      (1)\n#\n# change primary's ballot when some one is learning\n#\n# time +0   (learn@c)\nclient:replica_config:receiver=r1,type=add_secondary,node=r3\n# time +10  (learn@m)   (downgrade@c)\nwait:on_task_begin:node=m,task_code=RPC_CM_PROPOSE_BALANCER\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r2\n# time +20  (learn@r1)  (downgrade@m)\nwait:on_task_begin:node=m,task_code=RPC_CM_PROPOSE_BALANCER\n# time +30  (learn@r3)  (downgrade@r1)\nwait:on_rpc_call:rpc_name=RPC_CM_UPDATE_PARTITION_CONFIGURATION,from=r1,to=m\nstate:{{r1,ina,2,10},{r2,sec,2,10},{r3,pot,2,0}}\n# time +40  (learn@r1)  (downgrade@m)\nwait:on_task_begin:node=r1,task_code=RPC_LEARN\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r3\n# time +50  (learn@r3)  (downgrade@r1)\n# this will trigger LPC_DELAY_LEARN\n# and the r1 changes to primary again, which will trigger group check\nwait:on_task_begin:node=r3,task_code=RPC_LEARN_ACK\nwait:on_task_enqueue:node=r3,task_code=LPC_DELAY_LEARN\n# time +60\n# group check arrives r3, starting another round of learn\nwait:on_task_begin:node=r3,task_code=RPC_GROUP_CHECK\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r3,to=r1\n# time +70\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r3\n# finally\nconfig:{4,r1,[r3]}\nstate:{{r1,pri,4,10},{r3,sec,4,10}}\n\n# change primary from r1 to r3\nclient:replica_config:receiver=r1,type=downgrade_to_secondary,node=r1\nconfig:{5,-,[r1,r3]}\nstate:{{r1,sec,5,10},{r3,sec,5,10}}\nclient:replica_config:receiver=r3,type=upgrade_to_primary,node=r3\nconfig:{6,r3,[r1]}\nstate:{{r1,sec,6,10},{r3,pri,6,10}}\n\n# check r3 data 
correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r3\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-215.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\nmutation_2pc_min_replica_count = 2\n\ngroup_check_disabled = false\ngroup_check_interval_ms = 100000\n\ncheckpoint_disabled = false\ncheckpoint_interval_seconds = 60\n\ngc_disabled = false\ngc_interval_ms = 30000\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nlog_private_disabled = false\nlog_file_size_mb = 32\nlog_shared_batch_buffer_kb = 0\nlog_private_batch_buffer_kb = 4\n\nconfig_sync_disabled = false\nconfig_sync_interval_ms = 30000\n\nlb_interval_ms = 10000\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-216.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description: test learn\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\n# wait until r1 becomes 
primary\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\n\n# wait r2 checkpoint done\nstate:{{r1,pri,2,10,10},{r2,sec,2,10,10}}\n\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r1\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\n\n# add secondary r3 (new replica)\n# will trigger learning in on_learn():\n#   _prepare_list.count() {=10} > 0 \n#   learn_start_decree {=1} == _prepare_list->min_decree() {=1}\n#   to-be-learn state is covered by prepare list, learn by CACHE, 
learn_mutation_count = 10\n#\n#                      (1)                 (10)\n#   prepare_list :      |-------------------|  \n#   learn        :      |-->\n#                      (1)\n#\n# change primary's ballot when some one is learning\n#\n# time +0   (learn@c)\nclient:replica_config:receiver=r1,type=add_secondary,node=r3\n# time +10  (learn@m)   (downgrade@c)\nwait:on_task_begin:node=m,task_code=RPC_CM_PROPOSE_BALANCER\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r2\n# time +20  (learn@r1)  (downgrade@m)\nwait:on_task_begin:node=m,task_code=RPC_CM_PROPOSE_BALANCER\n# time +30  (learn@r3)  (downgrade@r1)\nwait:on_rpc_call:rpc_name=RPC_CM_UPDATE_PARTITION_CONFIGURATION,from=r1,to=m\nstate:{{r1,ina,2,10},{r2,sec,2,10},{r3,pot,2,0}}\n# time +40  (learn@r1)  (downgrade@m)\nwait:on_task_begin:node=r1,task_code=RPC_LEARN\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r3\n# time +50  (learn@r3)  (downgrade@r1)\n# this will trigger LPC_DELAY_LEARN on r3\n# and the r1 changes to primary again, which will trigger group check\n# to run LPC_DELAY_LEARN, we inject group check\nstate:{{r1,pri,3,10},{r2,sec,2,10},{r3,pot,2,0}}\ninject:on_rpc_call:rpc_name=RPC_GROUP_CHECK,from=r1,to=r3\n# after sometimes delay, start another learn\nwait:on_task_begin:node=r3,task_code=LPC_DELAY_LEARN\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r3,to=r1\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r3\n# finally\nconfig:{4,r1,[r3]}\nstate:{{r1,pri,4,10},{r3,sec,4,10}}\n\n# change primary from r1 to r3\nclient:replica_config:receiver=r1,type=downgrade_to_secondary,node=r1\nconfig:{5,-,[r1,r3]}\nstate:{{r1,sec,5,10},{r3,sec,5,10}}\nclient:replica_config:receiver=r3,type=upgrade_to_primary,node=r3\nconfig:{6,r3,[r1]}\nstate:{{r1,sec,6,10},{r3,pri,6,10}}\n\n# check r3 data 
correct\nclient:begin_read:id=1,key=k1,timeout=0\nwait:on_rpc_call:rpc_name=RPC_SIMPLE_KV_SIMPLE_KV_READ,from=c,to=r3\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-216.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\nmutation_2pc_min_replica_count = 2\n\ngroup_check_disabled = false\ngroup_check_interval_ms = 100000\n\ncheckpoint_disabled = false\ncheckpoint_interval_seconds = 60\n\ngc_disabled = false\ngc_interval_ms = 30000\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nlog_private_disabled = false\nlog_file_size_mb = 32\nlog_shared_batch_buffer_kb = 0\nlog_private_batch_buffer_kb = 4\n\nconfig_sync_disabled = false\nconfig_sync_interval_ms = 30000\n\nlb_interval_ms = 10000\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-300-0.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# normal case:\n# - normal case\n# - no error injected\n# - just do write and read\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# wait for server ready\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,0},{r2,sec,3,0},{r3,sec,3,0}}\n\n# begin write 1\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\n\n# wait for commit\nstate:{{r1,pri,3,1},{r2,sec,3,0},{r3,sec,3,0}}\n\n# end write 1\nclient:end_write:id=1,err=err_ok,resp=0\n\n# begin read 1\nclient:begin_read:id=1,key=k1,timeout=0\n\n# end read 1\nclient:end_read:id=1,err=err_ok,resp=v1\n\n# begin write 2\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\n\n# wait for commit\nstate:{{r1,pri,3,2},{r2,sec,3,1},{r3,sec,3,1}}\n\n# end write 2\nclient:end_write:id=2,err=err_ok,resp=0\n\n# begin read 
2\nclient:begin_read:id=2,key=k2,timeout=0\n\n# end read 2\nclient:end_read:id=2,err=err_ok,resp=v2\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-300-0.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-300-1.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n# - normal recover\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# on meta server the config is the same with the exit state last time\nconfig:{3,r1,[r2,r3]}\n\n# and the states will be normal again, ballot will increase as old primary elected as leader\nstate:{{r1,pri,4,2},{r2,sec,4,2},{r3,sec,4,2}}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-300-1.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-300-2.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# we just test the case that the system: restart->nothing to write->crashed->restart\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# on meta server the config is the same with the exit state last time\nconfig:{4,r1,[r2,r3]}\n\n# and the states will be normal again\nstate:{{r1,pri,5,2},{r2,sec,5,2},{r3,sec,5,2}}\n\n# we write an new value for k1\nclient:begin_write:id=1,key=k1,value=v3,timeout=0\n\n# and prepared\nstate:{{r1,pri,5,3},{r2,sec,5,2},{r3,sec,5,2}}\nclient:end_write:id=1,err=ERR_OK,resp=0\n\n# and read\nclient:begin_read:id=2,key=k1,timeout=0\nclient:end_read:id=2,err=ERR_OK,resp=v3\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-300-2.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-301.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n# occasionally loss of rpc message have no impact on meta server\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\ninject:on_rpc_call:rpc_name=rpc_config_proposal,from=m,to=r1\n\nstate:{{r1,ina,0,0}}\nconfig:{1,r1,[]}\nstate:{{r1,pri,1,0}}\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,0},{r2,sec,3,0},{r3,sec,3,0}}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-301.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-302.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n# - inject assign_primary proposal\n# - inject update_partition_configuration\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# initial config\nconfig:{0,-,[]}\n\n# start to do load balance\n# inject assign_primary proposal to r1\ninject:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\n\n# meta will restart load balance,\n# and assign primary to r1 again\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\n\n# inject update_partition_configuration (timeout is 5 seconds)\ninject:on_rpc_call:rpc_name=RPC_CM_UPDATE_PARTITION_CONFIGURATION,from=r1,to=m\n\n# r1 start to do config_sync (config_sync_interval_ms=4000)\nwait:on_rpc_call:rpc_name=RPC_CM_CONFIG_SYNC,from=r1,to=m\n\n# we ignore the config sync when replica's 
reconfiguration_task is not null\nconfig:{1,r1,[]}\n\n# and then, another assign means that secondary is added\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nconfig:{2,r1,[r2]}\n\nwait:on_rpc_call:rpc_name=RPC_CONFIG_PROPOSAL,from=m,to=r1\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,0},{r2,sec,3,0},{r3,sec,3,0}}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-302.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 4000\nconfig_sync_disabled = false\n\nlb_interval_ms = 5000\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-303.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n# occasionally loss of rpc message have no impact on meta server\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# initial config\nconfig:{0,-,[]}\n\n# at time 15 (fd_grace_seconds=15), start to do load balance\nwait:on_rpc_call:rpc_name=rpc_config_proposal,from=m,to=r1\n\nset:disable_load_balance=1\n\n# after create new replica, r1 try to update_partition_configuration on meta_server\nwait:on_rpc_call:rpc_name=rpc_cm_update_partition_configuration,from=r1,to=m\n\n# meta-server receive update_partition_configuration, apply the config change,\n# and try to send reply, but the reply message is 
lost\nwait:on_task_begin:node=m,task_code=rpc_cm_update_partition_configuration\nconfig:{1,r1,[]}\ninject:on_rpc_reply:rpc_name=rpc_cm_update_partition_configuration_ack,from=m,to=r1\n\n# the message loss of update_partition_configuration reply will lead to resend of the msg,\n# we inject some resending, and the resending will continue and continue, and finally succeed\ninject:on_rpc_call:rpc_name=rpc_cm_update_partition_configuration,from=r1,to=m\ninject:on_rpc_call:rpc_name=rpc_cm_update_partition_configuration,from=r1,to=m\ninject:on_rpc_call:rpc_name=rpc_cm_update_partition_configuration,from=r1,to=m\ninject:on_rpc_call:rpc_name=rpc_cm_update_partition_configuration,from=r1,to=m\ninject:on_rpc_call:rpc_name=rpc_cm_update_partition_configuration,from=r1,to=m\ninject:on_rpc_call:rpc_name=rpc_cm_update_partition_configuration,from=r1,to=m\nwait:on_rpc_call:rpc_name=rpc_cm_update_partition_configuration,from=r1,to=m\n\n# meta-server receive the duplicate request, and reply with err=ERR_INVALID_VERSION\nwait:on_task_begin:node=m,task_code=rpc_cm_update_partition_configuration\n\n# then r1 will be the primary\nstate:{{r1,pri,1,0}}\n\nset:disable_load_balance=0\n\n# at last the state will be normal\nconfig:{3,r1,[r2,r3]}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-303.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = true\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-304.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n# - trigger meta-server to call downgrade_to_inactive\n# - trigger meta-server to call remove\n# - trigger meta-server to call downgrade_to_secondary\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# wait for server ready\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,0},{r2,sec,3,0},{r3,sec,3,0}}\n\n# test downgrading r2 to inactive\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r2\n\n# after downgrade, r2 will be inactive\nconfig:{4,r1,[r3]}\n\n# and lb will reassign r2 to be a secondary\nconfig:{5,r1,[r2,r3]}\nstate:{{r1,pri,5,0},{r2,sec,5,0},{r3,sec,5,0}}\n\n# next, test removing r3\nclient:replica_config:receiver=r1,type=remove,node=r3\n\n# the secondary is removed from meta 
server\nconfig:{6,r1,[r2]}\n\n# and in the end lb will assign a secondary again\nconfig:{7,r1,[r2,r3]}\nstate:{{r1,pri,7,0},{r2,sec,7,0},{r3,sec,7,0}}\n\n# then test downgrading r1 to secondary\nclient:replica_config:receiver=r1,type=downgrade_to_secondary,node=r1\nconfig:{8,-,[r1,r2,r3]}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-304.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-305.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n# - test sending downgrade_to_secondary/downgrade_to_inactive/remove to secondary\n# - test sending upgrade_to_primary to secondary\n# - test sending add_secondary to node\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# wait for server ready\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,0},{r2,sec,3,0},{r3,sec,3,0}}\n\n# downgrade the primary r1 to inactive,\n# and disable lb so we can test the config commands below\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r1\nset:disable_load_balance=1\n\nstate:{{r1,ina,4,0},{r2,sec,3,0},{r3,sec,3,0}}\n\n# all of these commands will be 
ignored\nclient:replica_config:receiver=r2,type=downgrade_to_secondary,node=r3\nclient:replica_config:receiver=r2,type=downgrade_to_inactive,node=r3\nclient:replica_config:receiver=r2,type=remove,node=r3\n\n# then upgrade r3 to primary\nclient:replica_config:receiver=r3,type=upgrade_to_primary,node=r3\nconfig:{5,r3,[r2]}\n\n# then add r1 back as a secondary, and the state will be normal again\nclient:replica_config:receiver=r3,type=add_secondary,node=r1\nconfig:{6,r3,[r1,r2]}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-305.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = true\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-306.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n# - test sending remove to secondary\n# - test sending upgrade_to_primary to secondary\n# - test sending add_secondary to node\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# wait for server ready\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,0},{r2,sec,3,0},{r3,sec,3,0}}\n\n# avoid annoying lb to interrupt the removing action\nset:disable_load_balance=1\n\n# remove the secondary\nclient:replica_config:receiver=r1,type=remove,node=r3\n\n# secondary removed\nconfig:{4,r1,[r2]}\nstate:{{r1,pri,4,0},{r2,sec,4,0}}\n\n# enable the lb again\nset:disable_load_balance=0\n\n# then the state will be normal again\nconfig:{5,r1,[r2,r3]}\nstate:{{r1,pri,5,0},{r2,sec,5,0},{r3,sec,5,0}}\n\n# avoid annoying lb to interrupt the removing 
action\nset:disable_load_balance=1\n\n# remove the primary\nclient:replica_config:receiver=r1,type=remove,node=r1\n\n# and primary is removed\nconfig:{6,-,[r2,r3]}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-306.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-307.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n# - close replica stub on exit\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# wait for server ready\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,0},{r2,sec,3,0},{r3,sec,3,0}}\n\n# begin write 1\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\n\n# wait for commit\nstate:{{r1,pri,3,1},{r2,sec,3,0},{r3,sec,3,0}}\n\n# end write 1\nclient:end_write:id=1,err=err_ok,resp=0\n\n# begin read 1\nclient:begin_read:id=1,key=k1,timeout=0\n\n# end read 1\nclient:end_read:id=1,err=err_ok,resp=v1\n\n# begin write 2\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\n\n# wait for commit\nstate:{{r1,pri,3,2},{r2,sec,3,1},{r3,sec,3,1}}\n\n# end write 2\nclient:end_write:id=2,err=err_ok,resp=0\n\n# begin read 
2\nclient:begin_read:id=2,key=k2,timeout=0\n\n# end read 2\nclient:end_read:id=2,err=err_ok,resp=v2\n\nset:disable_load_balance=1\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r2\nconfig:{4,r1,[r3]}\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r3\nconfig:{5,r1,[]}\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r1\nconfig:{6,-,[]}\nstate:{}\n\nset:close_replica_stub_on_exit=1\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-307.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-400.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n# - test a primary that stays inactive\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nconfig:{0,-,[]}\nstate:{{r1,ina,0,0}}\nconfig:{1,r1,[]}\n\n# loss of the ack message will cause the primary to resend\ninject:on_rpc_reply:rpc_name=RPC_CM_UPDATE_PARTITION_CONFIGURATION_ACK,from=m,to=r1\n\n# the primary continues to be inactive\ninject:on_rpc_call:rpc_name=RPC_CM_UPDATE_PARTITION_CONFIGURATION,from=r1,to=m\n\nclient:begin_write:id=1,key=t1,value=v1,timeout=0\n\n# the client write fails because the primary is inactive\nclient:end_write:id=1,err=ERR_TIMEOUT,resp=0\n\n# expect the committed decree to be 0\nstate:{{r1,pri,3,0},{r2,sec,3,0},{r3,sec,3,0}}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-400.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-401.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n# - test the inactive of primary\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nconfig:{1,r1,[]}\nset:disable_load_balance=1\n\n# this write will be notified with not enough member\nclient:begin_write:id=1,key=t1,value=v1,timeout=0\nclient:end_write:id=1,err=ERR_TIMEOUT,resp=0\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-401.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-402.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n# - inject aio_call of 
secondary\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nconfig:{3,r1,[r2,r3]}\nstate:{{r1,pri,3,0},{r2,sec,3,0},{r3,sec,3,0}}\n\nset:disable_load_balance=1\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\nclient:begin_write:id=12,key=k12,value=v12,timeout=0\nclient:begin_write:id=13,key=k13,value=v13,timeout=0\nclient:begin_write:id=14,key=k14,value=v14,timeout=0\nclient:begin_write:id=15,key=k15,value=v15,timeout=0\nclient:begin_write:id=16,key=k16,value=v16,timeout=0\nclient:begin_write:id=17,key=k17,value=v17,timeout=0\nclient:begin_write:id=18,key=k18,value=v18,timeout=0\nclient:begin_write:id=19,key=k19,value=v19,timeout=0\nclient:begin_write:id=20,key=k20,value=v20,timeout=0\nclient:begin_write:id=21,key=k21,value=v21,timeout=0\nclient:begin_write:id=22,key=k22,value=v22,timeout=0\nclient:begin_write:id=23,key=k23,value=v23,timeout=0\nclient:begin_write:id=24,key=k24,value=v24,timeout=0\nclient:begin_write:id=25,key=k25,value=v25,timeout=0\nclient:begin_write:id=26,key=k26,value=v26,timeout=0\nclient:begin_write:id=27,key=k27,value=v27,timeout=0\nclient:begin_write:id=28,key=k28,value=v28,timeout=0\nclient:begin_write:id=29,key=k29,value=v29,timeout=0\nclient:begin_write:id=30,key=k30,value=v30,timeout=0\nclient:begin_write:id=31,key=k31,value=v31,timeout=0\nclient:begin_write:id=32,key=k32,value=v32,timeout=0\nclient:begin_write:id=33,key=k33,value=v33,timeout=0\nclient:begin_write:id=34,key=k34,value=v34,timeout=0\nclient:begin_write
:id=35,key=k35,value=v35,timeout=0\nclient:begin_write:id=36,key=k36,value=v36,timeout=0\nclient:begin_write:id=37,key=k37,value=v37,timeout=0\nclient:begin_write:id=38,key=k38,value=v38,timeout=0\nclient:begin_write:id=39,key=k39,value=v39,timeout=0\nclient:begin_write:id=40,key=k40,value=v40,timeout=0\nclient:begin_write:id=41,key=k41,value=v41,timeout=0\nclient:begin_write:id=42,key=k42,value=v42,timeout=0\nclient:begin_write:id=43,key=k43,value=v43,timeout=0\nclient:begin_write:id=44,key=k44,value=v44,timeout=0\nclient:begin_write:id=45,key=k45,value=v45,timeout=0\nclient:begin_write:id=46,key=k46,value=v46,timeout=0\nclient:begin_write:id=47,key=k47,value=v47,timeout=0\nclient:begin_write:id=48,key=k48,value=v48,timeout=0\nclient:begin_write:id=49,key=k49,value=v49,timeout=0\nclient:begin_write:id=50,key=k50,value=v50,timeout=0\nclient:begin_write:id=51,key=k51,value=v51,timeout=0\nclient:begin_write:id=52,key=k52,value=v52,timeout=0\nclient:begin_write:id=53,key=k53,value=v53,timeout=0\nclient:begin_write:id=54,key=k54,value=v54,timeout=0\nclient:begin_write:id=55,key=k55,value=v55,timeout=0\nclient:begin_write:id=56,key=k56,value=v56,timeout=0\nclient:begin_write:id=57,key=k57,value=v57,timeout=0\nclient:begin_write:id=58,key=k58,value=v58,timeout=0\nclient:begin_write:id=59,key=k59,value=v59,timeout=0\nclient:begin_write:id=60,key=k60,value=v60,timeout=0\nclient:begin_write:id=61,key=k61,value=v61,timeout=0\nclient:begin_write:id=62,key=k62,value=v62,timeout=0\nclient:begin_write:id=63,key=k63,value=v63,timeout=0\nclient:begin_write:id=64,key=k64,value=v64,timeout=0\nclient:begin_write:id=65,key=k65,value=v65,timeout=0\nclient:begin_write:id=66,key=k66,value=v66,timeout=0\nclient:begin_write:id=67,key=k67,value=v67,timeout=0\nclient:begin_write:id=68,key=k68,value=v68,timeout=0\nclient:begin_write:id=69,key=k69,value=v69,timeout=0\nclient:begin_write:id=70,key=k70,value=v70,timeout=0\nclient:begin_write:id=71,key=k71,value=v71,timeout=0\nclient:begin_write:i
d=72,key=k72,value=v72,timeout=0\nclient:begin_write:id=73,key=k73,value=v73,timeout=0\nclient:begin_write:id=74,key=k74,value=v74,timeout=0\nclient:begin_write:id=75,key=k75,value=v75,timeout=0\nclient:begin_write:id=76,key=k76,value=v76,timeout=0\nclient:begin_write:id=77,key=k77,value=v77,timeout=0\nclient:begin_write:id=78,key=k78,value=v78,timeout=0\nclient:begin_write:id=79,key=k79,value=v79,timeout=0\nclient:begin_write:id=80,key=k80,value=v80,timeout=0\nclient:begin_write:id=81,key=k81,value=v81,timeout=0\nclient:begin_write:id=82,key=k82,value=v82,timeout=0\nclient:begin_write:id=83,key=k83,value=v83,timeout=0\nclient:begin_write:id=84,key=k84,value=v84,timeout=0\nclient:begin_write:id=85,key=k85,value=v85,timeout=0\nclient:begin_write:id=86,key=k86,value=v86,timeout=0\nclient:begin_write:id=87,key=k87,value=v87,timeout=0\nclient:begin_write:id=88,key=k88,value=v88,timeout=0\nclient:begin_write:id=89,key=k89,value=v89,timeout=0\nclient:begin_write:id=90,key=k90,value=v90,timeout=0\nclient:begin_write:id=91,key=k91,value=v91,timeout=0\nclient:begin_write:id=92,key=k92,value=v92,timeout=0\nclient:begin_write:id=93,key=k93,value=v93,timeout=0\nclient:begin_write:id=94,key=k94,value=v94,timeout=0\nclient:begin_write:id=95,key=k95,value=v95,timeout=0\nclient:begin_write:id=96,key=k96,value=v96,timeout=0\nclient:begin_write:id=97,key=k97,value=v97,timeout=0\nclient:begin_write:id=98,key=k98,value=v98,timeout=0\nclient:begin_write:id=99,key=k99,value=v99,timeout=0\nclient:begin_write:id=100,key=k100,value=v100,timeout=0\nclient:begin_write:id=101,key=k101,value=v101,timeout=0\nclient:begin_write:id=102,key=k102,value=v102,timeout=0\nclient:begin_write:id=103,key=k103,value=v103,timeout=0\nclient:begin_write:id=104,key=k104,value=v104,timeout=0\nclient:begin_write:id=105,key=k105,value=v105,timeout=0\nclient:begin_write:id=106,key=k106,value=v106,timeout=0\nclient:begin_write:id=107,key=k107,value=v107,timeout=0\nclient:begin_write:id=108,key=k108,value=v108,timeou
t=0\nclient:begin_write:id=109,key=k109,value=v109,timeout=0\nclient:begin_write:id=110,key=k110,value=v110,timeout=0\nclient:begin_write:id=111,key=k111,value=v111,timeout=0\nclient:begin_write:id=112,key=k112,value=v112,timeout=0\nclient:begin_write:id=113,key=k113,value=v113,timeout=0\nclient:begin_write:id=114,key=k114,value=v114,timeout=0\nclient:begin_write:id=115,key=k115,value=v115,timeout=0\nclient:begin_write:id=116,key=k116,value=v116,timeout=0\nclient:begin_write:id=117,key=k117,value=v117,timeout=0\nclient:begin_write:id=118,key=k118,value=v118,timeout=0\nclient:begin_write:id=119,key=k119,value=v119,timeout=0\nclient:begin_write:id=120,key=k120,value=v120,timeout=0\nclient:begin_write:id=121,key=k121,value=v121,timeout=0\nclient:begin_write:id=122,key=k122,value=v122,timeout=0\nclient:begin_write:id=123,key=k123,value=v123,timeout=0\nclient:begin_write:id=124,key=k124,value=v124,timeout=0\nclient:begin_write:id=125,key=k125,value=v125,timeout=0\nclient:begin_write:id=126,key=k126,value=v126,timeout=0\nclient:begin_write:id=127,key=k127,value=v127,timeout=0\nclient:begin_write:id=128,key=k128,value=v128,timeout=0\nclient:begin_write:id=129,key=k129,value=v129,timeout=0\nclient:begin_write:id=130,key=k130,value=v130,timeout=0\nclient:begin_write:id=131,key=k131,value=v131,timeout=0\nclient:begin_write:id=132,key=k132,value=v132,timeout=0\nclient:begin_write:id=133,key=k133,value=v133,timeout=0\nclient:begin_write:id=134,key=k134,value=v134,timeout=0\nclient:begin_write:id=135,key=k135,value=v135,timeout=0\nclient:begin_write:id=136,key=k136,value=v136,timeout=0\nclient:begin_write:id=137,key=k137,value=v137,timeout=0\nclient:begin_write:id=138,key=k138,value=v138,timeout=0\nclient:begin_write:id=139,key=k139,value=v139,timeout=0\nclient:begin_write:id=140,key=k140,value=v140,timeout=0\nclient:begin_write:id=141,key=k141,value=v141,timeout=0\nclient:begin_write:id=142,key=k142,value=v142,timeout=0\nclient:begin_write:id=143,key=k143,value=v143,timeout=0\n
client:begin_write:id=144,key=k144,value=v144,timeout=0\nclient:begin_write:id=145,key=k145,value=v145,timeout=0\nclient:begin_write:id=146,key=k146,value=v146,timeout=0\nclient:begin_write:id=147,key=k147,value=v147,timeout=0\nclient:begin_write:id=148,key=k148,value=v148,timeout=0\nclient:begin_write:id=149,key=k149,value=v149,timeout=0\nclient:begin_write:id=150,key=k150,value=v150,timeout=0\nclient:begin_write:id=151,key=k151,value=v151,timeout=0\nclient:begin_write:id=152,key=k152,value=v152,timeout=0\nclient:begin_write:id=153,key=k153,value=v153,timeout=0\nclient:begin_write:id=154,key=k154,value=v154,timeout=0\nclient:begin_write:id=155,key=k155,value=v155,timeout=0\nclient:begin_write:id=156,key=k156,value=v156,timeout=0\nclient:begin_write:id=157,key=k157,value=v157,timeout=0\nclient:begin_write:id=158,key=k158,value=v158,timeout=0\nclient:begin_write:id=159,key=k159,value=v159,timeout=0\nclient:begin_write:id=160,key=k160,value=v160,timeout=0\nclient:begin_write:id=161,key=k161,value=v161,timeout=0\nclient:begin_write:id=162,key=k162,value=v162,timeout=0\nclient:begin_write:id=163,key=k163,value=v163,timeout=0\nclient:begin_write:id=164,key=k164,value=v164,timeout=0\nclient:begin_write:id=165,key=k165,value=v165,timeout=0\nclient:begin_write:id=166,key=k166,value=v166,timeout=0\nclient:begin_write:id=167,key=k167,value=v167,timeout=0\nclient:begin_write:id=168,key=k168,value=v168,timeout=0\nclient:begin_write:id=169,key=k169,value=v169,timeout=0\n\ninject:on_aio_call:node=r2,task_code=LPC_WRITE_REPLICATION_LOG_PRIVATE\nconfig:{4,r1,[r3]}\n\nstate:{{r1,pri,4,20},{r3,sec,4,11}}\n\nset:disable_load_balance=0\n\nconfig:{5,r1,[r2,r3]}\nstate:{{r1,pri,5,169},{r2,sec,5,169},{r3,sec,5,169}}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-402.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 10\nmax_mutation_count_in_prepare_list = 110\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-600.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n#   
http://wiki.n.miui.com/pages/viewpage.action?pageId=33947601\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\nstate:{{r1,pri,1,0}}\n\nwait:on_task_begin:node=r1,task_code=RPC_CONFIG_PROPOSAL\n\nset:disable_load_balance=1\n\nstate:{{r1,pri,1,0},{r2,pot,1,0}}\n\nconfig:{2,r1,[r2]}\nstate:{{r1,pri,2,0},{r2,sec,2,0}}\n\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\nclient:begin_write:id=8,key=k8,value=v8,timeout=0\nclient:begin_write:id=9,key=k9,value=v9,timeout=0\nclient:begin_write:id=10,key=k10,value=v10,timeout=0\nclient:begin_write:id=11,key=k11,value=v11,timeout=0\n\nstate:{{r1,pri,2,11},{r2,sec,2,11}}\n\nclient:begin_read:id=1,key=k1,timeout=0\nclient:end_read:id=1,err=err_ok,resp=v1\nclient:begin_read:id=2,key=k2,timeout=0\nclient:end_read:id=2,err=err_ok,resp=v2\nclient:begin_read:id=3,key=k3,timeout=0\nclient:end_read:id=3,err=err_ok,resp=v3\nclient:begin_read:id=4,key=k4,timeout=0\nclient:end_read:id=4,err=err_ok,resp=v4\nclient:begin_read:id=5,key=k5,timeout=0\nclient:end_read:id=5,err=err_ok,resp=v5\nclient:begin_read:id=6,key=k6,timeout=0\nclient:end_read:id=6,err=err_ok,resp=v6\nclient:begin_read:id=7,key=k7,timeout=0\nclient:end_read:id=7,err=err_ok,resp=v7\nclient:begin_read:id=8,key=k8,timeout=0\nclient:end_read:id=8,err=err_ok,resp=v8\nclient:begin_read:id=9,key=k9,timeout=0\nclient:end_read:id=9,err=err_ok,resp=v9\nclient:begin_read:id=10,key=k10,timeout=0\nclient:end_read:id=10,err=err_ok,resp=v10\nclient:begin_read:id=11,key=k11,timeout=0\nclient:end_read:id=11,err=err_ok,resp=v11\n\nset:disable_load_balance=0\n\nwait:on_task_begin:node=r1,task_code=RPC_CONFIG_PROPOSAL\n\nset:disable_load_balance=1\n\nstate:{{r1,pri,2,11},{r2,sec,2,
11},{r3,pot,2,0}}\n\n# learn LT_APP\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r3,to=r1\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r3\n\n# learn LT_CACHE, delay RPC_LEARN_ACK\nwait:on_rpc_call:rpc_name=RPC_LEARN,from=r3,to=r1\nwait:on_rpc_reply:rpc_name=RPC_LEARN_ACK,from=r1,to=r3\nmodify:on_rpc_response_enqueue:node=r3,task_code=RPC_LEARN_ACK,delay=10,rpc_name=RPC_LEARN_ACK,from=r1,to=r3,modify_delay=10000\n\n# write new data before RPC_LEARN_ACK processed by learner\nclient:begin_write:id=12,key=k12,value=v12,timeout=0\nstate:{{r1,pri,2,12},{r2,sec,2,11},{r3,pot,2,11}}\nclient:end_write:id=12,err=err_ok,resp=0\n\n# process RPC_LEARN_ACK\nwait:on_task_begin:node=r3,task_code=RPC_LEARN_ACK\nstate:{{r1,pri,2,12},{r2,sec,2,11},{r3,pot,2,11}}\n\n# notify LEARN_COMPLETION\nwait:on_rpc_call:rpc_name=RPC_LEARN_COMPLETION_NOTIFY,from=r3,to=r1\nwait:on_rpc_reply:rpc_name=RPC_LEARN_COMPLETION_NOTIFY_ACK,from=r1,to=r3\n\n# process RPC_LEARN_COMPLETION_NOTIFY_ACK\nwait:on_task_begin:node=r3,task_code=RPC_LEARN_COMPLETION_NOTIFY_ACK\nstate:{{r1,pri,2,12},{r2,sec,2,11},{r3,ina,2,11}}\n\n# replica on r3 will be closed finally\nstate:{{r1,pri,2,12},{r2,sec,2,12}}\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-600.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-601.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n#   learner send two successive notify_learn_completion request\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# Initial ballot is 0, after adding two peers, the ballot becomes 2.\n# wait primary to add r3 as learner\nstate:{{r1,pri,2,0},{r2,sec,2,0},{r3,pot,2,0}}\n\n# r3 learn from r1 successfully, and notify r1, but the message lost\ninject:on_rpc_call:rpc_name=RPC_LEARN_COMPLETION_NOTIFY,from=r3,to=r1\n\n# then client start to write, and the prepare message to r2 lost\nclient:begin_write:id=1,key=k1,timeout=0\ninject:on_rpc_call:rpc_name=RPC_PREPARE,from=r1,to=r2\n\n# r1 will first downgrade to inactive because of prepare timeout,\n# then increase its ballot to 3 and upgrade again as a primary.\nconfig:{3,r1,[]}\n\n# after r1 
update config, it will broadcast group_check, so r3\n# will receive group_check as a learner\nwait:on_rpc_call:rpc_name=RPC_GROUP_CHECK,from=r1,to=r3\n\n# Again r1 will fail on prepare, and increase ballot 3->4.\nconfig:{4,r1,[r3]}\n\n# both r2 & r3 will be secondary eventually\nconfig:{5,r1,[r2,r3]}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-601.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = tracer,test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-602.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n#   learner send two successive notify_learn_completion request\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# wait r1 to add r3 as learner\nstate:{{r1,pri,2,0},{r2,sec,2,0},{r3,pot,2,0}}\n\n# r3 will notify r1 learning succeed, delay the message\nmodify:on_rpc_request_enqueue:node=r1,task_code=RPC_LEARN_COMPLETION_NOTIFY,rpc_name=RPC_LEARN_COMPLETION_NOTIFY,from=r3,to=r1,modify_delay=3000\n\n# client start to write, and the prepare message to r2 lost\nclient:begin_write:id=1,key=k1,timeout=0\ninject:on_rpc_call:rpc_name=RPC_PREPARE,from=r1,to=r2\n\n# r2 will be removed due to timeout\nconfig:{3,r1,[]}\n\n# r2 & r3 will learn succeed eventually\nconfig:{5,r1,[r2,r3]}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-602.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = false\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-603.act",
    "content": "# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n# Case Description:\n#   if primary crash and return with potential state, and its commit decree A is bigger than new primary's decree B(A > B),\n#   new primary will not accept old primary as a learner until it's commit decree reach A\n#\n# NOTICE: \n#   run this case, should disable group check through set replication.group_check_disable = true\n\nset:load_balance_for_test=1,not_exit_on_log_failure=1\n\n# wait server ok\nstate:{{r1,pri,3,0}{r2,sec,3,0}{r3,sec,3,0}}\n\n# write 
data\nclient:begin_write:id=1,key=k1,value=v1,timeout=0\nclient:begin_write:id=2,key=k2,value=v2,timeout=0\nclient:begin_write:id=3,key=k3,value=v3,timeout=0\nclient:begin_write:id=4,key=k4,value=v4,timeout=0\nclient:begin_write:id=5,key=k5,value=v5,timeout=0\nclient:begin_write:id=6,key=k6,value=v6,timeout=0\n\n# primary commit to decree 6, but secondary commit to 1\nstate:{{r1,pri,3,6,6},{r2,sec,3,1,1},{r3,sec,3,1,1}}\n\n\nset:disable_load_balance=1\n# change primary from r1 to r2\nclient:replica_config:receiver=r1,type=downgrade_to_inactive,node=r1\nconfig:{4,-,[r2,r3]}\nstate:{{r1,ina,4,6},{r2,sec,4,1},{r3,sec,4,1}}\n\nclient:replica_config:receiver=r2,type=upgrade_to_primary,node=r2\nconfig:{5,r2,[r3]}\n\n# kick r3 make ballot increase\ninject:on_rpc_call:rpc_name=RPC_PREPARE,from=r2,to=r3\nstate:{{r1,ina,4,6},{r2,pri,5,1},{r3,sec,5,1}}\nconfig:{6,r2,[]}\n\n# delay the r2's replay prepare list, give r3 chance to learn before prepare list[2,6] to commit on r2\nmodify:on_aio_enqueue:node=r2,task_code=LPC_WRITE_REPLICATION_LOG_PRIVATE,modify_delay=100\n\n# add r3 as sec\nclient:replica_config:receiver=r2,type=add_secondary,node=r3\n\nwait:on_rpc_call:rpc_name=RPC_PREPARE,from=r2,to=r3\n\nstate:{{r1,ina,4,6},{r2,pri,7,1},{r3,sec,7,1}}\n\n# delay r3 write share log task, this will lead mutation with decree = 2 will not commit on node 3\nmodify:on_aio_enqueue:node=r3,task_code=LPC_WRITE_REPLICATION_LOG,modify_delay=100\n\n# add r1 to ps, then new primary r2's commit decree will commit to decree 6 without tell secondary r3\n# (r3's commit decree is 1, and mutation with decree 2 is waiting for logging)\nclient:replica_config:receiver=r2,type=add_secondary,node=r1\n\n# write a new data\nclient:begin_write:id=7,key=k7,value=v7,timeout=0\n\n# at last all r1 & r3 will come back\nconfig:{8,r2,[r1,r3]}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case-603.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.m]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD,THREAD_POOL_META_STATE\n\n[apps.r]\ntype = replica\nhosted_app_type_name = simple_kv\n\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.c]\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\ntoollets = test_injector\n;toollets = fault_injector\n;toollets = tracer, fault_injector\n;toollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n[tools.simple_logger]\nshort_header = false\nfast_flush = true\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 19\nmin_message_delay_microseconds = 10000\nmax_message_delay_microseconds = 10000\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_META_STATE]\nworker_count = 1\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_REPLICATION_CLIENT_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_REPLICATION_CLIENT_READ]\nrpc_timeout_milliseconds = 
5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_request_is_write_operation = true\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_request_is_write_operation = true\nrpc_timeout_milliseconds = 5000\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[meta_server]\nserver_list = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\n\n[replication]\nempty_write_disabled = true\nprepare_timeout_ms_for_secondaries = 1000\nprepare_timeout_ms_for_potential_secondaries = 3000\n\nbatch_write_disabled = true\nstaleness_for_commit = 5\nmax_mutation_count_in_prepare_list = 10\n\nmutation_2pc_min_replica_count = 2\n\ngroup_check_interval_ms = 100000\ngroup_check_disabled = true\n\ngc_interval_ms = 30000\ngc_disabled = false\ngc_memory_replica_interval_ms = 300000\ngc_disk_error_replica_interval_seconds = 172800000\n\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 10\nfd_grace_seconds = 15\n\nworking_dir = .\n\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = false\n\nlog_buffer_size_mb_private = 1\nlog_pending_max_ms_private = 100\nlog_file_size_mb_private = 32\nlog_batch_write_private = false\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = true\n\nconfig_sync_interval_ms = 30000\nconfig_sync_disabled = false\n\n[test]\ntest_file_learning = false\ndelta_state_learning_supported = false\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Replication testing framework.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"case.h\"\n#include \"simple_kv.server.impl.h\"\n#include \"checker.h\"\n\n#include <fmt/printf.h>\n\n#include <dsn/tool-api/task.h>\n#include <dsn/tool-api/rpc_message.h>\n#include \"replica/replica_stub.h\"\n#include \"runtime/service_engine.h\"\n#include \"meta/server_load_balancer.h\"\n\n#include <iostream>\n#include <string>\n#include <cstdio>\n#include <boost/lexical_cast.hpp>\n#include <boost/algorithm/string.hpp>\n\nnamespace dsn {\nnamespace replication {\nnamespace test {\n\nstatic bool\nparse_kv_map(int line_no, const 
std::string &str, std::map<std::string, std::string> &kv_map)\n{\n    kv_map.clear();\n    std::vector<std::string> splits;\n    dsn::utils::split_args(str.c_str(), splits, ',');\n    for (std::string &i : splits) {\n        if (i.empty())\n            continue;\n        size_t pos = i.find('=');\n        if (pos == std::string::npos) {\n            std::cerr << \"bad line: line_no=\" << line_no << \": invalid key-value pair: \" << i\n                      << std::endl;\n            return false;\n        }\n        std::string key = i.substr(0, pos);\n        std::string value = i.substr(pos + 1);\n        if (kv_map.find(key) != kv_map.end()) {\n            std::cerr << \"bad line: line_no=\" << line_no << \": duplicate key \" << key << std::endl;\n            return false;\n        }\n        kv_map[key] = value;\n    }\n    return true;\n}\n\nstd::string set_case_line::to_string() const\n{\n    std::ostringstream oss;\n    oss << name() << \":\";\n    int count = 0;\n    if (_null_loop_set) {\n        if (count > 0)\n            oss << \",\";\n        oss << \"null_loop=\" << _null_loop;\n        count++;\n    }\n    if (_lb_for_test_set) {\n        if (count > 0)\n            oss << \",\";\n        oss << \"load_balance_for_test=\" << _lb_for_test;\n        count++;\n    }\n    if (_disable_lb_set) {\n        if (count > 0)\n            oss << \",\";\n        oss << \"disable_load_balance=\" << _disable_lb;\n        count++;\n    }\n    if (_close_replica_stub_set) {\n        if (count > 0)\n            oss << \",\";\n        oss << \"close_replica_stub_on_exit=\" << _close_replica_stub;\n        count++;\n    }\n    if (_not_exit_on_log_failure_set) {\n        if (count > 0)\n            oss << \",\";\n        oss << \"not_exit_on_log_failure=\" << _not_exit_on_log_failure;\n        count++;\n    }\n    if (_simple_kv_open_fail_set) {\n        if (count > 0)\n            oss << \",\";\n        oss << \"simple_kv_open_fail=\" << _simple_kv_open_fail;\n        
count++;\n    }\n    if (_simple_kv_close_fail_set) {\n        if (count > 0)\n            oss << \",\";\n        oss << \"simple_kv_close_fail=\" << _simple_kv_close_fail;\n        count++;\n    }\n    if (_simple_kv_get_checkpoint_fail_set) {\n        if (count > 0)\n            oss << \",\";\n        oss << \"simple_kv_get_checkpoint_fail=\" << _simple_kv_get_checkpoint_fail;\n        count++;\n    }\n    if (_simple_kv_apply_checkpoint_fail_set) {\n        if (count > 0)\n            oss << \",\";\n        oss << \"simple_kv_apply_checkpoint_fail=\" << _simple_kv_apply_checkpoint_fail;\n        count++;\n    }\n    return oss.str();\n}\n\nbool set_case_line::parse(const std::string &params)\n{\n    if (params.empty())\n        return false;\n    std::map<std::string, std::string> kv_map;\n    if (!parse_kv_map(line_no(), params, kv_map)) {\n        return false;\n    }\n    _null_loop_set = false;\n    _lb_for_test_set = false;\n    _disable_lb_set = false;\n    _close_replica_stub_set = false;\n    _not_exit_on_log_failure_set = false;\n    _simple_kv_open_fail_set = false;\n    _simple_kv_close_fail_set = false;\n    _simple_kv_get_checkpoint_fail_set = false;\n    _simple_kv_apply_checkpoint_fail_set = false;\n    for (auto &kv : kv_map) {\n        const std::string &k = kv.first;\n        const std::string &v = kv.second;\n        if (k == \"null_loop\") {\n            _null_loop = boost::lexical_cast<int>(v);\n            _null_loop_set = true;\n        } else if (k == \"load_balance_for_test\") {\n            _lb_for_test = boost::lexical_cast<bool>(v);\n            _lb_for_test_set = true;\n        } else if (k == \"disable_load_balance\") {\n            _disable_lb = boost::lexical_cast<bool>(v);\n            _disable_lb_set = true;\n        } else if (k == \"close_replica_stub_on_exit\") {\n            _close_replica_stub = boost::lexical_cast<bool>(v);\n            _close_replica_stub_set = true;\n        } else if (k == \"not_exit_on_log_failure\") 
{\n            _not_exit_on_log_failure = boost::lexical_cast<bool>(v);\n            _not_exit_on_log_failure_set = true;\n        } else if (k == \"simple_kv_open_fail\") {\n            _simple_kv_open_fail = boost::lexical_cast<bool>(v);\n            _simple_kv_open_fail_set = true;\n        } else if (k == \"simple_kv_close_fail\") {\n            _simple_kv_close_fail = boost::lexical_cast<bool>(v);\n            _simple_kv_close_fail_set = true;\n        } else if (k == \"simple_kv_get_checkpoint_fail\") {\n            _simple_kv_get_checkpoint_fail = boost::lexical_cast<bool>(v);\n            _simple_kv_get_checkpoint_fail_set = true;\n        } else if (k == \"simple_kv_apply_checkpoint_fail\") {\n            _simple_kv_apply_checkpoint_fail = boost::lexical_cast<bool>(v);\n            _simple_kv_apply_checkpoint_fail_set = true;\n        } else {\n            std::cerr << \"bad line: line_no=\" << line_no() << \": unknown key \" << k << std::endl;\n            return false;\n        }\n    }\n    return true;\n}\n\nvoid set_case_line::apply_set() const\n{\n    if (_null_loop_set) {\n        test_case::s_null_loop = _null_loop;\n    }\n    if (_lb_for_test_set) {\n        // pass\n    }\n    if (_disable_lb_set) {\n        test_checker::instance().control_balancer(_disable_lb);\n    }\n    if (_close_replica_stub_set) {\n        test_case::s_close_replica_stub_on_exit = _close_replica_stub;\n    }\n    if (_not_exit_on_log_failure_set) {\n        replica_stub::s_not_exit_on_log_failure = _not_exit_on_log_failure;\n    }\n    if (_simple_kv_open_fail_set) {\n        simple_kv_service_impl::s_simple_kv_open_fail = _simple_kv_open_fail;\n    }\n    if (_simple_kv_close_fail_set) {\n        simple_kv_service_impl::s_simple_kv_close_fail = _simple_kv_close_fail;\n    }\n    if (_simple_kv_get_checkpoint_fail_set) {\n        simple_kv_service_impl::s_simple_kv_get_checkpoint_fail = _simple_kv_get_checkpoint_fail;\n    }\n    if (_simple_kv_apply_checkpoint_fail_set) 
{\n        simple_kv_service_impl::s_simple_kv_apply_checkpoint_fail =\n            _simple_kv_apply_checkpoint_fail;\n    }\n}\n\nstd::string skip_case_line::to_string() const\n{\n    std::ostringstream oss;\n    oss << name() << \":\" << _count;\n    return oss.str();\n}\n\nbool skip_case_line::parse(const std::string &params)\n{\n    if (params.empty())\n        return false;\n    _count = boost::lexical_cast<int>(params);\n    if (_count <= 0) {\n        std::cerr << \"bad line: line_no=\" << line_no() << \": skip count should > 0\" << std::endl;\n        return false;\n    }\n    _skipped = 0;\n    return true;\n}\n\nstd::string exit_case_line::to_string() const\n{\n    std::ostringstream oss;\n    oss << name() << \":\";\n    return oss.str();\n}\n\nbool exit_case_line::parse(const std::string &params) { return true; }\n\nstd::string state_case_line::to_string() const\n{\n    std::ostringstream oss;\n    oss << name() << \":\" << _state.to_string();\n    return oss.str();\n}\n\nbool state_case_line::parse(const std::string &params) { return _state.from_string(params); }\n\nbool state_case_line::check_state(const state_snapshot &cur_state, bool &forward)\n{\n    if (cur_state == _state) {\n        forward = true;\n        return true;\n    }\n    if (cur_state < _state) {\n        forward = false;\n        return true;\n    }\n    return false;\n}\n\nstd::string config_case_line::to_string() const\n{\n    std::ostringstream oss;\n    oss << name() << \":\" << _config.to_string();\n    return oss.str();\n}\n\nbool config_case_line::parse(const std::string &params) { return _config.from_string(params); }\n\nbool config_case_line::check_config(const parti_config &cur_config, bool &forward)\n{\n    if (cur_config == _config) {\n        forward = true;\n        return true;\n    }\n    if (cur_config < _config) {\n        forward = false;\n        return true;\n    }\n    return false;\n}\n\nstruct event_type_helper\n{\n    std::map<event_type, std::string> 
type_to_name;\n    std::map<std::string, event_type> name_to_type;\n    std::set<event_type> support_inject_fault;\n    event_type_helper()\n    {\n        add(event_type::task_enqueue, \"on_task_enqueue\", false);\n        add(event_type::task_begin, \"on_task_begin\", false);\n        add(event_type::task_end, \"on_task_end\", false);\n        add(event_type::task_cancelled, \"on_task_cancelled\", false);\n        add(event_type::aio_call, \"on_aio_call\", true);\n        add(event_type::aio_enqueue, \"on_aio_enqueue\", false);\n        add(event_type::rpc_call, \"on_rpc_call\", true);\n        add(event_type::rpc_request_enqueue, \"on_rpc_request_enqueue\", true);\n        add(event_type::rpc_reply, \"on_rpc_reply\", true);\n        add(event_type::rpc_response_enqueue, \"on_rpc_response_enqueue\", true);\n    }\n    void add(event_type type, const std::string &name, bool is_support_inject_fault)\n    {\n        type_to_name[type] = name;\n        name_to_type[name] = type;\n        if (is_support_inject_fault)\n            support_inject_fault.insert(type);\n    }\n    const char *get(event_type type)\n    {\n        auto it = type_to_name.find(type);\n        dassert(it != type_to_name.end(), \"\");\n        return it->second.c_str();\n    }\n    bool get(const std::string &name, event_type &type)\n    {\n        auto it = name_to_type.find(name);\n        if (it == name_to_type.end())\n            return false;\n        type = it->second;\n        return true;\n    }\n    bool is_support_inject_fault(event_type type)\n    {\n        return support_inject_fault.find(type) != support_inject_fault.end();\n    }\n};\nstatic event_type_helper s_event_type_helper;\n\nconst char *event_type_to_string(event_type type) { return s_event_type_helper.get(type); }\n\nbool event_type_from_string(const std::string &name, event_type &type)\n{\n    return s_event_type_helper.get(name, type);\n}\n\nbool event_type_support_inject_fault(event_type type)\n{\n    return 
s_event_type_helper.is_support_inject_fault(type);\n}\n\nstd::string event::to_string() const\n{\n    std::ostringstream oss;\n    oss << event_type_to_string(type()) << \":\";\n    internal_to_string(oss);\n    std::string str = oss.str();\n    if (str[str.size() - 1] == ',')\n        str.resize(str.size() - 1);\n    return str;\n}\n\nevent *event::parse(int line_no, const std::string &params)\n{\n    size_t pos = params.find(':');\n    if (pos == std::string::npos) {\n        std::cerr << \"bad line: line_no=\" << line_no << std::endl;\n        return nullptr;\n    }\n    std::string type_name = params.substr(0, pos);\n    event_type type;\n    if (!event_type_from_string(type_name, type)) {\n        std::cerr << \"bad line: line_no=\" << line_no << \": invalid event type \" << type_name\n                  << std::endl;\n        return nullptr;\n    }\n    std::map<std::string, std::string> kv_map;\n    if (!parse_kv_map(line_no, params.substr(pos + 1), kv_map)) {\n        return nullptr;\n    }\n    event *e = nullptr;\n    switch (type) {\n    case event_type::task_enqueue:\n        e = new event_on_task_enqueue();\n        break;\n    case event_type::task_begin:\n        e = new event_on_task_begin();\n        break;\n    case event_type::task_end:\n        e = new event_on_task_end();\n        break;\n    case event_type::task_cancelled:\n        e = new event_on_task_cancelled();\n        break;\n    case event_type::aio_call:\n        e = new event_on_aio_call();\n        break;\n    case event_type::aio_enqueue:\n        e = new event_on_aio_enqueue();\n        break;\n    case event_type::rpc_call:\n        e = new event_on_rpc_call();\n        break;\n    case event_type::rpc_request_enqueue:\n        e = new event_on_rpc_request_enqueue();\n        break;\n    case event_type::rpc_reply:\n        e = new event_on_rpc_reply();\n        break;\n    case event_type::rpc_response_enqueue:\n        e = new event_on_rpc_response_enqueue();\n        break;\n  
  default:\n        dassert(false, \"\");\n    }\n    if (!e->internal_parse(kv_map)) {\n        std::cerr << \"bad line: line_no=\" << line_no\n                  << \": invalid event params: \" << params.substr(pos + 1) << std::endl;\n        delete e;\n        return nullptr;\n    }\n    return e;\n}\n\nvoid event_on_task::internal_to_string(std::ostream &oss) const\n{\n    if (!_node.empty())\n        oss << \"node=\" << _node << \",\";\n    if (!_task_id.empty())\n        oss << \"task_id=\" << _task_id << \",\";\n    if (!_task_code.empty())\n        oss << \"task_code=\" << _task_code << \",\";\n    if (!_delay.empty())\n        oss << \"delay=\" << _delay << \",\";\n}\n\nbool event_on_task::internal_parse(const std::map<std::string, std::string> &kv_map)\n{\n    std::map<std::string, std::string>::const_iterator it;\n    if ((it = kv_map.find(\"task_id\")) != kv_map.end())\n        _task_id = it->second;\n    if ((it = kv_map.find(\"node\")) != kv_map.end())\n        _node = it->second;\n    if ((it = kv_map.find(\"task_code\")) != kv_map.end())\n        _task_code = boost::algorithm::to_upper_copy(it->second);\n    if ((it = kv_map.find(\"delay\")) != kv_map.end())\n        _delay = it->second;\n    return true;\n}\n\nbool event_on_task::check_satisfied(const event *ev) const\n{\n    if (type() != ev->type())\n        return false;\n    const event_on_task *e = (const event_on_task *)ev;\n    if (!_task_id.empty() && _task_id != e->_task_id)\n        return false;\n    if (!_node.empty() && _node != e->_node)\n        return false;\n    if (!_task_code.empty() && _task_code != e->_task_code)\n        return false;\n    if (!_delay.empty() && _delay != e->_delay)\n        return false;\n    return true;\n}\n\nvoid event_on_task::init(task *tsk)\n{\n    _task = tsk;\n    if (tsk != nullptr) {\n        char buf[100];\n        sprintf(buf, \"%016\" PRIx64, tsk->id());\n        _task_id = buf;\n        _node = tsk->node()->full_name();\n        _task_code = 
tsk->code().to_string();\n        _delay = boost::lexical_cast<std::string>(tsk->delay_milliseconds());\n    }\n}\n\nvoid event_on_rpc::internal_to_string(std::ostream &oss) const\n{\n    event_on_task::internal_to_string(oss);\n    if (!_trace_id.empty())\n        oss << \"trace_id=\" << _trace_id << \",\";\n    if (!_rpc_name.empty())\n        oss << \"rpc_name=\" << _rpc_name << \",\";\n    if (!_from.empty())\n        oss << \"from=\" << _from << \",\";\n    if (!_to.empty())\n        oss << \"to=\" << _to << \",\";\n}\n\nbool event_on_rpc::internal_parse(const std::map<std::string, std::string> &kv_map)\n{\n    if (!event_on_task::internal_parse(kv_map))\n        return false;\n    std::map<std::string, std::string>::const_iterator it;\n    if ((it = kv_map.find(\"trace_id\")) != kv_map.end())\n        _trace_id = it->second;\n    if ((it = kv_map.find(\"rpc_name\")) != kv_map.end())\n        _rpc_name = boost::algorithm::to_upper_copy(it->second);\n    if ((it = kv_map.find(\"from\")) != kv_map.end())\n        _from = it->second;\n    if ((it = kv_map.find(\"to\")) != kv_map.end())\n        _to = it->second;\n    return true;\n}\n\nbool event_on_rpc::check_satisfied(const event *ev) const\n{\n    if (!event_on_task::check_satisfied(ev))\n        return false;\n    const event_on_rpc *e = (const event_on_rpc *)ev;\n    if (!_trace_id.empty() && _trace_id != e->_trace_id)\n        return false;\n    if (!_rpc_name.empty() && _rpc_name != e->_rpc_name)\n        return false;\n    if (!_from.empty() && _from != e->_from)\n        return false;\n    if (!_to.empty() && _to != e->_to)\n        return false;\n    return true;\n}\n\nvoid event_on_rpc::init(message_ex *msg, task *tsk)\n{\n    event_on_task::init(tsk);\n    if (msg != nullptr) {\n        _trace_id = fmt::sprintf(\"%016llx\", msg->header->trace_id);\n        _rpc_name = msg->header->rpc_name;\n        _from = address_to_node(msg->header->from_address);\n        _to = address_to_node(msg->to_address);\n  
  }\n}\n\nvoid event_on_rpc_request_enqueue::init(rpc_request_task *tsk)\n{\n    event_on_rpc::init(tsk->get_request(), tsk);\n}\n\nvoid event_on_rpc_response_enqueue::internal_to_string(std::ostream &oss) const\n{\n    event_on_rpc::internal_to_string(oss);\n    if (!_err.empty())\n        oss << \"err=\" << _err << \",\";\n}\n\nbool event_on_rpc_response_enqueue::internal_parse(const std::map<std::string, std::string> &kv_map)\n{\n    if (!event_on_rpc::internal_parse(kv_map))\n        return false;\n    std::map<std::string, std::string>::const_iterator it;\n    if ((it = kv_map.find(\"err\")) != kv_map.end())\n        _err = it->second;\n    return true;\n}\n\nbool event_on_rpc_response_enqueue::check_satisfied(const event *ev) const\n{\n    if (!event_on_rpc::check_satisfied(ev))\n        return false;\n    event_on_rpc_response_enqueue *e = (event_on_rpc_response_enqueue *)ev;\n    if (!_err.empty() && _err != e->_err)\n        return false;\n    return true;\n}\n\nvoid event_on_rpc_response_enqueue::init(rpc_response_task *tsk)\n{\n    event_on_rpc::init(tsk->get_request(), tsk); // use request here because response may be nullptr\n    _rpc_name += \"_ACK\";\n    _from.swap(_to);\n    _err = tsk->error().to_string();\n}\n\nvoid event_on_aio::internal_to_string(std::ostream &oss) const\n{\n    event_on_task::internal_to_string(oss);\n    if (!_type.empty())\n        oss << \"type=\" << _type << \",\";\n    if (!_file_offset.empty())\n        oss << \"file_offset=\" << _file_offset << \",\";\n    if (!_buffer_size.empty())\n        oss << \"buffer_size=\" << _buffer_size << \",\";\n}\n\nbool event_on_aio::internal_parse(const std::map<std::string, std::string> &kv_map)\n{\n    if (!event_on_task::internal_parse(kv_map))\n        return false;\n    std::map<std::string, std::string>::const_iterator it;\n    if ((it = kv_map.find(\"type\")) != kv_map.end())\n        _type = boost::algorithm::to_upper_copy(it->second);\n    if ((it = kv_map.find(\"file_offset\")) 
!= kv_map.end())\n        _file_offset = it->second;\n    if ((it = kv_map.find(\"buffer_size\")) != kv_map.end())\n        _buffer_size = it->second;\n    return true;\n}\n\nbool event_on_aio::check_satisfied(const event *ev) const\n{\n    if (!event_on_task::check_satisfied(ev))\n        return false;\n    event_on_aio *e = (event_on_aio *)ev;\n    if (!_type.empty() && _type != e->_type)\n        return false;\n    if (!_file_offset.empty() && _file_offset != e->_file_offset)\n        return false;\n    if (!_buffer_size.empty() && _buffer_size != e->_buffer_size)\n        return false;\n    return true;\n}\n\nvoid event_on_aio::init(aio_task *tsk)\n{\n    event_on_task::init(tsk);\n    if (tsk->get_aio_context()->type == dsn::AIO_Invalid)\n        return; // for flush task, the type is AIO_Invalid\n    _type = (tsk->get_aio_context()->type == dsn::AIO_Read ? \"READ\" : \"WRITE\");\n    _file_offset = boost::lexical_cast<std::string>(tsk->get_aio_context()->file_offset);\n    _buffer_size = boost::lexical_cast<std::string>(tsk->get_aio_context()->buffer_size);\n}\n\nvoid event_on_aio_enqueue::internal_to_string(std::ostream &oss) const\n{\n    event_on_aio::internal_to_string(oss);\n    if (!_err.empty())\n        oss << \"err=\" << _err << \",\";\n    if (!_transferred_size.empty())\n        oss << \"transferred_size=\" << _transferred_size << \",\";\n}\n\nbool event_on_aio_enqueue::internal_parse(const std::map<std::string, std::string> &kv_map)\n{\n    if (!event_on_aio::internal_parse(kv_map))\n        return false;\n    std::map<std::string, std::string>::const_iterator it;\n    if ((it = kv_map.find(\"err\")) != kv_map.end())\n        _err = boost::algorithm::to_upper_copy(it->second);\n    if ((it = kv_map.find(\"transferred_size\")) != kv_map.end())\n        _transferred_size = it->second;\n    return true;\n}\n\nbool event_on_aio_enqueue::check_satisfied(const event *ev) const\n{\n    if (!event_on_aio::check_satisfied(ev))\n        return false;\n    
event_on_aio_enqueue *e = (event_on_aio_enqueue *)ev;\n    if (!_err.empty() && _err != e->_err)\n        return false;\n    if (!_transferred_size.empty() && _transferred_size != e->_transferred_size)\n        return false;\n    return true;\n}\n\nvoid event_on_aio_enqueue::init(aio_task *tsk)\n{\n    event_on_aio::init(tsk);\n    _err = tsk->error().to_string();\n    _transferred_size = boost::lexical_cast<std::string>(tsk->get_transferred_size());\n}\n\nstd::string event_case_line::to_string() const { return name() + \":\" + _event_cond->to_string(); }\n\nbool event_case_line::parse(const std::string &params)\n{\n    _event_cond = event::parse(line_no(), params);\n    return _event_cond != nullptr;\n}\n\nbool event_case_line::check_satisfied(const event *ev) const\n{\n    return _event_cond->check_satisfied(ev);\n}\n\nbool inject_case_line::parse(const std::string &params)\n{\n    if (!event_case_line::parse(params))\n        return false;\n    if (!event_type_support_inject_fault(_event_cond->type())) {\n        std::cerr << \"bad line: line_no=\" << line_no() << \": event type \"\n                  << event_type_to_string(_event_cond->type()) << \" not support inject fault\"\n                  << std::endl;\n        return false;\n    }\n    return true;\n}\n\nstd::string modify_case_line::to_string() const\n{\n    std::ostringstream oss;\n    oss << event_case_line::to_string() << \",modify_delay=\" << _modify_delay;\n    return oss.str();\n}\n\nbool modify_case_line::parse(const std::string &params)\n{\n    if (!event_case_line::parse(params))\n        return false;\n    size_t pos = params.find(':');\n    dassert(pos != std::string::npos, \"\");\n    std::map<std::string, std::string> kv_map;\n    bool parse_ret = parse_kv_map(line_no(), params.substr(pos + 1), kv_map);\n    dassert(parse_ret, \"\");\n    std::map<std::string, std::string>::const_iterator it;\n    if ((it = kv_map.find(\"modify_delay\")) != kv_map.end())\n        _modify_delay = 
it->second;\n    return true;\n}\n\nvoid modify_case_line::modify(const event *ev)\n{\n    if (!_modify_delay.empty()) {\n        const event_on_task *e = dynamic_cast<const event_on_task *>(ev);\n        dassert(e != nullptr, \"\");\n        dassert(e->_task != nullptr, \"\");\n        e->_task->set_delay(boost::lexical_cast<int>(_modify_delay));\n    }\n}\n\nstd::string client_case_line::to_string() const\n{\n    std::ostringstream oss;\n    oss << name() << \":\" << type_name() << \":\";\n    switch (_type) {\n    case begin_write: {\n        oss << \"id=\" << _id << \",key=\" << _key << \",value=\" << _value << \",timeout=\" << _timeout;\n        break;\n    }\n    case begin_read: {\n        oss << \"id=\" << _id << \",key=\" << _key << \",timeout=\" << _timeout;\n        break;\n    }\n    case end_write: {\n        oss << \"id=\" << _id << \",err=\" << _err.to_string() << \",resp=\" << _write_resp;\n        break;\n    }\n    case end_read: {\n        oss << \"id=\" << _id << \",err=\" << _err.to_string() << \",resp=\" << _read_resp;\n        break;\n    }\n    case replica_config: {\n        oss << \"receiver=\" << address_to_node(_config_receiver)\n            << \",type=\" << config_command_to_string(_config_type)\n            << \",node=\" << address_to_node(_config_node);\n        break;\n    }\n    default:\n        dassert(false, \"\");\n    }\n    return oss.str();\n}\n\nbool client_case_line::parse(const std::string &params)\n{\n    size_t pos = params.find(':');\n    if (pos == std::string::npos) {\n        std::cerr << \"bad line: line_no=\" << line_no() << std::endl;\n        return false;\n    }\n    std::string type_name = params.substr(0, pos);\n    if (!parse_type_name(type_name)) {\n        std::cerr << \"bad line: line_no=\" << line_no() << \": invalid client type \" << type_name\n                  << std::endl;\n        return false;\n    }\n    std::map<std::string, std::string> kv_map;\n    if (!parse_kv_map(line_no(), params.substr(pos 
+ 1), kv_map)) {\n        return false;\n    }\n    bool parse_ok = true;\n    switch (_type) {\n    case begin_write: {\n        _id = boost::lexical_cast<int>(kv_map[\"id\"]);\n        _key = kv_map[\"key\"];\n        _value = kv_map[\"value\"];\n        _timeout = boost::lexical_cast<int>(kv_map[\"timeout\"]);\n        break;\n    }\n    case begin_read: {\n        _id = boost::lexical_cast<int>(kv_map[\"id\"]);\n        _key = kv_map[\"key\"];\n        _timeout = boost::lexical_cast<int>(kv_map[\"timeout\"]);\n        break;\n    }\n    case end_write: {\n        _id = boost::lexical_cast<int>(kv_map[\"id\"]);\n        _err = dsn::error_code::try_get(boost::algorithm::to_upper_copy(kv_map[\"err\"]).c_str(),\n                                        ERR_UNKNOWN);\n        _write_resp = boost::lexical_cast<int>(kv_map[\"resp\"]);\n        if (_err == ERR_UNKNOWN)\n            parse_ok = false;\n        break;\n    }\n    case end_read: {\n        _id = boost::lexical_cast<int>(kv_map[\"id\"]);\n        _err = dsn::error_code::try_get(boost::algorithm::to_upper_copy(kv_map[\"err\"]).c_str(),\n                                        ERR_UNKNOWN);\n        _read_resp = kv_map[\"resp\"];\n        if (_err == ERR_UNKNOWN)\n            parse_ok = false;\n        break;\n    }\n    case replica_config: {\n        _config_receiver = node_to_address(kv_map[\"receiver\"]);\n        _config_type = parse_config_command(kv_map[\"type\"]);\n        _config_node = node_to_address(kv_map[\"node\"]);\n        if (_config_receiver.is_invalid() || _config_type == config_type::CT_INVALID ||\n            _config_node.is_invalid())\n            parse_ok = false;\n        break;\n    }\n    default:\n        dassert(false, \"\");\n    }\n    if (!parse_ok) {\n        std::cerr << \"bad line: line_no=\" << line_no() << \": unknown error: \" << kv_map[\"err\"]\n                  << std::endl;\n        return false;\n    }\n    return true;\n}\n\nstd::string client_case_line::type_name() 
const\n{\n    switch (_type) {\n    case begin_write:\n        return \"begin_write\";\n    case begin_read:\n        return \"begin_read\";\n    case end_write:\n        return \"end_write\";\n    case end_read:\n        return \"end_read\";\n    case replica_config:\n        return \"replica_config\";\n    default:\n        dassert(false, \"\");\n    }\n    return \"\";\n}\n\nbool client_case_line::parse_type_name(const std::string &name)\n{\n    if (name == \"begin_write\")\n        _type = begin_write;\n    else if (name == \"begin_read\")\n        _type = begin_read;\n    else if (name == \"end_write\")\n        _type = end_write;\n    else if (name == \"end_read\")\n        _type = end_read;\n    else if (name == \"replica_config\")\n        _type = replica_config;\n    else\n        return false;\n    return true;\n}\n\nstatic const char *s_replica_config_commands[] = {\"none\",\n                                                  \"assign_primary\",\n                                                  \"upgrade_to_primary\",\n                                                  \"add_secondary\",\n                                                  \"upgrade_to_secondary\",\n                                                  \"downgrade_to_secondary\",\n                                                  \"downgrade_to_inactive\",\n                                                  \"remove\",\n                                                  nullptr};\n\ndsn::replication::config_type::type\nclient_case_line::parse_config_command(const std::string &command_name) const\n{\n    for (int i = 0; s_replica_config_commands[i] != nullptr; ++i) {\n        if (boost::iequals(command_name.c_str(), s_replica_config_commands[i])) {\n            return (dsn::replication::config_type::type)i;\n        }\n    }\n    return config_type::CT_INVALID;\n}\n\nstd::string\nclient_case_line::config_command_to_string(dsn::replication::config_type::type cfg_command) const\n{\n    return 
s_replica_config_commands[cfg_command];\n}\n\nvoid client_case_line::get_write_params(int &id,\n                                        std::string &key,\n                                        std::string &value,\n                                        int &timeout_ms) const\n{\n    dassert(_type == begin_write, \"\");\n    id = _id;\n    key = _key;\n    value = _value;\n    timeout_ms = _timeout;\n}\n\nvoid client_case_line::get_read_params(int &id, std::string &key, int &timeout_ms) const\n{\n    dassert(_type == begin_read, \"\");\n    id = _id;\n    key = _key;\n    timeout_ms = _timeout;\n}\n\nvoid client_case_line::get_replica_config_params(rpc_address &receiver,\n                                                 dsn::replication::config_type::type &type,\n                                                 rpc_address &node) const\n{\n    dassert(_type == replica_config, \"\");\n    receiver = _config_receiver;\n    type = _config_type;\n    node = _config_node;\n}\n\nbool client_case_line::check_write_result(int id, ::dsn::error_code err, int32_t resp)\n{\n    return id == _id && err == _err && (err != dsn::ERR_OK || resp == _write_resp);\n}\n\nbool client_case_line::check_read_result(int id, ::dsn::error_code err, const std::string &resp)\n{\n    return id == _id && err == _err && (err != dsn::ERR_OK || resp == _read_resp);\n}\n\nbool test_case::s_inited = false;\nint test_case::s_null_loop = 10000;\nbool test_case::s_close_replica_stub_on_exit = false;\n\ntest_case::test_case() : _next(0), _null_loop_count(0)\n{\n    register_creator<set_case_line>();\n    register_creator<skip_case_line>();\n    register_creator<exit_case_line>();\n    register_creator<state_case_line>();\n    register_creator<config_case_line>();\n    register_creator<wait_case_line>();\n    register_creator<inject_case_line>();\n    register_creator<modify_case_line>();\n    register_creator<client_case_line>();\n}\n\ntest_case::~test_case() {}\n\nbool test_case::init(const std::string 
&case_input)\n{\n    if (s_inited) {\n        return false;\n    }\n\n    std::string input_postfix = \".act\";\n    std::string output_postfix = \".out\";\n\n    size_t pos = case_input.find(input_postfix);\n    if (pos == std::string::npos || pos + input_postfix.size() != case_input.size()) {\n        std::cerr << \"invalid case input file name: \" << case_input << std::endl;\n        return false;\n    }\n\n    _name = case_input.substr(0, pos);\n    std::string case_output = _name + output_postfix;\n\n    _output.open(case_output);\n    if (!_output) {\n        std::cerr << \"open case output file failed: \" << case_input << std::endl;\n        return false;\n    }\n\n    std::ifstream fin(case_input.c_str());\n    if (!fin) {\n        std::cerr << \"open case input file failed: \" << case_input << std::endl;\n        return false;\n    }\n\n    _case_lines.push_back(nullptr); // the first one is null\n\n    int line_no = 0;\n    std::string line;\n    while (!fin.eof()) {\n        std::getline(fin, line);\n        line_no++;\n        boost::algorithm::trim(line);\n        if (line.empty() || line[0] == '#') {\n            // ignore comments\n            continue;\n        }\n        size_t pos = line.find(':');\n        if (pos == std::string::npos) {\n            std::cerr << \"bad line: line_no=\" << line_no << std::endl;\n            return false;\n        }\n        std::string type = line.substr(0, pos);\n        if (_creators.find(type) == _creators.end()) {\n            std::cerr << \"bad line: line_no=\" << line_no << \": invalid case line type \" << type\n                      << std::endl;\n            return false;\n        }\n\n        std::string params = line.substr(pos + 1);\n        case_line *cl = _creators[type](line_no, params);\n        if (cl == nullptr) {\n            std::cerr << \"bad line: line_no=\" << line_no << \": invalid params: \" << params\n                      << std::endl;\n            return false;\n        }\n\n        
_case_lines.push_back(cl);\n    }\n\n    if (_case_lines.size() == 1) // only the first null one\n    {\n        std::cerr << \"empty case input file: \" << case_input << std::endl;\n        return false;\n    }\n\n    forward();\n\n    ddebug(\"=== init %s succeed\", _name.c_str());\n\n    s_inited = true;\n    return true;\n}\n\nvoid test_case::forward()\n{\n    _null_loop_count = 0; // reset null loop count\n    dassert(_next < _case_lines.size(), \"\");\n    while (true) {\n        case_line *cl = _case_lines[_next];\n        if (cl != nullptr) {\n            if (cl->name() != skip_case_line::NAME()) {\n                output(cl->to_string());\n                print(cl, \"\");\n            }\n            ddebug(\"=== on_case_forward:[%d]%s\", cl->line_no(), cl->to_string().c_str());\n        }\n        _next++;\n        if (_next >= _case_lines.size()) {\n            ddebug(\"=== on_case_done\");\n            g_done = true;\n            break;\n        }\n        // pre-view the next one\n        cl = _case_lines[_next];\n        if (cl->name() == set_case_line::NAME()) {\n            set_case_line *scl = static_cast<set_case_line *>(cl);\n            scl->apply_set();\n        } else if (cl->name() == exit_case_line::NAME()) {\n            ddebug(\"=== on_case_exit\");\n            g_done = true;\n            break;\n        } else {\n            if (cl->name() == skip_case_line::NAME()) {\n                output(cl->to_string());\n                print(cl, \"\");\n            }\n            break;\n        }\n    }\n    notify_check_client();\n}\n\nvoid test_case::fail(const std::string &other)\n{\n    _null_loop_count = 0; // reset null loop count\n    dassert(_next < _case_lines.size(), \"\");\n    case_line *cl = _case_lines[_next];\n    output(other);\n    print(cl, other);\n    derror(\"=== on_case_failure:line=%d,case=%s\", cl->line_no(), cl->to_string().c_str());\n    g_fail = true;\n    g_done = true;\n    notify_check_client();\n}\n\nvoid 
test_case::output(const std::string &line)\n{\n    _output << line << std::endl;\n    _output.flush();\n}\n\nvoid test_case::print(case_line *cl, const std::string &other, bool is_skip)\n{\n    if (is_skip) {\n        dassert(cl == nullptr, \"\");\n        dassert(!other.empty(), \"\");\n        std::cout << \"    s  \" << other << std::endl;\n        return;\n    }\n\n    if (cl == nullptr) {\n        dassert(!other.empty(), \"\");\n        std::cout << \"    +  \" << other << std::endl;\n    } else // cl != nullptr\n    {\n        char buf[100];\n        sprintf(buf, \"%5d  \", cl->line_no());\n        std::cout << buf << cl->to_string() << std::endl;\n        if (!other.empty()) {\n            std::cout << \" <==>  \" << other << std::endl;\n        }\n    }\n}\n\nbool test_case::check_skip(bool consume_one)\n{\n    if (g_done)\n        return true;\n\n    case_line *c = _case_lines[_next];\n    if (c->name() != skip_case_line::NAME()) {\n        return false;\n    }\n    skip_case_line *cl = static_cast<skip_case_line *>(c);\n    if (consume_one) {\n        cl->skip_one();\n        ddebug(\"=== on_skip_one:skipped=%d/%d\", cl->skipped(), cl->count());\n    }\n    if (cl->is_skip_done()) {\n        forward();\n    }\n    return true;\n}\n\nvoid test_case::wait_check_client() { _client_sema.wait(); }\n\nvoid test_case::notify_check_client() { _client_sema.signal(); }\n\nbool test_case::check_client_instruction(client_case_line::client_type type)\n{\n    if (g_done)\n        return false;\n\n    if (check_skip(false))\n        return false;\n\n    case_line *c = _case_lines[_next];\n    if (c->name() != client_case_line::NAME()) {\n        return false;\n    }\n    client_case_line *cl = static_cast<client_case_line *>(c);\n    if (cl->type() != type) {\n        return false;\n    }\n    return true;\n}\n\nbool test_case::check_client_write(int &id, std::string &key, std::string &value, int &timeout_ms)\n{\n    if 
(!check_client_instruction(client_case_line::begin_write))\n        return false;\n    client_case_line *cl = static_cast<client_case_line *>(_case_lines[_next]);\n    cl->get_write_params(id, key, value, timeout_ms);\n    forward();\n    return true;\n}\n\nbool test_case::check_replica_config(rpc_address &receiver,\n                                     dsn::replication::config_type::type &type,\n                                     rpc_address &node)\n{\n    if (!check_client_instruction(client_case_line::replica_config))\n        return false;\n    client_case_line *cl = static_cast<client_case_line *>(_case_lines[_next]);\n    cl->get_replica_config_params(receiver, type, node);\n    forward();\n    return true;\n}\n\nbool test_case::check_client_read(int &id, std::string &key, int &timeout_ms)\n{\n    if (!check_client_instruction(client_case_line::begin_read))\n        return false;\n    client_case_line *cl = static_cast<client_case_line *>(_case_lines[_next]);\n    cl->get_read_params(id, key, timeout_ms);\n    forward();\n    return true;\n}\n\nvoid test_case::on_end_write(int id, ::dsn::error_code err, int32_t resp)\n{\n    if (g_done)\n        return;\n\n    char buf[1024];\n    snprintf_p(buf,\n               1024,\n               \"%s:end_write:id=%d,err=%s,resp=%d\",\n               client_case_line::NAME(),\n               id,\n               err.to_string(),\n               resp);\n\n    ddebug(\"=== on_end_write:id=%d,err=%s,resp=%d\", id, err.to_string(), resp);\n\n    if (check_skip(true)) {\n        output(buf);\n        print(nullptr, buf, true);\n        return;\n    }\n\n    case_line *c = _case_lines[_next];\n    if (c->name() != client_case_line::NAME()) {\n        output(buf);\n        print(nullptr, buf, true);\n        return;\n    }\n    client_case_line *cl = static_cast<client_case_line *>(c);\n    if (cl->type() != client_case_line::end_write) {\n        output(buf);\n        print(nullptr, buf, true);\n        return;\n    }\n    if 
(!cl->check_write_result(id, err, resp)) {\n        output(buf);\n        print(nullptr, buf, true);\n        return;\n    }\n    forward();\n}\n\nvoid test_case::on_end_read(int id, ::dsn::error_code err, const std::string &resp)\n{\n    if (g_done)\n        return;\n\n    char buf[1024];\n    snprintf_p(buf,\n               1024,\n               \"%s:end_read:id=%d,err=%s,resp=%s\",\n               client_case_line::NAME(),\n               id,\n               err.to_string(),\n               resp.c_str());\n\n    ddebug(\"=== on_end_read:id=%d,err=%s,resp=%s\", id, err.to_string(), resp.c_str());\n\n    if (check_skip(true)) {\n        output(buf);\n        print(nullptr, buf, true);\n        return;\n    }\n\n    case_line *c = _case_lines[_next];\n    if (c->name() != client_case_line::NAME()) {\n        output(buf);\n        print(nullptr, buf, true);\n        return;\n    }\n    client_case_line *cl = static_cast<client_case_line *>(c);\n    if (cl->type() != client_case_line::end_read) {\n        output(buf);\n        print(nullptr, buf, true);\n        return;\n    }\n    if (!cl->check_read_result(id, err, resp)) {\n        output(buf);\n        print(nullptr, buf, true);\n        return;\n    }\n    forward();\n}\n\nbool test_case::on_event(const event *ev)\n{\n    if (g_done)\n        return true;\n\n    ddebug(\"=== %s\", ev->to_string().c_str());\n\n    if (check_skip(false))\n        return true;\n\n    case_line *c = _case_lines[_next];\n    if (c->name() != inject_case_line::NAME() && c->name() != modify_case_line::NAME() &&\n        c->name() != wait_case_line::NAME()) {\n        return true;\n    }\n\n    event_case_line *cl = static_cast<event_case_line *>(c);\n    if (!cl->check_satisfied(ev)) {\n        return true;\n    }\n\n    bool ret = true;\n    if (cl->name() == inject_case_line::NAME()) {\n        // should inject fault\n        ret = false;\n    } else if (cl->name() == modify_case_line::NAME()) {\n        modify_case_line *mcl = 
static_cast<modify_case_line *>(cl);\n        mcl->modify(ev);\n    }\n\n    forward();\n    return ret;\n}\n\nvoid test_case::on_check()\n{\n    if (g_done)\n        return;\n\n    ++_null_loop_count;\n    if (s_null_loop > 0 && _null_loop_count > s_null_loop) {\n        fail(\"null_loop:\" + boost::lexical_cast<std::string>(s_null_loop));\n    }\n}\n\nvoid test_case::on_config_change(const parti_config &last, const parti_config &cur)\n{\n    if (g_done)\n        return;\n\n    _null_loop_count = 0; // reset null loop count\n\n    std::string buf = std::string(config_case_line::NAME()) + \":\" + cur.to_string();\n    ddebug(\"=== on_config_change:%s\", cur.to_string().c_str());\n\n    if (check_skip(true)) {\n        output(buf);\n        print(nullptr, buf, true);\n        return;\n    }\n\n    case_line *c = _case_lines[_next];\n    if (c->name() != config_case_line::NAME()) {\n        output(buf);\n        print(nullptr, buf);\n        return;\n    }\n    config_case_line *cl = static_cast<config_case_line *>(c);\n    bool do_forward;\n    if (!cl->check_config(cur, do_forward)) {\n        fail(buf);\n        return;\n    }\n    if (do_forward) {\n        forward();\n    } else {\n        output(buf);\n        print(nullptr, buf);\n    }\n}\n\nvoid test_case::on_state_change(const state_snapshot &last, const state_snapshot &cur)\n{\n    if (g_done)\n        return;\n\n    _null_loop_count = 0; // reset null loop count\n\n    std::string buf = std::string(state_case_line::NAME()) + \":\" + cur.to_string();\n    ddebug(\"=== on_state_change:%s\\n%s\", cur.to_string().c_str(), cur.diff_string(last).c_str());\n\n    if (check_skip(true)) {\n        output(buf);\n        print(nullptr, buf, true);\n        return;\n    }\n\n    case_line *c = _case_lines[_next];\n    if (c->name() != state_case_line::NAME()) {\n        output(buf);\n        print(nullptr, buf);\n        return;\n    }\n    state_case_line *cl = static_cast<state_case_line *>(c);\n    bool 
do_forward;\n    if (!cl->check_state(cur, do_forward)) {\n        fail(buf);\n        return;\n    }\n    if (do_forward) {\n        forward();\n    } else {\n        output(buf);\n        print(nullptr, buf);\n    }\n}\n\nvoid test_case::internal_register_creator(const std::string &name, case_line_creator creator)\n{\n    dassert(_creators.find(name) == _creators.end(), \"\");\n    _creators[name] = creator;\n}\n}\n}\n}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/case.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Replication testing framework.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include \"common.h\"\n\n#include <dsn/utility/singleton.h>\n#include <dsn/tool-api/zlocks.h>\n\n#include <fstream>\n\nnamespace dsn {\nnamespace replication {\nnamespace test {\n\nclass case_line\n{\npublic:\n    template <typename T>\n    static case_line *create(int line_no, const std::string &params)\n    {\n        case_line *cl = new T();\n        cl->set_line_no(line_no);\n        if (!cl->parse(params)) {\n            delete cl;\n            return nullptr;\n        }\n        return cl;\n    }\n\npublic:\n    
virtual ~case_line() {}\n    int line_no() const { return _line_no; }\n    void set_line_no(int line_no) { _line_no = line_no; }\n    virtual std::string name() const = 0;\n    virtual std::string to_string() const = 0;\n    virtual bool parse(const std::string &params) = 0;\n\nprivate:\n    int _line_no;\n};\n\nclass set_case_line : public case_line\n{\npublic:\n    static const char *NAME() { return \"set\"; }\n    virtual ~set_case_line() {}\n    virtual std::string name() const { return NAME(); }\n    virtual std::string to_string() const;\n    virtual bool parse(const std::string &params);\n\n    void apply_set() const;\n\nprivate:\n    bool _null_loop_set;\n    int _null_loop;\n    bool _lb_for_test;\n    bool _lb_for_test_set;\n    bool _disable_lb;\n    bool _disable_lb_set;\n    bool _close_replica_stub;\n    bool _close_replica_stub_set;\n    bool _not_exit_on_log_failure;\n    bool _not_exit_on_log_failure_set;\n    bool _simple_kv_open_fail;\n    bool _simple_kv_open_fail_set;\n    bool _simple_kv_close_fail;\n    bool _simple_kv_close_fail_set;\n    bool _simple_kv_get_checkpoint_fail;\n    bool _simple_kv_get_checkpoint_fail_set;\n    bool _simple_kv_apply_checkpoint_fail;\n    bool _simple_kv_apply_checkpoint_fail_set;\n};\n\n// SKIP:100\nclass skip_case_line : public case_line\n{\npublic:\n    static const char *NAME() { return \"skip\"; }\n    virtual ~skip_case_line() {}\n    virtual std::string name() const { return NAME(); }\n    virtual std::string to_string() const;\n    virtual bool parse(const std::string &params);\n\n    int count() const { return _count; }\n    int skipped() const { return _skipped; }\n    void skip_one() { _skipped++; }\n    bool is_skip_done() const { return _skipped >= _count; }\nprivate:\n    int _count;\n    int _skipped;\n};\n\n// EXIT:\nclass exit_case_line : public case_line\n{\npublic:\n    static const char *NAME() { return \"exit\"; }\n    virtual ~exit_case_line() {}\n    virtual std::string name() const { 
return NAME(); }\n    virtual std::string to_string() const;\n    virtual bool parse(const std::string &params);\n};\n\nclass state_case_line : public case_line\n{\npublic:\n    static const char *NAME() { return \"state\"; }\n    virtual ~state_case_line() {}\n    virtual std::string name() const { return NAME(); }\n    virtual std::string to_string() const;\n    virtual bool parse(const std::string &params);\n\n    // return false if check failed\n    // 'forward' indicates whether to go forward\n    bool check_state(const state_snapshot &cur_state, bool &forward);\n\nprivate:\n    state_snapshot _state;\n};\n\nclass config_case_line : public case_line\n{\npublic:\n    static const char *NAME() { return \"config\"; }\n    virtual ~config_case_line() {}\n    virtual std::string name() const { return NAME(); }\n    virtual std::string to_string() const;\n    virtual bool parse(const std::string &params);\n\n    // return false if check failed\n    // 'forward' indicates whether to go forward\n    bool check_config(const parti_config &cur_config, bool &forward);\n\nprivate:\n    parti_config _config;\n};\n\nenum event_type\n{\n    task_enqueue,         // node=xxx,code=xxx\n    task_begin,           // node=xxx,code=xxx\n    task_end,             // node=xxx,code=xxx\n    task_cancelled,       // node=xxx,code=xxx\n    aio_call,             // node=xxx\n    aio_enqueue,          // node=xxx\n    rpc_call,             // name=xxx,from=xxx,to=xxx\n    rpc_request_enqueue,  // name=xxx,from=xxx,to=xxx\n    rpc_reply,            // name=xxx,from=xxx,to=xxx\n    rpc_response_enqueue, // name=xxx,from=xxx,to=xxx,err=xxx\n};\n\nconst char *event_type_to_string(event_type type);\nbool event_type_from_string(const std::string &name, event_type &type);\nbool event_type_support_inject_fault(event_type type);\n\nclass event\n{\npublic:\n    virtual ~event() {}\n    virtual event_type type() const = 0;\n    virtual void internal_to_string(std::ostream &oss) const = 
0;\n    virtual bool internal_parse(const std::map<std::string, std::string> &kv_map) = 0;\n    // 'this' is the event condition, and 'ev' is the real event that occurred\n    // return true if 'ev' satisfies 'this' condition\n    virtual bool check_satisfied(const event *ev) const = 0;\n\n    std::string to_string() const;\n    static event *parse(int line_no, const std::string &params);\n};\n\nclass event_on_task : public event\n{\npublic:\n    virtual void internal_to_string(std::ostream &oss) const;\n    virtual bool internal_parse(const std::map<std::string, std::string> &kv_map);\n    virtual bool check_satisfied(const event *ev) const;\n\n    void init(task *tsk);\n\npublic:\n    task *_task;\n    std::string _task_id;\n    std::string _node;\n    std::string _task_code;\n    std::string _delay;\n};\n\nclass event_on_task_enqueue : public event_on_task\n{\npublic:\n    virtual event_type type() const { return task_enqueue; }\n};\n\nclass event_on_task_begin : public event_on_task\n{\npublic:\n    virtual event_type type() const { return task_begin; }\n};\n\nclass event_on_task_end : public event_on_task\n{\npublic:\n    virtual event_type type() const { return task_end; }\n};\n\nclass event_on_task_cancelled : public event_on_task\n{\npublic:\n    virtual event_type type() const { return task_cancelled; }\n};\n\nclass event_on_rpc : public event_on_task\n{\npublic:\n    virtual void internal_to_string(std::ostream &oss) const;\n    virtual bool internal_parse(const std::map<std::string, std::string> &kv_map);\n    virtual bool check_satisfied(const event *ev) const;\n\n    // 'tsk' is the following task by the event, by which we can\n    // connect related events to event sequence.\n    void init(message_ex *msg, task *tsk);\n\npublic:\n    std::string _trace_id;\n    std::string _rpc_name;\n    std::string _from;\n    std::string _to;\n};\n\nclass event_on_rpc_call : public event_on_rpc\n{\npublic:\n    virtual event_type type() const { return rpc_call; 
}\n};\n\nclass event_on_rpc_request_enqueue : public event_on_rpc\n{\npublic:\n    virtual event_type type() const { return rpc_request_enqueue; }\n\n    void init(rpc_request_task *tsk);\n};\n\nclass event_on_rpc_reply : public event_on_rpc\n{\npublic:\n    virtual event_type type() const { return rpc_reply; }\n};\n\nclass event_on_rpc_response_enqueue : public event_on_rpc\n{\npublic:\n    virtual event_type type() const { return rpc_response_enqueue; }\n    virtual void internal_to_string(std::ostream &oss) const;\n    virtual bool internal_parse(const std::map<std::string, std::string> &kv_map);\n    virtual bool check_satisfied(const event *ev) const;\n\n    void init(rpc_response_task *tsk);\n\npublic:\n    std::string _err;\n};\n\nclass event_on_aio : public event_on_task\n{\npublic:\n    virtual void internal_to_string(std::ostream &oss) const;\n    virtual bool internal_parse(const std::map<std::string, std::string> &kv_map);\n    virtual bool check_satisfied(const event *ev) const;\n\n    void init(aio_task *tsk);\n\npublic:\n    std::string _type;\n    std::string _file_offset;\n    std::string _buffer_size;\n};\n\nclass event_on_aio_call : public event_on_aio\n{\npublic:\n    virtual event_type type() const { return aio_call; }\n};\n\nclass event_on_aio_enqueue : public event_on_aio\n{\npublic:\n    virtual event_type type() const { return aio_enqueue; }\n    virtual void internal_to_string(std::ostream &oss) const;\n    virtual bool internal_parse(const std::map<std::string, std::string> &kv_map);\n    virtual bool check_satisfied(const event *ev) const;\n\n    void init(aio_task *tsk);\n\npublic:\n    std::string _err;\n    std::string _transferred_size;\n};\n\nclass event_case_line : public case_line\n{\npublic:\npublic:\n    virtual ~event_case_line()\n    {\n        if (_event_cond)\n            delete _event_cond;\n    }\n    virtual std::string to_string() const;\n    virtual bool parse(const std::string &params);\n\n    bool 
check_satisfied(const event *ev) const;\n\npublic:\n    event *_event_cond;\n};\n\nclass wait_case_line : public event_case_line\n{\npublic:\n    static const char *NAME() { return \"wait\"; }\n    virtual std::string name() const { return NAME(); }\n};\n\nclass inject_case_line : public event_case_line\n{\npublic:\n    static const char *NAME() { return \"inject\"; }\n    virtual std::string name() const { return NAME(); }\n    virtual bool parse(const std::string &params);\n};\n\nclass modify_case_line : public event_case_line\n{\npublic:\n    static const char *NAME() { return \"modify\"; }\n    virtual std::string name() const { return NAME(); }\n    virtual std::string to_string() const;\n    virtual bool parse(const std::string &params);\n    virtual void modify(const event *ev);\n\npublic:\n    std::string _modify_delay;\n};\n\nclass client_case_line : public case_line\n{\npublic:\n    enum client_type\n    {\n        begin_write,    // id=xxx,key=xxx,value=xxx,timeout=xxx\n        begin_read,     // id=xxx,key=xxx,timeout=xxx\n        end_write,      // id=xxx,err=xxx,resp=xxx\n        end_read,       // id=xxx,err=xxx,resp=xxx\n        replica_config, // receiver=xxx,type=xxx,node=xxx\n    };\n\npublic:\n    static const char *NAME() { return \"client\"; }\n    virtual std::string name() const { return NAME(); }\n    virtual std::string to_string() const;\n    virtual bool parse(const std::string &params);\n\n    client_type type() const { return _type; }\n    std::string type_name() const;\n    bool parse_type_name(const std::string &name);\n    void get_write_params(int &id, std::string &key, std::string &value, int &timeout_ms) const;\n    void get_read_params(int &id, std::string &key, int &timeout_ms) const;\n    void get_replica_config_params(rpc_address &receiver,\n                                   dsn::replication::config_type::type &type,\n                                   rpc_address &node) const;\n    bool check_write_result(int id, 
::dsn::error_code err, int32_t resp);\n    bool check_read_result(int id, ::dsn::error_code err, const std::string &resp);\n\n    dsn::replication::config_type::type parse_config_command(const std::string &command_str) const;\n    std::string config_command_to_string(dsn::replication::config_type::type cfg_command) const;\n\nprivate:\n    client_type _type;\n    int _id;\n    std::string _key;\n    std::string _value;\n    int _timeout;\n    dsn::error_code _err;\n    int _write_resp;\n    std::string _read_resp;\n\n    rpc_address _config_receiver;\n    dsn::replication::config_type::type _config_type;\n    rpc_address _config_node;\n};\n\nclass test_case : public dsn::utils::singleton<test_case>\n{\npublic:\n    static bool s_inited;\n\n    // options, can be modified be set_case_line\n    static int s_null_loop;\n    static bool s_close_replica_stub_on_exit;\n\npublic:\n    test_case();\n    ~test_case();\n    bool init(const std::string &case_input);\n    void forward();\n    void fail(const std::string &other);\n    void output(const std::string &line);\n    void print(case_line *cl, const std::string &other, bool is_skip = false);\n\n    // return true if should skip\n    bool check_skip(bool consume_one);\n\n    // client\n    void wait_check_client();\n    void notify_check_client();\n    bool check_client_write(int &id, std::string &key, std::string &value, int &timeout_ms);\n    bool check_replica_config(rpc_address &receiver,\n                              dsn::replication::config_type::type &type,\n                              rpc_address &node);\n    bool check_client_read(int &id, std::string &key, int &timeout_ms);\n    void on_end_write(int id, ::dsn::error_code err, int32_t resp);\n    void on_end_read(int id, ::dsn::error_code err, const std::string &resp);\n\n    // checker\n    void on_check();\n    void on_config_change(const parti_config &last, const parti_config &cur);\n    void on_state_change(const state_snapshot &last, const 
state_snapshot &cur);\n\n    // injecter\n    bool on_event(const event *ev);\n\nprivate:\n    typedef case_line *(*case_line_creator)(int, const std::string &);\n    void internal_register_creator(const std::string &name, case_line_creator creator);\n    template <typename T>\n    void register_creator()\n    {\n        internal_register_creator(T::NAME(), T::template create<T>);\n    }\n\n    bool check_client_instruction(client_case_line::client_type type);\n\nprivate:\n    std::string _name;\n    std::ofstream _output;\n    std::map<std::string, case_line_creator> _creators;\n    std::vector<case_line *> _case_lines;\n    size_t _next;\n    int _null_loop_count;\n    dsn::zsemaphore _client_sema;\n};\n}\n}\n}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/checker.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Replication testing framework.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <sstream>\n#include <boost/lexical_cast.hpp>\n#include <dsn/utility/factory_store.h>\n#include <dsn/tool_api.h>\n\n#include \"checker.h\"\n#include \"case.h\"\n\n#include \"replica/replica.h\"\n#include \"replica/replica_stub.h\"\n#include \"replica/mutation_log.h\"\n#include \"meta/meta_service.h\"\n#include \"meta/meta_server_failure_detector.h\"\n#include \"meta/server_state.h\"\n#include \"meta/server_load_balancer.h\"\n\n#include \"runtime/service_engine.h\"\n#include \"runtime/rpc/rpc_engine.h\"\n\nnamespace dsn {\nnamespace 
replication {\nnamespace test {\n\nclass checker_partition_guardian : public partition_guardian\n{\npublic:\n    static bool s_disable_balancer;\n\npublic:\n    checker_partition_guardian(meta_service *svc) : partition_guardian(svc) {}\n    pc_status\n    cure(meta_view view, const dsn::gpid &gpid, configuration_proposal_action &action) override\n    {\n        const partition_configuration &pc = *get_config(*view.apps, gpid);\n        action.type = config_type::CT_INVALID;\n        if (s_disable_balancer)\n            return pc_status::healthy;\n\n        pc_status result;\n        if (pc.primary.is_invalid()) {\n            if (pc.secondaries.size() > 0) {\n                action.node = pc.secondaries[0];\n                for (unsigned int i = 1; i < pc.secondaries.size(); ++i)\n                    if (pc.secondaries[i] < action.node)\n                        action.node = pc.secondaries[i];\n                action.type = config_type::CT_UPGRADE_TO_PRIMARY;\n                result = pc_status::ill;\n            }\n\n            else if (pc.last_drops.size() == 0) {\n                std::vector<rpc_address> sort_result;\n                sort_alive_nodes(*view.nodes,\n                                 server_load_balancer::primary_comparator(*view.nodes),\n                                 sort_result);\n                action.node = sort_result[0];\n                action.type = config_type::CT_ASSIGN_PRIMARY;\n                result = pc_status::ill;\n            }\n\n            // DDD\n            else {\n                action.node = *pc.last_drops.rbegin();\n                action.type = config_type::CT_ASSIGN_PRIMARY;\n                derror(\"%d.%d enters DDD state, we are waiting for its last primary node %s to \"\n                       \"come back ...\",\n                       pc.pid.get_app_id(),\n                       pc.pid.get_partition_index(),\n                       action.node.to_string());\n                result = pc_status::dead;\n            
}\n            action.target = action.node;\n        }\n\n        else if (static_cast<int>(pc.secondaries.size()) + 1 < pc.max_replica_count) {\n            std::vector<rpc_address> sort_result;\n            sort_alive_nodes(\n                *view.nodes, server_load_balancer::partition_comparator(*view.nodes), sort_result);\n\n            for (auto &node : sort_result) {\n                if (!is_member(pc, node)) {\n                    action.node = node;\n                    break;\n                }\n            }\n            action.target = pc.primary;\n            action.type = config_type::CT_ADD_SECONDARY;\n            result = pc_status::ill;\n        } else {\n            result = pc_status::healthy;\n        }\n        return result;\n    }\n\n    typedef std::function<bool(const rpc_address &addr1, const rpc_address &addr2)> node_comparator;\n    static void sort_alive_nodes(const node_mapper &nodes,\n                                 const node_comparator &cmp,\n                                 std::vector<rpc_address> &sorted_node)\n    {\n        sorted_node.clear();\n        sorted_node.reserve(nodes.size());\n        for (auto &iter : nodes) {\n            if (!iter.first.is_invalid() && iter.second.alive()) {\n                sorted_node.push_back(iter.first);\n            }\n        }\n        std::sort(sorted_node.begin(), sorted_node.end(), cmp);\n    }\n};\n\nbool test_checker::s_inited = false;\nbool checker_partition_guardian::s_disable_balancer = false;\n\ntest_checker::test_checker() {}\n\nvoid test_checker::control_balancer(bool disable_it)\n{\n    checker_partition_guardian::s_disable_balancer = disable_it;\n    if (disable_it && meta_leader()) {\n        server_state *ss = meta_leader()->_service->_state.get();\n        for (auto &kv : ss->_exist_apps) {\n            std::shared_ptr<app_state> &app = kv.second;\n            app->helpers->clear_proposals();\n        }\n    }\n}\n\nbool test_checker::init(const std::string &name, const 
std::vector<service_app *> apps)\n{\n    if (s_inited)\n        return false;\n\n    _apps = apps;\n    utils::factory_store<replication::partition_guardian>::register_factory(\n        \"checker_partition_guardian\",\n        replication::partition_guardian::create<checker_partition_guardian>,\n        PROVIDER_TYPE_MAIN);\n\n    for (auto &app : _apps) {\n        if (0 == strcmp(app->info().type.c_str(), \"meta\")) {\n            meta_service_app *meta_app = (meta_service_app *)app;\n            meta_app->_service->_state->set_config_change_subscriber_for_test(\n                std::bind(&test_checker::on_config_change, this, std::placeholders::_1));\n            meta_app->_service->_meta_opts.partition_guardian_type = \"checker_partition_guardian\";\n            _meta_servers.push_back(meta_app);\n        } else if (0 == strcmp(app->info().type.c_str(), \"replica\")) {\n            replication_service_app *replica_app = (replication_service_app *)app;\n            replica_app->_stub->set_replica_state_subscriber_for_test(\n                std::bind(&test_checker::on_replica_state_change,\n                          this,\n                          std::placeholders::_1,\n                          std::placeholders::_2,\n                          std::placeholders::_3),\n                false);\n            _replica_servers.push_back(replica_app);\n        }\n    }\n\n    const auto &nodes = dsn::service_engine::instance().get_all_nodes();\n    for (const auto &node : nodes) {\n        int id = node.second->id();\n        std::string name = node.second->full_name();\n        rpc_address paddr = node.second->rpc()->primary_address();\n        int port = paddr.port();\n        _node_to_address[name] = paddr;\n        ddebug(\"=== node_to_address[%s]=%s\", name.c_str(), paddr.to_string());\n        _address_to_node[port] = name;\n        ddebug(\"=== address_to_node[%u]=%s\", port, name.c_str());\n        if (id != port) {\n            _address_to_node[id] = name;\n  
          ddebug(\"=== address_to_node[%u]=%s\", id, name.c_str());\n        }\n    }\n\n    s_inited = true;\n\n    if (!test_case::instance().init(g_case_input)) {\n        std::cerr << \"init test_case failed\" << std::endl;\n        s_inited = false;\n        return false;\n    }\n\n    return true;\n}\n\nvoid test_checker::exit()\n{\n    if (!s_inited)\n        return;\n\n    for (meta_service_app *app : _meta_servers) {\n        app->_service->_started.store(false);\n    }\n\n    if (test_case::s_close_replica_stub_on_exit) {\n        dsn::tools::tool_app *app = dsn::tools::get_current_tool();\n        app->stop_all_apps(true);\n    }\n}\n\nvoid test_checker::check()\n{\n    test_case::instance().on_check();\n    if (g_done)\n        return;\n\n    // 'config_change' and 'replica_state_change' are detected in two ways:\n    //   - each time this check() is called, checking will be applied\n    //   - register subscribers on meta_server and replica_server to be notified\n\n    parti_config cur_config;\n    if (get_current_config(cur_config) && cur_config != _last_config) {\n        test_case::instance().on_config_change(_last_config, cur_config);\n        _last_config = cur_config;\n    }\n\n    state_snapshot cur_states;\n    get_current_states(cur_states);\n    if (cur_states != _last_states) {\n        test_case::instance().on_state_change(_last_states, cur_states);\n        _last_states = cur_states;\n    }\n}\n\nvoid test_checker::on_replica_state_change(::dsn::rpc_address from,\n                                           const replica_configuration &new_config,\n                                           bool is_closing)\n{\n    state_snapshot cur_states;\n    get_current_states(cur_states);\n    if (cur_states != _last_states) {\n        test_case::instance().on_state_change(_last_states, cur_states);\n        _last_states = cur_states;\n    }\n}\n\nvoid test_checker::on_config_change(const app_mapper &new_config)\n{\n    const partition_configuration 
*pc = get_config(new_config, g_default_gpid);\n    dassert(pc != nullptr, \"drop table is not allowed in test\");\n\n    parti_config cur_config;\n    cur_config.convert_from(*pc);\n    if (cur_config != _last_config) {\n        test_case::instance().on_config_change(_last_config, cur_config);\n        _last_config = cur_config;\n    }\n}\n\nvoid test_checker::get_current_states(state_snapshot &states)\n{\n    states.state_map.clear();\n    for (auto &app : _replica_servers) {\n        if (!app->is_started())\n            continue;\n\n        for (auto &kv : app->_stub->_replicas) {\n            replica_ptr r = kv.second;\n            dassert(kv.first == r->get_gpid(), \"\");\n            replica_id id(r->get_gpid(), app->info().full_name);\n            replica_state &rs = states.state_map[id];\n            rs.id = id;\n            rs.status = r->status();\n            rs.ballot = r->get_ballot();\n            rs.last_committed_decree = r->last_committed_decree();\n            rs.last_durable_decree = r->last_durable_decree();\n        }\n    }\n}\n\nbool test_checker::get_current_config(parti_config &config)\n{\n    meta_service_app *meta = meta_leader();\n    if (meta == nullptr)\n        return false;\n    partition_configuration c;\n\n    // we should never try to acquire lock when we are in checker. Because we are the only\n    // thread that is running.\n    // The app and simulator have lots in common with the OS's userspace and kernel space.\n    // In normal case, \"apps\" runs in \"userspace\". You can \"trap into kernel(i.e. 
the simulator)\" by\n    // the rDSN's\n    //\"enqueue,dequeue and lock...\"\n\n    // meta->_service->_state->query_configuration_by_gpid(g_default_gpid, c);\n    const meta_view view = meta->_service->_state->get_meta_view();\n    const partition_configuration *pc = get_config(*(view.apps), g_default_gpid);\n    c = *pc;\n    config.convert_from(c);\n    return true;\n}\n\nmeta_service_app *test_checker::meta_leader()\n{\n    for (auto &meta : _meta_servers) {\n        if (!meta->is_started())\n            return nullptr;\n\n        if (meta->_service->_failure_detector->get_leader(nullptr))\n            return meta;\n    }\n    return nullptr;\n}\n\nbool test_checker::is_server_normal()\n{\n    auto meta = meta_leader();\n    if (!meta)\n        return false;\n    return check_replica_state(1, 2, 0);\n}\n\nbool test_checker::check_replica_state(int primary_count, int secondary_count, int inactive_count)\n{\n    int p = 0;\n    int s = 0;\n    int i = 0;\n    for (auto &rs : _replica_servers) {\n        if (!rs->is_started())\n            return false;\n        for (auto &replica : rs->_stub->_replicas) {\n            auto status = replica.second->status();\n            if (status == partition_status::PS_PRIMARY)\n                p++;\n            else if (status == partition_status::PS_SECONDARY)\n                s++;\n            else if (status == partition_status::PS_INACTIVE)\n                i++;\n        }\n    }\n    return p == primary_count && s == secondary_count && i == inactive_count;\n}\n\nstd::string test_checker::address_to_node_name(rpc_address addr)\n{\n    auto find = _address_to_node.find(addr.port());\n    if (find != _address_to_node.end())\n        return find->second;\n    return \"node@\" + boost::lexical_cast<std::string>(addr.port());\n}\n\nrpc_address test_checker::node_name_to_address(const std::string &name)\n{\n    auto find = _node_to_address.find(name);\n    if (find != _node_to_address.end())\n        return find->second;\n    
return rpc_address();\n}\n\nvoid install_checkers()\n{\n    dsn::tools::simulator::register_checker(\"simple_kv.checker\",\n                                            dsn::tools::checker::create<wrap_checker>);\n}\n} // namespace test\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/checker.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Replication testing framework.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include \"common.h\"\n\n#include <dsn/utility/singleton.h>\n#include <dsn/tool/simulator.h>\n#include <dsn/dist/replication/meta_service_app.h>\n#include <dsn/dist/replication/replication_service_app.h>\n\n#include \"meta/server_state.h\"\n\nnamespace dsn {\nnamespace replication {\nnamespace test {\n\nusing ::dsn::service::meta_service_app;\n\nclass test_checker : public dsn::utils::singleton<test_checker>\n{\npublic:\n    static bool s_inited;\n\npublic:\n    test_checker();\n\n    bool init(const std::string &name, 
const std::vector<service_app *> apps);\n\n    void exit();\n\n    void check();\n\n    meta_service_app *meta_leader();\n\n    void control_balancer(bool disable_it);\n\n    bool is_server_normal();\n\n    bool check_replica_state(int primary_count, int secondary_count, int inactive_count);\n\n    std::string address_to_node_name(rpc_address addr);\n    rpc_address node_name_to_address(const std::string &name);\n\n    void on_replica_state_change(::dsn::rpc_address from,\n                                 const replica_configuration &new_config,\n                                 bool is_closing);\n    void on_config_change(const app_mapper &new_config);\n\n    void get_current_states(state_snapshot &states);\n    bool get_current_config(parti_config &config);\n\nprivate:\n    std::vector<service_app *> _apps;\n    std::vector<meta_service_app *> _meta_servers;\n    std::vector<replication_service_app *> _replica_servers;\n\n    parti_config _last_config;\n    state_snapshot _last_states;\n\n    std::map<std::string, dsn::rpc_address> _node_to_address; // address is primary_address()\n    std::map<int, std::string> _address_to_node;              // port is enough for key\n};\n\nclass wrap_checker : public dsn::tools::checker\n{\npublic:\n    wrap_checker() : dsn::tools::checker() {}\n\n    virtual void initialize(const std::string &name, const std::vector<service_app *> &apps)\n    {\n        _checker = &test_checker::instance();\n        if (!_checker->init(name, apps)) {\n            g_done = true;\n            g_fail = true;\n        }\n    }\n    virtual void check() override { _checker->check(); }\n\nprivate:\n    test_checker *_checker;\n};\n\nvoid install_checkers();\n}\n}\n}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/clear.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nrm -rf data core*\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/client.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Replication testing framework.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"client.h\"\n#include \"case.h\"\n#include <dsn/tool-api/group_address.h>\n#include <dsn/dist/replication/replication_other_types.h>\n\n#include <sstream>\n\nnamespace dsn {\nnamespace replication {\nnamespace test {\n\nusing namespace dsn::replication::application;\nDEFINE_TASK_CODE(LPC_SIMPLE_KV_TEST, TASK_PRIORITY_COMMON, dsn::THREAD_POOL_DEFAULT)\n\nsimple_kv_client_app::simple_kv_client_app(const service_app_info *info)\n    : ::dsn::service_app(info), 
_simple_kv_client(nullptr)\n{\n}\n\nsimple_kv_client_app::~simple_kv_client_app() { stop(); }\n\n::dsn::error_code simple_kv_client_app::start(const std::vector<std::string> &args)\n{\n    if (args.size() < 2)\n        return ::dsn::ERR_INVALID_PARAMETERS;\n\n    std::vector<rpc_address> meta_servers;\n    replica_helper::load_meta_servers(meta_servers);\n    _meta_server_group.assign_group(\"meta_servers\");\n    _meta_server_group.group_address()->add_list(meta_servers);\n\n    _simple_kv_client.reset(\n        new application::simple_kv_client(\"mycluster\", meta_servers, \"simple_kv.instance0\"));\n\n    dsn::tasking::enqueue(\n        LPC_SIMPLE_KV_TEST, &_tracker, std::bind(&simple_kv_client_app::run, this));\n\n    return ::dsn::ERR_OK;\n}\n\ndsn::error_code simple_kv_client_app::stop(bool cleanup)\n{\n    _tracker.cancel_outstanding_tasks();\n    _simple_kv_client.reset();\n    return ::dsn::ERR_OK;\n}\n\nvoid simple_kv_client_app::run()\n{\n    int id;\n    std::string key;\n    std::string value;\n    int timeout_ms;\n\n    rpc_address receiver;\n    dsn::replication::config_type::type type;\n    rpc_address node;\n\n    while (!g_done) {\n        if (test_case::instance().check_client_write(id, key, value, timeout_ms)) {\n            begin_write(id, key, value, timeout_ms);\n            continue;\n        }\n        if (test_case::instance().check_replica_config(receiver, type, node)) {\n            send_config_to_meta(receiver, type, node);\n            continue;\n        }\n        if (test_case::instance().check_client_read(id, key, timeout_ms)) {\n            begin_read(id, key, timeout_ms);\n            continue;\n        }\n        test_case::instance().wait_check_client();\n    }\n}\n\nstruct write_context\n{\n    int id;\n    ::dsn::replication::test::kv_pair req;\n    int timeout_ms;\n};\n\nvoid simple_kv_client_app::begin_write(int id,\n                                       const std::string &key,\n                                       const 
std::string &value,\n                                       int timeout_ms)\n{\n    ddebug(\"=== on_begin_write:id=%d,key=%s,value=%s,timeout=%d\",\n           id,\n           key.c_str(),\n           value.c_str(),\n           timeout_ms);\n    std::shared_ptr<write_context> ctx(new write_context());\n    ctx->id = id;\n    ctx->req.key = key;\n    ctx->req.value = value;\n    ctx->timeout_ms = timeout_ms;\n    auto &req = ctx->req;\n    _simple_kv_client->write(req,\n                             [ctx](error_code err, int32_t resp) {\n                                 test_case::instance().on_end_write(ctx->id, err, resp);\n                             },\n                             std::chrono::milliseconds(timeout_ms));\n}\n\nvoid simple_kv_client_app::send_config_to_meta(const rpc_address &receiver,\n                                               dsn::replication::config_type::type type,\n                                               const rpc_address &node)\n{\n    dsn::message_ex *req = dsn::message_ex::create_request(RPC_CM_PROPOSE_BALANCER, 30000);\n\n    configuration_balancer_request request;\n    request.gpid = g_default_gpid;\n\n    configuration_proposal_action act;\n    act.__set_target(receiver);\n    act.__set_node(node);\n    act.__set_type(type);\n    request.action_list.emplace_back(std::move(act));\n    request.__set_force(true);\n\n    dsn::marshall(req, request);\n\n    dsn_rpc_call_one_way(_meta_server_group, req);\n}\n\nstruct read_context\n{\n    int id;\n    std::string key;\n    int timeout_ms;\n};\n\nvoid simple_kv_client_app::begin_read(int id, const std::string &key, int timeout_ms)\n{\n    ddebug(\"=== on_begin_read:id=%d,key=%s,timeout=%d\", id, key.c_str(), timeout_ms);\n    std::shared_ptr<read_context> ctx(new read_context());\n    ctx->id = id;\n    ctx->key = key;\n    ctx->timeout_ms = timeout_ms;\n    _simple_kv_client->read(key,\n                            [ctx](error_code err, std::string &&resp) {\n                       
         test_case::instance().on_end_read(ctx->id, err, resp);\n                            },\n                            std::chrono::milliseconds(timeout_ms));\n}\n} // namespace test\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/client.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Replication testing framework.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/cpp/service_app.h>\n#include <dsn/dist/replication/replication_types.h>\n#include \"replica/storage/simple_kv/simple_kv.client.h\"\n\nnamespace dsn {\nnamespace replication {\nnamespace test {\n\nclass simple_kv_client_app : public ::dsn::service_app\n{\npublic:\n    simple_kv_client_app(const service_app_info *info);\n    virtual ~simple_kv_client_app();\n\n    virtual ::dsn::error_code start(const std::vector<std::string> &args) override;\n    virtual ::dsn::error_code stop(bool cleanup = false) 
override;\n\n    void run();\n\n    void begin_read(int id, const std::string &key, int timeout_ms);\n    void begin_write(int id, const std::string &key, const std::string &value, int timeout_ms);\n    void send_config_to_meta(const rpc_address &receiver,\n                             dsn::replication::config_type::type type,\n                             const rpc_address &node);\n\nprivate:\n    std::unique_ptr<application::simple_kv_client> _simple_kv_client;\n    rpc_address _meta_server_group;\n    rpc_address _service_addr;\n    dsn::task_tracker _tracker;\n};\n}\n}\n}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/common.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Replication testing framework.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"common.h\"\n#include \"checker.h\"\n\n#include <dsn/utility/utils.h>\n\n#include <sstream>\n#include <boost/lexical_cast.hpp>\n\nnamespace dsn {\nnamespace replication {\nnamespace test {\n\nstd::string g_case_input(\"case-000.act\");\ngpid g_default_gpid(1, 0);\nbool g_done = false;\nbool g_fail = false;\n\nconst char *partition_status_to_short_string(partition_status::type s)\n{\n    switch (s) {\n    case partition_status::PS_INACTIVE:\n        return \"ina\";\n    case partition_status::PS_ERROR:\n        return \"err\";\n   
 case partition_status::PS_PRIMARY:\n        return \"pri\";\n    case partition_status::PS_SECONDARY:\n        return \"sec\";\n    case partition_status::PS_POTENTIAL_SECONDARY:\n        return \"pot\";\n    case partition_status::PS_INVALID:\n        return \"inv\";\n    default:\n        dassert(false, \"invalid partition_status, status = %s\", ::dsn::enum_to_string(s));\n        return \"\";\n    }\n}\n\npartition_status::type partition_status_from_short_string(const std::string &str)\n{\n    if (str == \"ina\")\n        return partition_status::PS_INACTIVE;\n    if (str == \"err\")\n        return partition_status::PS_ERROR;\n    if (str == \"pri\")\n        return partition_status::PS_PRIMARY;\n    if (str == \"sec\")\n        return partition_status::PS_SECONDARY;\n    if (str == \"pot\")\n        return partition_status::PS_POTENTIAL_SECONDARY;\n    if (str == \"inv\")\n        return partition_status::PS_INVALID;\n    dassert(false, \"\");\n    return partition_status::PS_INVALID;\n}\n\nstd::string address_to_node(rpc_address addr)\n{\n    if (addr.is_invalid())\n        return \"-\";\n    dassert(test_checker::s_inited, \"\");\n    return test_checker::instance().address_to_node_name(addr);\n}\n\nrpc_address node_to_address(const std::string &name)\n{\n    if (name == \"-\")\n        return rpc_address();\n    dassert(test_checker::s_inited, \"\");\n    return test_checker::instance().node_name_to_address(name);\n}\n\nstd::string gpid_to_string(gpid gpid)\n{\n    std::stringstream oss;\n    oss << gpid.get_app_id() << \".\" << gpid.get_partition_index();\n    return oss.str();\n}\n\nbool gpid_from_string(const std::string &str, gpid &gpid)\n{\n    size_t pos = str.find('.');\n    if (pos == std::string::npos)\n        return false;\n    gpid.set_app_id(boost::lexical_cast<int32_t>(str.substr(0, pos)));\n    gpid.set_partition_index(boost::lexical_cast<int32_t>(str.substr(pos + 1)));\n    return true;\n}\n\nstd::string replica_id::to_string() const\n{\n   
 std::stringstream oss;\n#ifdef ENABLE_GPID\n    oss << gpid_to_string(gpid) << \"@\" << node;\n#else\n    oss << node;\n#endif\n    return oss.str();\n}\n\nbool replica_id::from_string(const std::string &str)\n{\n    if (str.empty())\n        return false;\n#ifdef ENABLE_GPID\n    size_t pos = str.find('@');\n    if (pos == std::string::npos)\n        return false;\n    if (!gpid_from_string(str.substr(0, pos), gpid))\n        return false;\n    node = str.substr(pos + 1);\n    if (node.empty())\n        return false;\n#else\n    node = str;\n#endif\n    return true;\n}\n\nstd::string replica_state::to_string() const\n{\n    std::stringstream oss;\n    oss << \"{\" << id.to_string() << \",\" << partition_status_to_short_string(status) << \",\" << ballot\n        << \",\" << last_committed_decree;\n    if (last_durable_decree != -1)\n        oss << \",\" << last_durable_decree;\n    oss << \"}\";\n    return oss.str();\n}\n\n//{r3,sec,3,0} or {r3,sec,3,1,0}\nbool replica_state::from_string(const std::string &str)\n{\n    if (str.size() < 2 || str[0] != '{' || str[str.size() - 1] != '}')\n        return false;\n    std::string s = str.substr(1, str.size() - 2);\n    std::vector<std::string> splits;\n    dsn::utils::split_args(s.c_str(), splits, ',');\n    if (splits.size() != 4 && splits.size() != 5)\n        return false;\n    if (!id.from_string(splits[0]))\n        return false;\n    status = partition_status_from_short_string(splits[1]);\n    ballot = boost::lexical_cast<int64_t>(splits[2]);\n    last_committed_decree = boost::lexical_cast<decree>(splits[3]);\n    if (splits.size() == 5)\n        last_durable_decree = boost::lexical_cast<decree>(splits[4]);\n    return true;\n}\n\nstd::string state_snapshot::to_string() const\n{\n    std::stringstream oss;\n    oss << \"{\";\n    int i = 0;\n    for (auto &kv : state_map) {\n        const replica_state &s = kv.second;\n        if (i != 0)\n            oss << \",\";\n        oss << s.to_string();\n        i++;\n  
  }\n    oss << \"}\";\n    return oss.str();\n}\n\n//{{r1,pri,3,0},{r2,sec,3,0},{r3,sec,3,0}}\nbool state_snapshot::from_string(const std::string &str)\n{\n    if (str.size() < 2 || str[0] != '{' || str[str.size() - 1] != '}')\n        return false;\n    state_map.clear();\n    std::string s = str.substr(1, str.size() - 2);\n    std::vector<std::string> splits;\n    dsn::utils::split_args(s.c_str(), splits, '{');\n    for (std::string &i : splits) {\n        if (i.empty())\n            continue;\n        if (i[i.size() - 1] == ',')\n            i.resize(i.size() - 1);\n        std::string x = \"{\" + i;\n        replica_state v;\n        if (!v.from_string(x))\n            return false;\n        if (state_map.find(v.id) != state_map.end())\n            return false;\n        state_map[v.id] = v;\n    }\n    return true;\n}\n\nstd::string state_snapshot::diff_string(const state_snapshot &other) const\n{\n    auto &oth = other.state_map;\n    auto &cur = this->state_map;\n\n    const char *add_mark = \"  + \";\n    const char *del_mark = \"  - \";\n    const char *chg_mark = \"  x \";\n    const char *unc_mark = \"    \";\n\n    auto oth_it = oth.begin();\n    auto cur_it = cur.begin();\n    std::stringstream oss;\n    oss << \"{\" << std::endl;\n    while (oth_it != oth.end() && cur_it != cur.end()) {\n        if (oth_it->first < cur_it->first) {\n            oss << del_mark << oth_it->second.to_string() << std::endl;\n            ++oth_it;\n        } else if (cur_it->first < oth_it->first) {\n            oss << add_mark << cur_it->second.to_string() << std::endl;\n            ++cur_it;\n        } else {\n            dassert(oth_it->first == cur_it->first,\n                    \"invalid replica_id, %s VS %s\",\n                    oth_it->first.to_string().c_str(),\n                    cur_it->first.to_string().c_str());\n            if (oth_it->second != cur_it->second) {\n                oss << chg_mark << cur_it->second.to_string()\n                    << \" <= 
\" << oth_it->second.to_string() << std::endl;\n            } else {\n                oss << unc_mark << cur_it->second.to_string() << std::endl;\n            }\n            ++oth_it;\n            ++cur_it;\n        }\n    }\n    while (oth_it != oth.end()) {\n        oss << del_mark << oth_it->second.to_string() << std::endl;\n        ++oth_it;\n    }\n    while (cur_it != cur.end()) {\n        oss << add_mark << cur_it->second.to_string() << std::endl;\n        ++cur_it;\n    }\n    oss << \"}\";\n\n    return oss.str();\n}\n\nstd::string parti_config::to_string() const\n{\n    std::stringstream oss;\n    oss << \"{\"\n#ifdef ENABLE_GPID\n        << gpid_to_string(gpid) << \",\"\n#endif\n        << ballot << \",\" << primary << \",[\";\n    for (size_t i = 0; i < secondaries.size(); ++i) {\n        if (i != 0)\n            oss << \",\";\n        oss << secondaries[i];\n    }\n    oss << \"]}\";\n    return oss.str();\n}\n\n//{3,r1,[r2,r3],0}\nbool parti_config::from_string(const std::string &str)\n{\n    if (str.size() < 2 || str[0] != '{' || str[str.size() - 1] != '}')\n        return false;\n    std::string s = str.substr(1, str.size() - 2);\n    // replace ',' in [] to ';'\n    size_t pos1 = s.find('[');\n    size_t pos2 = s.find(']');\n    if (pos1 == std::string::npos || pos2 == std::string::npos || pos1 > pos2)\n        return false;\n    for (size_t i = pos1 + 1; i < pos2; ++i) {\n        if (s[i] == ',')\n            s[i] = ';';\n    }\n    std::vector<std::string> splits;\n    dsn::utils::split_args(s.c_str(), splits, ',');\n    size_t i = 0;\n#ifdef ENABLE_GPID\n    // gpid\n    if (!gpid_from_string(splits[i++], gpid))\n        return false;\n#endif\n    // ballot\n    ballot = boost::lexical_cast<int64_t>(splits[i++]);\n    // primary\n    primary = splits[i++];\n    // secondaries\n    std::string sec = splits[i++];\n    if (sec.size() < 2 || sec[0] != '[' || sec[sec.size() - 1] != ']')\n        return false;\n    dsn::utils::split_args(sec.substr(1, 
sec.size() - 2).c_str(), secondaries, ';');\n    std::sort(secondaries.begin(), secondaries.end());\n    if (i != splits.size())\n        return false;\n    return true;\n}\n\nvoid parti_config::convert_from(const partition_configuration &c)\n{\n    pid = c.pid;\n    ballot = c.ballot;\n    primary = address_to_node(c.primary);\n    for (auto &s : c.secondaries)\n        secondaries.push_back(address_to_node(s));\n    std::sort(secondaries.begin(), secondaries.end());\n}\n}\n}\n}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/common.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Replication testing framework.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/dist/replication.h>\n#include \"common/replication_common.h\"\n\nnamespace dsn {\nnamespace replication {\nnamespace test {\n\nextern std::string g_case_input;\nextern gpid g_default_gpid;\nextern bool g_done;\nextern bool g_fail;\n\nconst char *partition_status_to_short_string(partition_status::type s);\npartition_status::type partition_status_from_short_string(const std::string &str);\n\n// transfer primary_address to node_name\n// return \"-\" if addr.is_invalid()\n// return \"node@port\" if not 
found\nstd::string address_to_node(rpc_address addr);\n// transfer node_name to primary_address\n// return invalid addr if not found\nrpc_address node_to_address(const std::string &name);\n\nstd::string gpid_to_string(gpid gpid);\nbool gpid_from_string(const std::string &str, gpid &gpid);\n\nstruct replica_id\n{\n    gpid pid;\n    std::string node;\n    replica_id() : pid(g_default_gpid) {}\n    replica_id(gpid g, const std::string &n) : pid(g), node(n) {}\n    replica_id &operator=(const replica_id &o)\n    {\n        if (this == &o)\n            return *this;\n        pid = o.pid;\n        node = o.node;\n        return *this;\n    }\n    bool operator<(const replica_id &o) const\n    {\n        return (pid < o.pid) || (pid == o.pid && node < o.node);\n    }\n    bool operator==(const replica_id &o) const { return pid == o.pid && node == o.node; }\n    bool operator!=(const replica_id &o) const { return !(*this == o); }\n    std::string to_string() const;\n    bool from_string(const std::string &str);\n};\n\nstruct replica_state\n{\n    replica_id id;\n    partition_status::type status;\n    int64_t ballot;\n    decree last_committed_decree;\n    decree last_durable_decree; // -1 means not set\n    replica_state()\n        : status(partition_status::PS_INACTIVE),\n          ballot(0),\n          last_committed_decree(0),\n          last_durable_decree(-1)\n    {\n    }\n    replica_state &operator=(const replica_state &o)\n    {\n        if (this == &o)\n            return *this;\n        id = o.id;\n        status = o.status;\n        ballot = o.ballot;\n        last_committed_decree = o.last_committed_decree;\n        last_durable_decree = o.last_durable_decree;\n        return *this;\n    }\n    bool operator==(const replica_state &o) const\n    {\n        return id == o.id && status == o.status && ballot == o.ballot &&\n               last_committed_decree == o.last_committed_decree &&\n               (last_durable_decree == -1 || o.last_durable_decree == -1 
||\n                last_durable_decree == o.last_durable_decree);\n    }\n    bool operator!=(const replica_state &o) const { return !(*this == o); }\n    std::string to_string() const;\n    bool from_string(const std::string &str);\n};\n\nstruct state_snapshot\n{\n    std::map<replica_id, replica_state> state_map;\n    state_snapshot &operator=(const state_snapshot &o)\n    {\n        if (this == &o)\n            return *this;\n        state_map = o.state_map;\n        return *this;\n    }\n    bool operator==(const state_snapshot &o) const { return state_map == o.state_map; }\n    bool operator!=(const state_snapshot &o) const { return !(*this == o); }\n    bool operator<(const state_snapshot &o) const\n    {\n        for (auto &kv : state_map) {\n            auto find = o.state_map.find(kv.first);\n            if (find == o.state_map.end())\n                continue;\n            const replica_state &oth_state = find->second;\n            const replica_state &cur_state = kv.second;\n            if (cur_state.ballot > oth_state.ballot ||\n                cur_state.last_committed_decree > oth_state.last_committed_decree)\n                return false;\n            if (cur_state.last_durable_decree != -1 && oth_state.last_durable_decree != -1 &&\n                cur_state.last_durable_decree > oth_state.last_durable_decree)\n                return false;\n        }\n        return true;\n    }\n    std::string to_string() const;\n    bool from_string(const std::string &str);\n    std::string diff_string(const state_snapshot &other) const;\n};\n\nstruct parti_config\n{\n    gpid pid;\n    int64_t ballot;\n    std::string primary;\n    std::vector<std::string> secondaries;\n    parti_config() : pid(g_default_gpid), ballot(0) {}\n    parti_config &operator=(const parti_config &o)\n    {\n        if (this == &o)\n            return *this;\n        pid = o.pid;\n        ballot = o.ballot;\n        primary = o.primary;\n        secondaries = o.secondaries;\n        
return *this;\n    }\n    bool operator==(const parti_config &o) const\n    {\n        return pid == o.pid && ballot == o.ballot && primary == o.primary &&\n               secondaries == o.secondaries;\n    }\n    bool operator!=(const parti_config &o) const { return !(*this == o); }\n    bool operator<(const parti_config &o) const { return pid == o.pid && ballot < o.ballot; }\n    std::string to_string() const;\n    bool from_string(const std::string &str);\n    void convert_from(const partition_configuration &c);\n};\n}\n}\n}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/config.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n\n[apps.meta]\ntype = meta\narguments = \nports = 34601\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT,THREAD_POOL_META_SERVER,THREAD_POOL_FD\n\n[apps.simple_kv]\ntype = simple_kv\narguments = \nports = 34801\nrun = true\ncount = 0\npools = THREAD_POOL_DEFAULT\n\n[apps.replica]\ntype = replica\narguments = \nports = 34801\nrun = true\ncount = 3\npools = 
THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_FD,THREAD_POOL_LOCAL_APP,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\nhosted_app_type_name = simple_kv\nhosted_app_arguments = \n\n[apps.client]\n\ntype = client\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n[apps.client.perf.test]\ntype = client.perf.test\narguments = dsn://mycluster/simple_kv.instance0\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT\n\n\n[core]\n\n\ntool = simulator\n;tool = nativerun\n;toollets = tracer\n;toollets = fault_injector\n;toollets = tracer, fault_injector\ntoollets = tracer, profiler, fault_injector\n;toollets = profiler, fault_injector\npause_on_start = false\n\n;logging_start_level = LOG_LEVEL_WARNING\n;logging_factory_name = dsn::tools::screen_logger\n;logging_factory_name = dsn::tools::hpc_logger\n\n[tools.simulator]\nrandom_seed = 0\n;min_message_delay_microseconds = 0\n;max_message_delay_microseconds = 0\n\n[network]\n; how many network threads for network library(used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n\n[threadpool..default]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_DEFAULT]\nname = default\npartitioned = false\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[threadpool.THREAD_POOL_REPLICATION]\nname = replication\npartitioned = true\nworker_priority = THREAD_xPRIORITY_LOWEST\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\ndisk_write_fail_ratio = 0.0\ndisk_read_fail_ratio = 0.00001\n\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nallow_inline = false\ndisk_write_fail_ratio = 0.0\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\n\n[task.LPC_CHECKPOINT_REPLICA]\n;execution_extra_delay_us_max = 10000000\n\n[task.LPC_LEARN_REMOTE_DELTA_FILES]\n;execution_extra_delay_us_max 
= 10000000\n\n[task.RPC_FD_FAILURE_DETECTOR_PING]\nis_trace = false\nrpc_call_channel = RPC_CHANNEL_UDP\n\n[task.RPC_FD_FAILURE_DETECTOR_PING_ACK]\nis_trace = false\nrpc_call_channel = RPC_CHANNEL_UDP\n\n[task.LPC_BEACON_CHECK]\nis_trace = false\n\n[task.RPC_PREPARE]\nrpc_request_resend_timeout_milliseconds = 8000\n\n[task.LPC_DAEMON_APPS_CHECK_TIMER]\nis_trace = false\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_WRITE]\nrpc_timeout_milliseconds = 5000\n\n[task.RPC_SIMPLE_KV_SIMPLE_KV_APPEND]\nrpc_timeout_milliseconds = 5000\n\n[meta_server]\nmin_live_node_count_for_unfreeze = 1\nserver_list = localhost:34601\n\n[uri-resolver.dsn://mycluster]\nfactory = partition_resolver_simple\narguments = localhost:34601\n\n[replication.app]\napp_name = simple_kv.instance0\napp_type = simple_kv\npartition_count = 1\nmax_replica_count = 3\nstateful = true\n\n[replication]\n\nprepare_timeout_ms_for_secondaries = 10000\nprepare_timeout_ms_for_potential_secondaries = 20000\n\nlearn_timeout_ms = 30000\nstaleness_for_commit = 20\nstaleness_for_start_prepare_for_potential_secondary = 110\nmutation_max_size_mb = 15\nmutation_max_pending_time_ms = 20\nmutation_2pc_min_replica_count = 2\n\nprepare_list_max_size_mb = 250\nrequest_batch_disabled = false\ngroup_check_internal_ms = 100000\ngroup_check_disabled = false\nfd_disabled = false\nfd_check_interval_seconds = 5\nfd_beacon_interval_seconds = 3\nfd_lease_seconds = 14\nfd_grace_seconds = 15\nworking_dir = .\nlog_buffer_size_mb = 1\nlog_pending_max_ms = 100\nlog_file_size_mb = 32\nlog_batch_write = true\n\nlog_enable_shared_prepare = true\nlog_enable_private_commit = false\n\nconfig_sync_interval_ms = 60000\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/injector.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Replication testing framework.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"injector.h\"\n#include \"checker.h\"\n#include \"case.h\"\n\n#include <dsn/toollet/fault_injector.h>\n#include <dsn/service_api_c.h>\n#include <dsn/utility/autoref_ptr.h>\n\n#include <iostream>\n\nnamespace dsn {\nnamespace replication {\nnamespace test {\n\nstatic void inject_on_task_enqueue(task *caller, task *callee)\n{\n    if (!test_checker::s_inited)\n        return;\n\n    event_on_task_enqueue event;\n    event.init(callee);\n\n    test_case::instance().on_event(&event);\n}\n\nstatic void inject_on_task_begin(task 
*this_)\n{\n    if (!test_checker::s_inited)\n        return;\n\n    event_on_task_begin event;\n    event.init(this_);\n\n    test_case::instance().on_event(&event);\n}\n\nstatic void inject_on_task_end(task *this_)\n{\n    if (!test_checker::s_inited)\n        return;\n\n    event_on_task_end event;\n    event.init(this_);\n\n    test_case::instance().on_event(&event);\n}\n\nstatic void inject_on_task_cancelled(task *this_)\n{\n    if (!test_checker::s_inited)\n        return;\n\n    event_on_task_cancelled event;\n    event.init(this_);\n\n    test_case::instance().on_event(&event);\n}\n\nstatic void inject_on_task_wait_pre(task *caller, task *callee, uint32_t timeout_ms)\n{\n    if (!test_checker::s_inited)\n        return;\n}\n\nstatic void inject_on_task_wait_post(task *caller, task *callee, bool succ)\n{\n    if (!test_checker::s_inited)\n        return;\n}\n\nstatic void inject_on_task_cancel_post(task *caller, task *callee, bool succ)\n{\n    if (!test_checker::s_inited)\n        return;\n}\n\nstatic bool inject_on_aio_call(task *caller, aio_task *callee)\n{\n    if (!test_checker::s_inited)\n        return true;\n\n    event_on_aio_call event;\n    event.init(callee);\n\n    return test_case::instance().on_event(&event);\n}\n\nstatic void inject_on_aio_enqueue(aio_task *this_)\n{\n    if (!test_checker::s_inited)\n        return;\n\n    event_on_aio_enqueue event;\n    event.init(this_);\n\n    test_case::instance().on_event(&event);\n}\n\nstatic bool inject_on_rpc_call(task *caller, message_ex *req, rpc_response_task *callee)\n{\n    if (!test_checker::s_inited)\n        return true;\n\n    event_on_rpc_call event;\n    event.init(req, nullptr);\n\n    return test_case::instance().on_event(&event);\n}\n\nstatic bool inject_on_rpc_request_enqueue(rpc_request_task *callee)\n{\n    if (!test_checker::s_inited)\n        return true;\n\n    event_on_rpc_request_enqueue event;\n    event.init(callee);\n\n    return 
test_case::instance().on_event(&event);\n}\n\nstatic bool inject_on_rpc_reply(task *caller, message_ex *msg)\n{\n    if (!test_checker::s_inited)\n        return true;\n\n    event_on_rpc_reply event;\n    event.init(msg, nullptr);\n\n    return test_case::instance().on_event(&event);\n}\n\nstatic bool inject_on_rpc_response_enqueue(rpc_response_task *resp)\n{\n    if (!test_checker::s_inited)\n        return true;\n\n    event_on_rpc_response_enqueue event;\n    event.init(resp);\n\n    return test_case::instance().on_event(&event);\n}\n\nvoid test_injector::install(service_spec &svc_spec)\n{\n    for (int i = 0; i <= dsn::task_code::max(); i++) {\n        if (i == TASK_CODE_INVALID)\n            continue;\n\n        task_spec *spec = task_spec::get(i);\n\n        spec->on_task_enqueue.put_back(inject_on_task_enqueue, \"test_injector\");\n        spec->on_task_begin.put_back(inject_on_task_begin, \"test_injector\");\n        spec->on_task_end.put_back(inject_on_task_end, \"test_injector\");\n        spec->on_task_cancelled.put_back(inject_on_task_cancelled, \"test_injector\");\n        spec->on_task_wait_pre.put_back(inject_on_task_wait_pre, \"test_injector\");\n        spec->on_task_wait_post.put_back(inject_on_task_wait_post, \"test_injector\");\n        spec->on_task_cancel_post.put_back(inject_on_task_cancel_post, \"test_injector\");\n        spec->on_aio_call.put_native(inject_on_aio_call);\n        spec->on_aio_enqueue.put_back(inject_on_aio_enqueue, \"test_injector\");\n        spec->on_rpc_call.put_native(inject_on_rpc_call);\n        spec->on_rpc_request_enqueue.put_native(inject_on_rpc_request_enqueue);\n        spec->on_rpc_reply.put_native(inject_on_rpc_reply);\n        spec->on_rpc_response_enqueue.put_native(inject_on_rpc_response_enqueue);\n    }\n\n    // ddebug(\"=== test_injector installed\");\n}\n\ntest_injector::test_injector(const char *name) : toollet(name) {}\n}\n}\n}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/injector.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Replication testing framework.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool_api.h>\n\nnamespace dsn {\nnamespace replication {\nnamespace test {\n\nclass test_injector : public dsn::tools::toollet\n{\npublic:\n    test_injector(const char *name);\n    virtual void install(service_spec &spec);\n};\n}\n}\n}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/run.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nbin=./dsn.rep_tests.simple_kv\n\nfunction run_single()\n{\n    prefix=$1\n    echo \"${bin} ${prefix}.ini ${prefix}.act\"\n    ${bin} ${prefix}.ini ${prefix}.act\n    ret=$?\n    if find . -name log.1.txt &>/dev/null; then\n        log=`find . 
-name log.1.txt`\n        cat ${log} | grep -v FAILURE_DETECT | grep -v BEACON | grep -v beacon | grep -v THREAD_POOL_FD >${prefix}.log\n        rm ${log}\n    fi\n\n    if [ ${ret} -ne 0 ]; then\n        echo \"run ${prefix} failed, return value = ${ret}\"\n        if [ -f core ]; then\n            echo \"---- gdb ./dsn.rep_tests.simple_kv core ----\"\n            gdb ./dsn.rep_tests.simple_kv core -ex \"thread apply all bt\" -ex \"set pagination 0\" -batch\n        fi\n        exit 1\n    fi\n}\n\nfunction run_case()\n{\n    id=$1\n\n    if [ -d case-${id} ]; then\n        cd case-${id}\n        ./run.sh\n        if [ $? -ne 0 ]; then\n            exit 1\n        fi\n        cd ..\n        return\n    fi\n\n    if [ -f case-${id}.act ]; then\n        ./clear.sh\n        run_single case-${id}\n        return\n    fi\n\n    subcases=`ls case-${id}-[0-9].act 2>/dev/null | sed -n 's/^case-[0-9][0-9][0-9]-\\([0-9]\\).act$/\\1/p' | sort -u`\n    if [ ! -z \"${subcases}\" ]; then\n        ./clear.sh\n        for subid in ${subcases}; do\n            run_single case-${id}-${subid}\n        done\n        return\n    fi\n\n    echo \"case-${id} not found\"\n    exit 1\n}\n\nif [ $# -eq 0 ]; then\n    if [ ! -z \"${DSN_TEST_FILTER}\" ]; then\n        cases=`echo ${DSN_TEST_FILTER} | sed 's/[,:]/ /g'`\n    else\n        cases=`ls case-* 2>/dev/null | sed -n 's/^case-\\([0-9][0-9][0-9]\\).*$/\\1/p' | sort -u`\n    fi\nelse\n    cases=$*\nfi\n\nif [ ! -z \"${cases}\" ]; then\n    for id in ${cases}; do\n        run_case ${id}\n        echo\n    done\nfi\n\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/simple_kv.main.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Replication testing framework.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"checker.h\"\n#include \"injector.h\"\n#include \"case.h\"\n#include \"client.h\"\n#include \"simple_kv.server.impl.h\"\n#include <dsn/http/http_server.h>\n\nvoid dsn_app_registration_simple_kv()\n{\n    dsn::FLAGS_enable_http_server = false;\n    dsn::replication::test::simple_kv_service_impl::register_service();\n\n    dsn::service::meta_service_app::register_all();\n    dsn::replication::replication_service_app::register_all();\n\n    
dsn::service_app::register_factory<dsn::replication::test::simple_kv_client_app>(\"client\");\n    dsn::tools::register_toollet<dsn::replication::test::test_injector>(\"test_injector\");\n    dsn::replication::test::install_checkers();\n}\n\nint main(int argc, char **argv)\n{\n    if (argc != 3) {\n        std::cerr << \"USAGE: \" << argv[0] << \" <config-file> <case-input>\" << std::endl;\n        std::cerr << \" e.g.: \" << argv[0] << \" case-000.ini case-000.act\" << std::endl;\n        return -1;\n    }\n\n    dsn::replication::test::g_case_input = argv[2];\n\n    dsn_app_registration_simple_kv();\n\n    // specify what services and tools will run in config file, then run\n    dsn_run(argc - 1, argv, false);\n\n    while (!dsn::replication::test::g_done) {\n        std::this_thread::sleep_for(std::chrono::milliseconds(1));\n    }\n\n    ddebug(\"=== exiting ...\");\n\n    dsn::replication::test::test_checker::instance().exit();\n\n    if (dsn::replication::test::g_fail) {\n#ifndef ENABLE_GCOV\n        dsn_exit(-1);\n#endif\n        return -1;\n    }\n\n#ifndef ENABLE_GCOV\n    dsn_exit(0);\n#endif\n    return 0;\n}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/simple_kv.server.impl.cpp",
    "content": "/*\n* The MIT License (MIT)\n*\n* Copyright (c) 2015 Microsoft Corporation\n*\n* -=- Robust Distributed System Nucleus (rDSN) -=-\n*\n* Permission is hereby granted, free of charge, to any person obtaining a copy\n* of this software and associated documentation files (the \"Software\"), to deal\n* in the Software without restriction, including without limitation the rights\n* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n* copies of the Software, and to permit persons to whom the Software is\n* furnished to do so, subject to the following conditions:\n*\n* The above copyright notice and this permission notice shall be included in\n* all copies or substantial portions of the Software.\n*\n* THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n* THE SOFTWARE.\n*/\n#include \"simple_kv.server.impl.h\"\n#include <fstream>\n#include <sstream>\n#include <dsn/utility/filesystem.h>\n\n#define VALUE_NOT_EXIST \"<<not-exist>>\"\n\nnamespace dsn {\nnamespace replication {\nnamespace test {\n\nbool simple_kv_service_impl::s_simple_kv_open_fail = false;\nbool simple_kv_service_impl::s_simple_kv_close_fail = false;\nbool simple_kv_service_impl::s_simple_kv_get_checkpoint_fail = false;\nbool simple_kv_service_impl::s_simple_kv_apply_checkpoint_fail = false;\n\nsimple_kv_service_impl::simple_kv_service_impl(replica *r) : simple_kv_service(r), _lock(true)\n{\n    reset_state();\n    ddebug(\"simple_kv_service_impl inited\");\n}\n\nvoid simple_kv_service_impl::reset_state()\n{\n    _test_file_learning = 
dsn_config_get_value_bool(\"test\", \"test_file_learning\", true, \"\");\n    _last_durable_decree = 0;\n}\n\n// RPC_SIMPLE_KV_READ\nvoid simple_kv_service_impl::on_read(const std::string &key, ::dsn::rpc_replier<std::string> &reply)\n{\n    dsn::zauto_lock l(_lock);\n\n    std::string value;\n    auto it = _store.find(key);\n    if (it == _store.end()) {\n        value = VALUE_NOT_EXIST;\n    } else {\n        value = it->second;\n    }\n\n    // ddebug(\"=== on_exec_read:int64_t=%\" PRId64 \",key=%s,value=%s\", last_committed_decree(),\n    // key.c_str(), value.c_str());\n    reply(value);\n}\n\n// RPC_SIMPLE_KV_WRITE\nvoid simple_kv_service_impl::on_write(const kv_pair &pr, ::dsn::rpc_replier<int32_t> &reply)\n{\n    dsn::zauto_lock l(_lock);\n    _store[pr.key] = pr.value;\n\n    // ddebug(\"=== on_exec_write:int64_t=%\" PRId64 \",key=%s,value=%s\", last_committed_decree(),\n    // pr.key.c_str(), pr.value.c_str());\n    reply(0);\n}\n\n// RPC_SIMPLE_KV_APPEND\nvoid simple_kv_service_impl::on_append(const kv_pair &pr, ::dsn::rpc_replier<int32_t> &reply)\n{\n    dsn::zauto_lock l(_lock);\n    auto it = _store.find(pr.key);\n    if (it != _store.end())\n        it->second.append(pr.value);\n    else\n        _store[pr.key] = pr.value;\n\n    // ddebug(\"=== on_exec_append:int64_t=%\" PRId64 \",key=%s,value=%s\", last_committed_decree(),\n    // pr.key.c_str(), pr.value.c_str());\n    reply(0);\n}\n\n::dsn::error_code simple_kv_service_impl::start(int argc, char **argv)\n{\n    if (s_simple_kv_open_fail) {\n        return ERR_CORRUPTION;\n    }\n\n    dsn::zauto_lock l(_lock);\n    recover();\n    ddebug(\"simple_kv_service_impl opened\");\n    return ERR_OK;\n}\n\n::dsn::error_code simple_kv_service_impl::stop(bool clear_state)\n{\n    if (s_simple_kv_close_fail) {\n        return ERR_CORRUPTION;\n    }\n\n    dsn::zauto_lock l(_lock);\n    if (clear_state) {\n        if (!dsn::utils::filesystem::remove_path(data_dir().c_str())) {\n            dassert(false, 
\"Fail to delete directory %s.\", data_dir().c_str());\n        }\n        _store.clear();\n        reset_state();\n    }\n    ddebug(\"simple_kv_service_impl closed, clear_state = %s\", clear_state ? \"true\" : \"false\");\n    return ERR_OK;\n}\n\n// checkpoint related\nvoid simple_kv_service_impl::recover()\n{\n    dsn::zauto_lock l(_lock);\n\n    _store.clear();\n\n    int64_t max_version = 0;\n    std::string name;\n\n    std::vector<std::string> sub_list;\n    std::string path = data_dir();\n    if (!dsn::utils::filesystem::get_subfiles(path, sub_list, false)) {\n        dassert(false, \"Fail to get subfiles in %s.\", path.c_str());\n    }\n    for (auto &fpath : sub_list) {\n        auto &&s = dsn::utils::filesystem::get_file_name(fpath);\n        if (s.substr(0, strlen(\"checkpoint.\")) != std::string(\"checkpoint.\"))\n            continue;\n\n        int64_t version = static_cast<int64_t>(atoll(s.substr(strlen(\"checkpoint.\")).c_str()));\n        if (version > max_version) {\n            max_version = version;\n            name = std::string(data_dir()) + \"/\" + s;\n        }\n    }\n    sub_list.clear();\n\n    if (max_version > 0) {\n        recover(name, max_version);\n        set_last_durable_decree(max_version);\n    }\n    ddebug(\"simple_kv_service_impl recovered, last_durable_decree = %\" PRId64 \"\",\n           last_durable_decree());\n}\n\nvoid simple_kv_service_impl::recover(const std::string &name, int64_t version)\n{\n    dsn::zauto_lock l(_lock);\n\n    std::ifstream is(name.c_str(), std::ios::binary);\n    if (!is.is_open())\n        return;\n\n    _store.clear();\n\n    uint64_t count;\n    int magic;\n\n    is.read((char *)&count, sizeof(count));\n    is.read((char *)&magic, sizeof(magic));\n    dassert(magic == 0xdeadbeef, \"invalid checkpoint\");\n\n    for (uint64_t i = 0; i < count; i++) {\n        std::string key;\n        std::string value;\n\n        uint32_t sz;\n        is.read((char *)&sz, (uint32_t)sizeof(sz));\n        
key.resize(sz);\n\n        is.read((char *)&key[0], sz);\n\n        is.read((char *)&sz, (uint32_t)sizeof(sz));\n        value.resize(sz);\n\n        is.read((char *)&value[0], sz);\n\n        _store[key] = value;\n    }\n}\n\n::dsn::error_code simple_kv_service_impl::sync_checkpoint()\n{\n    dsn::zauto_lock l(_lock);\n\n    int64_t last_commit = last_committed_decree();\n    if (last_commit == last_durable_decree()) {\n        ddebug(\"simple_kv_service_impl no need to create checkpoint, \"\n               \"checkpoint already the latest, last_durable_decree = %\" PRId64 \"\",\n               last_durable_decree());\n        return ERR_OK;\n    }\n\n    // TODO: should use async write instead\n    char name[256];\n    sprintf(name, \"%s/checkpoint.%\" PRId64, data_dir().c_str(), last_commit);\n    std::ofstream os(name, std::ios::binary);\n\n    uint64_t count = (uint64_t)_store.size();\n    int magic = 0xdeadbeef;\n\n    os.write((const char *)&count, (uint32_t)sizeof(count));\n    os.write((const char *)&magic, (uint32_t)sizeof(magic));\n\n    for (auto it = _store.begin(); it != _store.end(); ++it) {\n        const std::string &k = it->first;\n        uint32_t sz = (uint32_t)k.length();\n\n        os.write((const char *)&sz, (uint32_t)sizeof(sz));\n        os.write((const char *)&k[0], sz);\n\n        const std::string &v = it->second;\n        sz = (uint32_t)v.length();\n\n        os.write((const char *)&sz, (uint32_t)sizeof(sz));\n        os.write((const char *)&v[0], sz);\n    }\n\n    set_last_durable_decree(last_commit);\n    ddebug(\"simple_kv_service_impl create checkpoint succeed, \"\n           \"last_durable_decree = %\" PRId64 \"\",\n           last_durable_decree());\n    return ERR_OK;\n}\n\n::dsn::error_code simple_kv_service_impl::async_checkpoint(bool flush_memtable)\n{\n    return sync_checkpoint();\n}\n\n// helper routines to accelerate learning\n::dsn::error_code simple_kv_service_impl::get_checkpoint(int64_t learn_start,\n                   
                                      const dsn::blob &learn_request,\n                                                         /*out*/ learn_state &state)\n{\n    if (s_simple_kv_get_checkpoint_fail) {\n        return ERR_CORRUPTION;\n    }\n\n    if (last_durable_decree() == 0) {\n        sync_checkpoint();\n    }\n\n    if (last_durable_decree() > 0) {\n        char name[256];\n        sprintf(name, \"%s/checkpoint.%\" PRId64, data_dir().c_str(), last_durable_decree());\n\n        state.from_decree_excluded = 0;\n        state.to_decree_included = last_durable_decree();\n        state.files.push_back(std::string(name));\n\n        ddebug(\"simple_kv_service_impl get checkpoint succeed, last_durable_decree = %\" PRId64 \"\",\n               last_durable_decree());\n        return ERR_OK;\n    } else {\n        state.from_decree_excluded = 0;\n        state.to_decree_included = 0;\n        derror(\"simple_kv_service_impl get checkpoint failed, no checkpoint found\");\n        return ERR_OBJECT_NOT_FOUND;\n    }\n}\n\n::dsn::error_code simple_kv_service_impl::storage_apply_checkpoint(chkpt_apply_mode mode,\n                                                                   const learn_state &state)\n{\n    if (s_simple_kv_apply_checkpoint_fail) {\n        return ERR_CORRUPTION;\n    }\n\n    if (mode == replication_app_base::chkpt_apply_mode::learn) {\n        recover(state.files[0], state.to_decree_included);\n        // ddebug(\"simple_kv_service_impl learn checkpoint succeed, last_committed_decree = %\"\n        // PRId64 \"\", last_committed_decree());\n        return ERR_OK;\n    } else {\n        dassert(replication_app_base::chkpt_apply_mode::copy == mode, \"invalid mode %d\", (int)mode);\n        dassert(state.to_decree_included > last_durable_decree(),\n                \"checkpoint's decree is smaller than current\");\n\n        char name[256];\n        sprintf(name, \"%s/checkpoint.%\" PRId64, data_dir().c_str(), state.to_decree_included);\n        
std::string lname(name);\n\n        if (!utils::filesystem::rename_path(state.files[0], lname)) {\n            derror(\"simple_kv_service_impl copy checkpoint failed, rename path failed\");\n            return ERR_CHECKPOINT_FAILED;\n        } else {\n            set_last_durable_decree(state.to_decree_included);\n            ddebug(\"simple_kv_service_impl copy checkpoint succeed, last_durable_decree = %\" PRId64\n                   \"\",\n                   last_durable_decree());\n            return ERR_OK;\n        }\n    }\n}\n}\n}\n}\n"
  },
  {
    "path": "src/replica/storage/simple_kv/test/simple_kv.server.impl.h",
    "content": "/*\n* The MIT License (MIT)\n*\n* Copyright (c) 2015 Microsoft Corporation\n*\n* -=- Robust Distributed System Nucleus (rDSN) -=-\n*\n* Permission is hereby granted, free of charge, to any person obtaining a copy\n* of this software and associated documentation files (the \"Software\"), to deal\n* in the Software without restriction, including without limitation the rights\n* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n* copies of the Software, and to permit persons to whom the Software is\n* furnished to do so, subject to the following conditions:\n*\n* The above copyright notice and this permission notice shall be included in\n* all copies or substantial portions of the Software.\n*\n* THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n* THE SOFTWARE.\n*/\n#pragma once\n\n#include <dsn/tool-api/zlocks.h>\n#include \"replica/storage/simple_kv/simple_kv.server.h\"\n\nnamespace dsn {\nnamespace replication {\nnamespace test {\n\nusing namespace dsn::replication::application;\n\nclass simple_kv_service_impl : public application::simple_kv_service\n{\npublic:\n    static bool s_simple_kv_open_fail;\n    static bool s_simple_kv_close_fail;\n    static bool s_simple_kv_get_checkpoint_fail;\n    static bool s_simple_kv_apply_checkpoint_fail;\n\n    static void register_service()\n    {\n        replication_app_base::register_storage_engine(\n            \"simple_kv\", replication_app_base::create<simple_kv_service_impl>);\n        simple_kv_service::register_rpc_handlers();\n    }\n\npublic:\n    
simple_kv_service_impl(replica *r);\n\n    // RPC_SIMPLE_KV_READ\n    virtual void on_read(const std::string &key, ::dsn::rpc_replier<std::string> &reply);\n    // RPC_SIMPLE_KV_WRITE\n    virtual void on_write(const kv_pair &pr, ::dsn::rpc_replier<int32_t> &reply);\n    // RPC_SIMPLE_KV_APPEND\n    virtual void on_append(const kv_pair &pr, ::dsn::rpc_replier<int32_t> &reply);\n\n    virtual ::dsn::error_code start(int argc, char **argv) override;\n\n    virtual ::dsn::error_code stop(bool cleanup = false) override;\n\n    virtual int64_t last_durable_decree() const override { return _last_durable_decree; }\n\n    virtual ::dsn::error_code sync_checkpoint() override;\n\n    virtual ::dsn::error_code prepare_get_checkpoint(blob &learn_req) { return dsn::ERR_OK; }\n\n    virtual ::dsn::error_code async_checkpoint(bool flush_memtable) override;\n\n    virtual ::dsn::error_code copy_checkpoint_to_dir(const char *checkpoint_dir,\n                                                     int64_t *last_decree,\n                                                     bool flush_memtable = false) override\n    {\n        return ERR_NOT_IMPLEMENTED;\n    }\n\n    virtual ::dsn::error_code get_checkpoint(int64_t learn_start,\n                                             const dsn::blob &learn_request,\n                                             /*out*/ learn_state &state) override;\n\n    virtual ::dsn::error_code storage_apply_checkpoint(chkpt_apply_mode mode,\n                                                       const learn_state &state) override;\n\n    std::string query_compact_state() const override { return \"\"; }\n\n    virtual void update_app_envs(const std::map<std::string, std::string> &envs) {}\n\n    virtual void query_app_envs(/*out*/ std::map<std::string, std::string> &envs) {}\n\n    virtual uint32_t query_data_version() const override { return 0; }\n\n    virtual ::dsn::replication::manual_compaction_status::type query_compact_status() const override\n    {\n     
   return dsn::replication::manual_compaction_status::IDLE;\n    }\n\nprivate:\n    void recover();\n    void recover(const std::string &name, int64_t version);\n    void set_last_durable_decree(int64_t d) { _last_durable_decree = d; }\n\n    void reset_state();\n\nprivate:\n    typedef std::map<std::string, std::string> simple_kv;\n    simple_kv _store;\n    ::dsn::zlock _lock;\n    bool _test_file_learning;\n\n    int64_t _last_durable_decree;\n};\n}\n}\n}\n"
  },
  {
    "path": "src/replica/test/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn.replica.test)\n\n#Source files under CURRENT project directory will be automatically included.\n#You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"\")\n\n#Search mode for source files under CURRENT project directory ?\n#\"GLOB_RECURSE\" for recursive search\n#\"GLOB\" for non - recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS dsn_meta_server\n                 dsn_replica_server\n                 dsn.replication.zookeeper_provider\n                 dsn_replication_common\n                 dsn.block_service\n                 dsn.block_service.local\n                 dsn.block_service.fds\n                 dsn.block_service.hdfs\n                 dsn.failure_detector\n                 dsn_http\n                 dsn_runtime\n                 zookeeper\n                 hashtable\n                 gtest)\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\n#Extra files that will be installed\nset(MY_BINPLACES clear.sh run.sh config-test.ini)\ndsn_add_test()\n"
  },
  {
    "path": "src/replica/test/backup_block_service_mock.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <iostream>\n#include <dsn/utility/filesystem.h>\n#include <dsn/tool-api/auto_codes.h>\n#include <dsn/tool-api/task_tracker.h>\n#include <dsn/dist/block_service.h>\n#include \"replica/replica_context.h\"\n#include \"replication_service_test_app.h\"\n#include \"block_service/test/block_service_mock.h\"\n#include \"common/backup_common.h\"\n\nusing namespace ::dsn;\nusing namespace ::dsn::dist::block_service;\nusing namespace ::dsn::replication;\n\nextern ref_ptr<block_file_mock> current_chkpt_file;\nextern ref_ptr<block_file_mock> backup_metadata_file;\nextern ref_ptr<block_file_mock> regular_file;\n\nclass backup_block_service_mock : public block_service_mock\n{\npublic:\n    virtual dsn::task_ptr create_file(const create_file_request &req,\n                                      dsn::task_code code,\n                                      const create_file_callback &cb,\n                                      dsn::task_tracker *tracker = nullptr)\n    {\n        create_file_response resp;\n        if (enable_create_file_fail) {\n            resp.err = ERR_MOCK_INTERNAL;\n        } else {\n            resp.err = ERR_OK;\n 
           auto it = files.find(req.file_name);\n            if (it != files.end()) {\n                resp.file_handle =\n                    new block_file_mock(req.file_name, it->second.first, it->second.second);\n            } else {\n                std::string filename = ::dsn::utils::filesystem::get_file_name(req.file_name);\n                if (filename == cold_backup_constant::CURRENT_CHECKPOINT) {\n                    resp.file_handle = current_chkpt_file;\n                    std::cout << \"current_ckpt_file is selected...\" << std::endl;\n                } else if (filename == cold_backup_constant::BACKUP_METADATA) {\n                    resp.file_handle = backup_metadata_file;\n                    std::cout << \"backup_metadata_file is selected...\" << std::endl;\n                } else {\n                    resp.file_handle = regular_file;\n                    std::cout << \"regular_file is selected...\" << std::endl;\n                }\n            }\n        }\n\n        cb(resp);\n        return task_ptr();\n    }\n};\n"
  },
  {
    "path": "src/replica/test/clear.sh",
    "content": "#!/bin/sh\n#\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nrm -rf core.* data/ log.* replica.* tag* test* test_cluster/\n"
  },
  {
    "path": "src/replica/test/cold_backup_context_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include \"backup_block_service_mock.h\"\n#include \"replica/backup/cold_backup_context.h\"\n\nref_ptr<block_file_mock> current_chkpt_file = new block_file_mock(\"\", 0, \"\");\nref_ptr<block_file_mock> backup_metadata_file = new block_file_mock(\"\", 0, \"\");\nref_ptr<block_file_mock> regular_file = new block_file_mock(\"\", 0, \"\");\n\nstatic std::string backup_root = \"root\";\nstatic backup_request request;\nstatic int32_t concurrent_uploading_file_cnt = 1;\nstd::shared_ptr<backup_block_service_mock> block_service =\n    std::make_shared<backup_block_service_mock>();\n\nvoid replication_service_test_app::check_backup_on_remote_test()\n{\n    cold_backup_context_ptr backup_context =\n        new cold_backup_context(nullptr, request, concurrent_uploading_file_cnt);\n\n    backup_context->start_check();\n    backup_context->block_service = block_service.get();\n    backup_context->backup_root = backup_root;\n    // case1 : current_chkpt_file don't exist\n    {\n        std::cout << \"testing current_chkpt_file don't exist...\" << std::endl;\n        backup_context->check_backup_on_remote();\n        
ASSERT_TRUE(backup_context->status() == cold_backup_status::ColdBackupChecked);\n    }\n    backup_context->_status.store(cold_backup_status::ColdBackupChecking);\n    // case2 : create current_chkpt_file fail\n    {\n        std::cout << \"testing create current_chkpt_file fail...\" << std::endl;\n        block_service->enable_create_file_fail = true;\n        std::cout << \"ref_counter = \" << backup_context->get_count() << std::endl;\n        backup_context->check_backup_on_remote();\n        ASSERT_TRUE(backup_context->status() == cold_backup_status::ColdBackupFailed);\n        block_service->enable_create_file_fail = false;\n        std::cout << \"ref_counter = \" << backup_context->get_count() << std::endl;\n    }\n    // case3 : current_chkpt_file exist\n    // this case will call read_current_chkpt_file, so we make read_current_chkpt_file fail to stop\n    backup_context->_status.store(cold_backup_status::ColdBackupChecking);\n    {\n        std::cout << \"testing read current_chkpt_file fail...\" << std::endl;\n        current_chkpt_file->enable_read_fail = true;\n        // so current_chkpt_file must exist\n        current_chkpt_file->file_exist(\"123\", 123);\n        backup_context->check_backup_on_remote();\n        ASSERT_TRUE(backup_context->status() == cold_backup_status::ColdBackupFailed);\n        current_chkpt_file->enable_read_fail = false;\n        current_chkpt_file->clear_file_exist();\n    }\n    ASSERT_TRUE(backup_context->get_count() == 1);\n    ASSERT_TRUE(current_chkpt_file->get_count() == 1);\n    ASSERT_TRUE(backup_metadata_file->get_count() == 1);\n    ASSERT_TRUE(regular_file->get_count() == 1);\n}\n\nvoid replication_service_test_app::read_current_chkpt_file_test()\n{\n    cold_backup_context_ptr backup_context =\n        new cold_backup_context(nullptr, request, concurrent_uploading_file_cnt);\n    backup_context->_status.store(cold_backup_status::ColdBackupChecking);\n    backup_context->block_service = block_service.get();\n    
backup_context->backup_root = backup_root;\n    // read current_chkpt_file fail has been already tested in check_backup_on_remote_test()\n    // case1: current_chkpt_file is not exist\n    {\n        std::cout << \"testing read_current_chkpt_file(file not exist)...\" << std::endl;\n        current_chkpt_file->clear_file_exist();\n        block_file_ptr file_handle = current_chkpt_file.get();\n        backup_context->read_current_chkpt_file(file_handle);\n        ASSERT_TRUE(backup_context->status() == cold_backup_status::ColdBackupChecked);\n    }\n    backup_context->_status.store(cold_backup_status::ColdBackupChecking);\n    // case2: current_chkpt_file exist\n    //  this case will call remote_chkpt_dir_exist(), so we make list_dir fail to stop\n    {\n        std::cout\n            << \"testing read_current_chkpt_file(file exist and check whether chkpt_dir is exist)...\"\n            << std::endl;\n        current_chkpt_file->file_exist(\"123\", 10);\n        current_chkpt_file->set_context(\"test_dir\");\n        block_service->enable_list_dir_fail = true;\n        block_file_ptr file_handle = current_chkpt_file.get();\n        backup_context->read_current_chkpt_file(file_handle);\n        ASSERT_TRUE(backup_context->status() == cold_backup_status::ColdBackupFailed);\n        current_chkpt_file->clear_file_exist();\n        current_chkpt_file->clear_context();\n        block_service->enable_list_dir_fail = false;\n    }\n    ASSERT_TRUE(backup_context->get_count() == 1);\n    ASSERT_TRUE(current_chkpt_file->get_count() == 1);\n    ASSERT_TRUE(backup_metadata_file->get_count() == 1);\n    ASSERT_TRUE(regular_file->get_count() == 1);\n}\n\nvoid replication_service_test_app::remote_chkpt_dir_exist_test()\n{\n    gpid mock_gpid(1, 2);\n    std::string mock_app_name(\"mock_app\");\n    int64_t mock_backup_id(1000);\n    std::string mock_backup_provider_name(\"mock_backup_provider_name\");\n    request.__set_pid(mock_gpid);\n    
request.__set_app_name(mock_app_name);\n    request.__set_backup_id(mock_backup_id);\n    policy_info mock_policy_info;\n    mock_policy_info.__set_backup_provider_type(\"mock_service\");\n    mock_policy_info.__set_policy_name(\"mock_policy\");\n    request.__set_policy(mock_policy_info);\n    // the case that list_dir fail has been already tested\n    cold_backup_context_ptr backup_context =\n        new cold_backup_context(nullptr, request, concurrent_uploading_file_cnt);\n\n    backup_context->start_check();\n    backup_context->block_service = block_service.get();\n    backup_context->backup_root = backup_root;\n    backup_context->_status.store(cold_backup_status::ColdBackupChecking);\n\n    // case1: directory is exist\n    {\n        std::cout << \"testing remote checkpoint directory is exist...\" << std::endl;\n        std::string dir_name = std::string(\"test_dir\");\n        current_chkpt_file->file_exist(\"123\", 10);\n        current_chkpt_file->set_context(dir_name);\n\n        std::string parent_dir = cold_backup::get_replica_backup_path(\n            backup_root, mock_app_name, mock_gpid, mock_backup_id);\n\n        std::vector<ls_entry> entries;\n        entries.emplace_back(ls_entry{std::string(dir_name), true});\n        // remote_chkpt_dir_exist() function judge whether the dir-A is exist through listing\n        //      the dir-A's parent path\n        block_service->dir_files.insert(\n            std::make_pair(::dsn::utils::filesystem::get_file_name(parent_dir), entries));\n        backup_context->remote_chkpt_dir_exist(dir_name);\n        ASSERT_TRUE(backup_context->status() == cold_backup_status::ColdBackupCompleted);\n        current_chkpt_file->clear_file_exist();\n        current_chkpt_file->clear_context();\n        block_service->dir_files.clear();\n    }\n    backup_context->_status.store(cold_backup_status::ColdBackupChecking);\n    // case2: directory is not exist\n    {\n        std::cout << \"testing remote checkpoint directory is 
not exist...\" << std::endl;\n        std::string dir_name = std::string(\"test_dir\");\n        current_chkpt_file->file_exist(\"123\", 10);\n        current_chkpt_file->set_context(dir_name);\n        backup_context->remote_chkpt_dir_exist(dir_name);\n        ASSERT_TRUE(backup_context->status() == cold_backup_status::ColdBackupChecked);\n        current_chkpt_file->clear_file_exist();\n        current_chkpt_file->clear_context();\n    }\n    ASSERT_TRUE(backup_context->get_count() == 1);\n    ASSERT_TRUE(current_chkpt_file->get_count() == 1);\n    ASSERT_TRUE(backup_metadata_file->get_count() == 1);\n    ASSERT_TRUE(regular_file->get_count() == 1);\n}\n\nvoid replication_service_test_app::upload_checkpoint_to_remote_test()\n{\n    cold_backup_context_ptr backup_context =\n        new cold_backup_context(nullptr, request, concurrent_uploading_file_cnt);\n\n    backup_context->start_check();\n    backup_context->block_service = block_service.get();\n    backup_context->backup_root = backup_root;\n    backup_context->_status.store(cold_backup_status::ColdBackupChecking);\n    backup_context->_have_check_upload_status.store(false);\n    backup_context->_upload_status.store(cold_backup_context::upload_status::UploadInvalid);\n    backup_context->_status.store(cold_backup_status::ColdBackupUploading);\n    // case1: create metadata_file fail\n    {\n        std::cout << \"testing upload_checkpoint_to_remote, stop with create metadata fail...\"\n                  << std::endl;\n        block_service->enable_create_file_fail = true;\n        backup_context->upload_checkpoint_to_remote();\n        ASSERT_TRUE(backup_context->status() == cold_backup_status::ColdBackupFailed);\n        block_service->enable_create_file_fail = false;\n    }\n    backup_context->_status.store(cold_backup_status::ColdBackupUploading);\n    // case2: create metadata succeed, and metadata is exist\n    //  this case will stop when call read_backup_metadata with reason read file fail\n    {\n    
    std::cout << \"testing upload_checkpoint_to_remote, stop with read metadata file fail...\"\n                  << std::endl;\n        backup_context->_have_check_upload_status.store(false);\n        backup_context->_upload_status.store(cold_backup_context::upload_status::UploadInvalid);\n        std::string md5 = \"test_md5\";\n        int64_t size = 10;\n        backup_metadata_file->enable_read_fail = true;\n        backup_metadata_file->file_exist(md5, size);\n        backup_context->upload_checkpoint_to_remote();\n        ASSERT_TRUE(backup_context->status() == cold_backup_status::ColdBackupFailed);\n        backup_metadata_file->clear_file_exist();\n        backup_metadata_file->enable_read_fail = false;\n    }\n    backup_context->_status.store(cold_backup_status::ColdBackupUploading);\n    // case3: create metadata succeed, but metadata is not exist\n    //  this case will stop after call write_metadata_file with write fail\n    {\n        std::cout << \"testing upload_chekpoint_to_remote, stop with create metadata file fail...\"\n                  << std::endl;\n        backup_context->_have_check_upload_status.store(false);\n        backup_context->_upload_status.store(cold_backup_context::upload_status::UploadInvalid);\n        backup_metadata_file->enable_write_fail = true;\n        backup_context->upload_checkpoint_to_remote();\n        ASSERT_TRUE(backup_context->status() == cold_backup_status::ColdBackupFailed);\n        backup_metadata_file->enable_write_fail = false;\n    }\n    ASSERT_TRUE(backup_context->get_count() == 1);\n    ASSERT_TRUE(current_chkpt_file->get_count() == 1);\n    ASSERT_TRUE(backup_metadata_file->get_count() == 1);\n    ASSERT_TRUE(regular_file->get_count() == 1);\n}\n\nvoid replication_service_test_app::read_backup_metadata_test()\n{\n    cold_backup_context_ptr backup_context =\n        new cold_backup_context(nullptr, request, concurrent_uploading_file_cnt);\n\n    backup_context->start_check();\n    
backup_context->block_service = block_service.get();\n    backup_context->backup_root = backup_root;\n\n    backup_context->_status.store(cold_backup_status::ColdBackupUploading);\n    // case1: metadata is valid\n    //  stop with create current_chkpt_file fail\n    {\n        std::cout << \"testing read_backup_metadata_file, with context of metadata is valid...\"\n                  << std::endl;\n        blob buf = ::json::json_forwarder<cold_backup_metadata>::encode(backup_context->_metadata);\n        std::string context(buf.data(), buf.length());\n        backup_metadata_file->set_context(context);\n        backup_metadata_file->file_exist(\"test_md5\", 10);\n        block_service->enable_create_file_fail = true;\n        ref_ptr<block_file> file_handle = backup_metadata_file.get();\n        backup_context->read_backup_metadata(file_handle);\n        ASSERT_TRUE(backup_context->status() == cold_backup_status::ColdBackupFailed);\n        block_service->enable_create_file_fail = false;\n        backup_metadata_file->clear_context();\n        backup_metadata_file->clear_file_exist();\n    }\n    backup_context->_status.store(cold_backup_status::ColdBackupUploading);\n    // case2: metadata is invalid\n    //  stop with create current_chkpt_file fail\n    {\n        std::cout << \"testing read_backup_metada_file, with context of metadata is invalid...\"\n                  << std::endl;\n        backup_metadata_file->file_exist(\"test_md5\", 10);\n        backup_metadata_file->set_context(\"{\\\"key\\\":value\\\"\");\n        block_service->enable_create_file_fail = true;\n        ref_ptr<block_file> file_handle = backup_metadata_file.get();\n        backup_context->read_backup_metadata(file_handle);\n        ASSERT_TRUE(backup_context->status() == cold_backup_status::ColdBackupFailed);\n        block_service->enable_create_file_fail = false;\n        backup_metadata_file->clear_file_exist();\n        backup_metadata_file->clear_context();\n    }\n    // case3: 
read metadata fail\n    //  this case has been already tested before, here just ignore\n    ASSERT_TRUE(backup_context->get_count() == 1);\n    ASSERT_TRUE(current_chkpt_file->get_count() == 1);\n    ASSERT_TRUE(backup_metadata_file->get_count() == 1);\n    ASSERT_TRUE(regular_file->get_count() == 1);\n}\n\nvoid replication_service_test_app::on_upload_chkpt_dir_test()\n{\n    cold_backup_context_ptr backup_context =\n        new cold_backup_context(nullptr, request, concurrent_uploading_file_cnt);\n\n    backup_context->start_check();\n    backup_context->block_service = block_service.get();\n    backup_context->backup_root = backup_root;\n    backup_context->_status.store(cold_backup_status::ColdBackupUploading);\n\n    backup_context->_upload_status.store(cold_backup_context::upload_status::UploadUncomplete);\n    backup_context->_max_concurrent_uploading_file_cnt = 2;\n    // case1: empty checkpoint file has been already tested in read_backup_metadata\n    //  so, here just ignore\n\n    // case2: checkpoint file is not empty\n    {\n        std::cout << \"testing on_upload_chkpt_dir with non-empty checkpoint files...\" << std::endl;\n        // simulate some files, because file is not truly exist, so we must ignore testing\n        // prepare_upload\n        // TODO: find a better way to test prepare_upload\n        std::string test_file1 = \"test_file1\";\n        int32_t file1_size = 10;\n        std::string file1_md5 = \"test_file1_md5\";\n\n        std::string test_file2 = \"test_file2\";\n        int32_t file2_size = 10;\n        std::string file2_md5 = \"test_file2_md5\";\n\n        backup_context->checkpoint_files.emplace_back(test_file1);\n        backup_context->checkpoint_files.emplace_back(test_file2);\n\n        {\n            // should simulate prepare_upload here\n            backup_context->_file_remain_cnt = 2;\n\n            file_meta f_meta;\n            f_meta.name = test_file1;\n            f_meta.md5 = file1_md5;\n            f_meta.size = 
file1_size;\n            backup_context->_file_status.insert(\n                std::make_pair(test_file1, cold_backup_context::file_status::FileUploadUncomplete));\n            backup_context->_file_infos.insert(\n                std::make_pair(test_file1, std::make_pair(file1_size, file1_md5)));\n            backup_context->checkpoint_file_total_size = file1_size;\n            backup_context->_metadata.files.emplace_back(f_meta);\n\n            f_meta.name = test_file2;\n            f_meta.md5 = file2_md5;\n            f_meta.size = file2_size;\n            backup_context->_file_status.insert(\n                std::make_pair(test_file2, cold_backup_context::file_status::FileUploadUncomplete));\n            backup_context->_file_infos.insert(\n                std::make_pair(test_file2, std::make_pair(file2_size, file2_md5)));\n            backup_context->checkpoint_file_total_size += file2_size;\n            backup_context->_metadata.files.emplace_back(f_meta);\n        }\n\n        backup_metadata_file->enable_write_fail = true;\n        regular_file->size = 10;\n        backup_context->on_upload_chkpt_dir();\n        std::cout << cold_backup_status_to_string(backup_context->status()) << std::endl;\n        ASSERT_TRUE(backup_context->status() == cold_backup_status::ColdBackupFailed);\n        regular_file->clear_file_exist();\n        backup_metadata_file->enable_write_fail = false;\n        backup_context->_metadata.files.clear();\n        backup_context->checkpoint_files.clear();\n    }\n    ASSERT_TRUE(backup_context->get_count() == 1);\n    ASSERT_TRUE(current_chkpt_file->get_count() == 1);\n    ASSERT_TRUE(backup_metadata_file->get_count() == 1);\n    ASSERT_TRUE(regular_file->get_count() == 1);\n}\n\nvoid replication_service_test_app::write_backup_metadata_test()\n{\n    cold_backup_context_ptr backup_context =\n        new cold_backup_context(nullptr, request, concurrent_uploading_file_cnt);\n\n    backup_context->start_check();\n    
backup_context->block_service = block_service.get();\n    backup_context->backup_root = backup_root;\n    backup_context->_status.store(cold_backup_status::ColdBackupUploading);\n    // case1: create backup_metadata file fail\n    //  this case has been already tested\n\n    // case2: create backup_metadata file succeed, but write file fail\n    //  this case has been already tested\n\n    // case3: create backup_metadata file succeed, and write file succeed\n    {\n        std::cout << \"create backup_metadata_file succeed, and write file succeed...\" << std::endl;\n        std::string test_file1 = \"test_file1\";\n        std::string test_file2 = \"test_file2\";\n        backup_context->_metadata.checkpoint_decree = 100;\n\n        file_meta f_meta;\n        f_meta.name = test_file1;\n        f_meta.md5 = \"test_file1_md5\";\n        f_meta.size = 10;\n        backup_context->_metadata.files.emplace_back(f_meta);\n        f_meta.name = test_file2;\n        f_meta.md5 = \"test_file2_md5\";\n        f_meta.size = 11;\n        backup_context->_metadata.files.emplace_back(f_meta);\n\n        blob result =\n            ::json::json_forwarder<cold_backup_metadata>::encode(backup_context->_metadata);\n        std::string value(result.data(), result.length());\n        current_chkpt_file->enable_write_fail = true;\n        backup_context->write_backup_metadata();\n        std::string value_write(backup_metadata_file->context.data(),\n                                backup_metadata_file->context.length());\n        ASSERT_TRUE(result.data() != backup_metadata_file->context.data());\n        ASSERT_TRUE(value == value_write);\n        ASSERT_TRUE(backup_context->status() == cold_backup_status::ColdBackupFailed);\n        current_chkpt_file->enable_write_fail = false;\n        backup_context->_metadata.files.clear();\n    }\n    ASSERT_TRUE(backup_context->get_count() == 1);\n    ASSERT_TRUE(current_chkpt_file->get_count() == 1);\n    
ASSERT_TRUE(backup_metadata_file->get_count() == 1);\n    ASSERT_TRUE(regular_file->get_count() == 1);\n}\n\nvoid replication_service_test_app::write_current_chkpt_file_test()\n{\n    cold_backup_context_ptr backup_context =\n        new cold_backup_context(nullptr, request, concurrent_uploading_file_cnt);\n\n    backup_context->start_check();\n    backup_context->block_service = block_service.get();\n    backup_context->backup_root = backup_root;\n    backup_context->_status.store(cold_backup_status::ColdBackupUploading);\n    // case1: create current_chkpt_file succeed, and write succeed\n    {\n        std::string value = \"test write_current_chkpt_file\";\n        backup_context->write_current_chkpt_file(value);\n\n        std::string result(current_chkpt_file->context.data(),\n                           current_chkpt_file->context.length());\n        ASSERT_TRUE(value == result);\n        ASSERT_TRUE(backup_context->status() == cold_backup_status::ColdBackupCompleted);\n        ASSERT_TRUE(backup_context->_progress.load() >= 1000);\n    }\n    ASSERT_TRUE(backup_context->get_count() == 1);\n    ASSERT_TRUE(current_chkpt_file->get_count() == 1);\n    ASSERT_TRUE(backup_metadata_file->get_count() == 1);\n    ASSERT_TRUE(regular_file->get_count() == 1);\n}\n"
  },
  {
    "path": "src/replica/test/config-test.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n[apps.replica]\ntype = replica\nrun = true\ncount = 1\nports = 54321\npools = THREAD_POOL_DEFAULT,THREAD_POOL_REPLICATION_LONG,THREAD_POOL_REPLICATION,THREAD_POOL_SLOG,THREAD_POOL_PLOG,THREAD_POOL_BLOCK_SERVICE\n\n[core]\n;tool = simulator\ntool = nativerun\n\n;toollets = tracer, profiler\n;fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_DEBUG\nlogging_factory_name = dsn::tools::simple_logger\n\n\n[tools.simple_logger]\nfast_flush = true\nshort_header = 
false\nstderr_start_level = LOG_LEVEL_WARNING\n\n[tools.simulator]\nrandom_seed = 1465902258\n\n[tools.screen_logger]\nshort_header = false\n\n[network]\n; how many network threads for network library (used by asio)\nio_service_worker_count = 2\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 4\n\n[threadpool.THREAD_POOL_DEFAULT]\nname = default\npartitioned = false\nworker_priority = THREAD_xPRIORITY_NORMAL\nworker_count = 2\n\n[threadpool.THREAD_POOL_REPLICATION]\nname = replica\npartitioned = true\nworker_priority = THREAD_xPRIORITY_NORMAL\nworker_count = 3\n\n[threadpool.THREAD_POOL_REPLICATION_LONG]\nname = replica_long\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\n[block_service.local_service]\ntype = local_service\nargs =\n"
  },
  {
    "path": "src/replica/test/log_block_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n\n#include \"replica_test_base.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass log_block_test : public replica_test_base\n{\n};\n\nTEST_F(log_block_test, constructor)\n{\n    log_block block(1);\n    ASSERT_EQ(block.data().size(), 1);\n    ASSERT_EQ(block.size(), 16);\n    ASSERT_EQ(block.start_offset(), 1);\n}\n\nTEST_F(log_block_test, log_block_header)\n{\n    log_block block(10);\n    auto hdr = (log_block_header *)block.front().data();\n    ASSERT_EQ(hdr->magic, 0xdeadbeef);\n    ASSERT_EQ(hdr->length, 0);\n    ASSERT_EQ(hdr->body_crc, 0);\n}\n\nclass log_appender_test : public replica_test_base\n{\n};\n\nTEST_F(log_appender_test, constructor)\n{\n    log_block block;\n    binary_writer temp_writer;\n    temp_writer.write(8);\n    block.add(temp_writer.get_buffer());\n\n    log_appender appender(10, block);\n    ASSERT_EQ(appender.start_offset(), 10);\n    ASSERT_EQ(appender.blob_count(), 2);\n    ASSERT_EQ(appender.all_blocks().size(), 1);\n    ASSERT_EQ(appender.mutations().size(), 0);\n    ASSERT_EQ(appender.callbacks().size(), 0);\n}\n\nTEST_F(log_appender_test, append_mutation)\n{\n    log_appender 
appender(10);\n    for (int i = 0; i < 5; i++) {\n        appender.append_mutation(create_test_mutation(1 + i, \"test\"), nullptr);\n    }\n    ASSERT_EQ(appender.start_offset(), 10);\n    ASSERT_EQ(appender.mutations().size(), 5);\n\n    // each mutation occupies 2 blobs, one for mutation header, one for mutation data.\n    ASSERT_EQ(appender.blob_count(), 1 + 5 * 2);\n}\n\nTEST_F(log_appender_test, log_block_not_full)\n{\n    log_appender appender(10);\n    for (int i = 0; i < 5; i++) {\n        appender.append_mutation(create_test_mutation(1 + i, \"test\"), nullptr);\n    }\n    ASSERT_EQ(appender.mutations().size(), 5);\n    ASSERT_EQ(appender.blob_count(), 1 + 5 * 2);\n    ASSERT_EQ(appender.start_offset(), 10);\n    ASSERT_EQ(appender.all_blocks().size(), 1);\n    ASSERT_EQ(appender.callbacks().size(), 0);\n    ASSERT_EQ(appender.mutations().size(), 5);\n\n    auto block = appender.all_blocks()[0];\n    ASSERT_EQ(block.start_offset(), 10);\n    ASSERT_EQ(block.data().size(), 1 + 5 * 2);\n}\n\nTEST_F(log_appender_test, log_block_full)\n{\n    log_appender appender(10);\n    for (int i = 0; i < 1024; i++) { // more than DEFAULT_MAX_BLOCK_BYTES\n        appender.append_mutation(create_test_mutation(1 + i, std::string(1024, 'a')), nullptr);\n    }\n    ASSERT_EQ(appender.mutations().size(), 1024);\n    // two log_block_header blobs\n    ASSERT_EQ(appender.blob_count(), 2 + 1024 * 2);\n    // the first block's start offset\n    ASSERT_EQ(appender.start_offset(), 10);\n    // two log_blocks\n    ASSERT_EQ(appender.all_blocks().size(), 2);\n\n    size_t sz = 0;\n    size_t start_offset = 10;\n    for (const log_block &blk : appender.all_blocks()) {\n        ASSERT_EQ(start_offset, blk.start_offset());\n        sz += blk.size();\n        start_offset += blk.size();\n    }\n    ASSERT_EQ(sz, appender.size());\n}\n\nTEST_F(log_appender_test, read_log_block)\n{\n    log_appender appender(10);\n    for (int i = 0; i < 1024; i++) { // more than DEFAULT_MAX_BLOCK_BYTES\n   
     appender.append_mutation(create_test_mutation(1 + i, std::string(1024, 'a')), nullptr);\n    }\n    ASSERT_EQ(appender.all_blocks().size(), 2);\n\n    // merge into a continuous buffer, which may contain multiple blocks\n    std::string buffer;\n    for (const auto &block : appender.all_blocks()) {\n        for (const blob &bb : block.data()) {\n            buffer += bb.to_string();\n        }\n    }\n    ASSERT_EQ(buffer.size(), appender.size());\n\n    // read from buffer\n    auto bb = blob::create_from_bytes(std::move(buffer));\n    binary_reader reader(bb);\n    int block_idx = 0;\n    int mutation_idx = 0;\n    while (!reader.is_eof()) {\n        blob tmp_bb;\n\n        ASSERT_GT(appender.all_blocks().size(), block_idx);\n        ASSERT_GE(reader.get_remaining_size(), sizeof(log_block_header));\n        reader.read(tmp_bb, sizeof(log_block_header));\n\n        const auto &expected_block = appender.all_blocks()[block_idx];\n        size_t blk_len = expected_block.size() - sizeof(log_block_header);\n        ASSERT_GE(reader.get_remaining_size(), blk_len);\n        blob blk_bb;\n        reader.read(blk_bb, blk_len);\n        binary_reader blk_reader(blk_bb); // reads the log block\n        while (!blk_reader.is_eof()) {\n            size_t read_len = blk_len - blk_reader.get_remaining_size();\n            mutation_ptr mu = mutation::read_from(blk_reader, nullptr);\n            ASSERT_EQ(mu->data.header.log_offset,\n                      read_len + expected_block.start_offset() + sizeof(log_block_header));\n            mutation_idx++;\n        }\n\n        block_idx++;\n    }\n    ASSERT_EQ(block_idx, appender.all_blocks().size());\n    ASSERT_EQ(mutation_idx, 1024);\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/test/log_file_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n\n#include \"replica_test_base.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass log_file_test : public replica_test_base\n{\npublic:\n    void SetUp() override\n    {\n        utils::filesystem::remove_path(_log_dir);\n        utils::filesystem::create_directory(_log_dir);\n        _logf = log_file::create_write(_log_dir.c_str(), 1, _start_offset);\n    }\n\n    void TearDown() override\n    {\n        _logf->close();\n        utils::filesystem::remove_path(_log_dir);\n    }\n\nprotected:\n    log_file_ptr _logf;\n    size_t _start_offset{10};\n};\n\nTEST_F(log_file_test, commit_log_blocks)\n{\n    // write one block\n    auto appender = std::make_shared<log_appender>(_start_offset);\n    for (int i = 0; i < 5; i++) {\n        appender->append_mutation(create_test_mutation(1 + i, \"test\"), nullptr);\n    }\n    auto tsk = _logf->commit_log_blocks(*appender,\n                                        LPC_WRITE_REPLICATION_LOG_PRIVATE,\n                                        nullptr,\n                                        [&](error_code err, size_t sz) {\n                                            
ASSERT_EQ(err, ERR_OK);\n                                            ASSERT_EQ(sz, appender->size());\n                                        },\n                                        0);\n    tsk->wait();\n    ASSERT_EQ(tsk->get_aio_context()->buffer_size, appender->size());\n    ASSERT_EQ(tsk->get_aio_context()->file_offset,\n              appender->start_offset() - _start_offset); // local offset\n\n    // write multiple blocks\n    size_t written_sz = appender->size();\n    appender = std::make_shared<log_appender>(_start_offset + written_sz);\n    for (int i = 0; i < 1024; i++) { // more than DEFAULT_MAX_BLOCK_BYTES\n        appender->append_mutation(create_test_mutation(1 + i, std::string(1024, 'a')), nullptr);\n    }\n    ASSERT_GT(appender->all_blocks().size(), 1);\n    tsk = _logf->commit_log_blocks(*appender,\n                                   LPC_WRITE_REPLICATION_LOG_PRIVATE,\n                                   nullptr,\n                                   [&](error_code err, size_t sz) {\n                                       ASSERT_EQ(err, ERR_OK);\n                                       ASSERT_EQ(sz, appender->size());\n                                   },\n                                   0);\n    tsk->wait();\n    ASSERT_EQ(tsk->get_aio_context()->buffer_size, appender->size());\n    ASSERT_EQ(tsk->get_aio_context()->file_offset, appender->start_offset() - _start_offset);\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/test/main.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <cmath>\n#include <fstream>\n#include <iostream>\n#include <thread>\n\n#include <gtest/gtest.h>\n#include <dsn/dist/replication/replication_service_app.h>\n\n#include \"replication_service_test_app.h\"\n\nint gtest_flags = 0;\nint gtest_ret = 0;\nreplication_service_test_app *app;\n\nTEST(cold_backup_context, check_backup_on_remote) { app->check_backup_on_remote_test(); }\n\nTEST(cold_backup_context, read_current_chkpt_file) { app->read_current_chkpt_file_test(); }\n\nTEST(cold_backup_context, remote_chkpt_dir_exist) { app->remote_chkpt_dir_exist_test(); }\n\nTEST(cold_backup_context, upload_checkpoint_to_remote) { app->upload_checkpoint_to_remote_test(); }\n\nTEST(cold_backup_context, read_backup_metadata) { app->read_backup_metadata_test(); }\n\nTEST(cold_backup_context, on_upload_chkpt_dir) { app->on_upload_chkpt_dir_test(); }\n\nTEST(cold_backup_context, write_metadata_file) { app->write_backup_metadata_test(); }\n\nTEST(cold_backup_context, write_current_chkpt_file) { app->write_current_chkpt_file_test(); }\n\nerror_code replication_service_test_app::start(const std::vector<std::string> &args)\n{\n    app = this;\n    gtest_ret 
= RUN_ALL_TESTS();\n    gtest_flags = 1;\n    return dsn::ERR_OK;\n}\n\nGTEST_API_ int main(int argc, char **argv)\n{\n    testing::InitGoogleTest(&argc, argv);\n\n    dsn::service_app::register_factory<replication_service_test_app>(\"replica\");\n\n    dsn_run_config(\"config-test.ini\", false);\n    while (gtest_flags == 0) {\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n    }\n\n#ifndef ENABLE_GCOV\n    dsn_exit(gtest_ret);\n#endif\n    return gtest_ret;\n}\n"
  },
  {
    "path": "src/replica/test/mock_utils.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/dist/replication/mutation_duplicator.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/filesystem.h>\n\n#include \"replica/replica.h\"\n#include \"replica/replica_stub.h\"\n#include \"replica/backup/cold_backup_context.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass mock_replication_app_base : public replication_app_base\n{\npublic:\n    explicit mock_replication_app_base(replica *replica) : replication_app_base(replica) {}\n\n    error_code start(int, char **) override { return ERR_NOT_IMPLEMENTED; }\n    error_code stop(bool) override { return ERR_NOT_IMPLEMENTED; }\n    error_code sync_checkpoint() override { return ERR_OK; 
}\n    error_code async_checkpoint(bool) override\n    {\n        _last_durable_decree = _expect_last_durable_decree;\n        return ERR_OK;\n    }\n    error_code prepare_get_checkpoint(blob &) override { return ERR_NOT_IMPLEMENTED; }\n    error_code get_checkpoint(int64_t learn_start,\n                              const dsn::blob &learn_request,\n                              dsn::replication::learn_state &state) override\n    {\n        state.to_decree_included = last_durable_decree();\n        return ERR_OK;\n    }\n    error_code storage_apply_checkpoint(chkpt_apply_mode, const learn_state &) override\n    {\n        return ERR_OK;\n    }\n    error_code copy_checkpoint_to_dir(const char *checkpoint_dir,\n                                      /*output*/ int64_t *last_decree,\n                                      bool flush_memtable = false) override\n    {\n        if (last_decree != nullptr) {\n            *last_decree = _decree;\n        }\n\n        utils::filesystem::create_file(fmt::format(\"{}/checkpoint.file\", checkpoint_dir));\n        return ERR_OK;\n    }\n    int on_request(message_ex *request) override { return 0; }\n    std::string query_compact_state() const { return \"\"; };\n\n    // we mock the following\n    void update_app_envs(const std::map<std::string, std::string> &envs) override { _envs = envs; }\n    void query_app_envs(std::map<std::string, std::string> &out) override { out = _envs; }\n    decree last_durable_decree() const override { return _last_durable_decree; }\n\n    // TODO(heyuchen): implement this function in a future pull request\n    void set_partition_version(int32_t partition_version) override {}\n\n    void set_ingestion_status(ingestion_status::type status) { _ingestion_status = status; }\n    ingestion_status::type get_ingestion_status() override { return _ingestion_status; }\n\n    uint32_t query_data_version() const { return 1; }\n\n    manual_compaction_status::type query_compact_status() const\n    {\n        
return manual_compaction_status::IDLE;\n    }\n\n    void set_last_durable_decree(decree d) { _last_durable_decree = d; }\n\n    void set_expect_last_durable_decree(decree d) { _expect_last_durable_decree = d; }\n\nprivate:\n    std::map<std::string, std::string> _envs;\n    decree _decree = 5;\n    ingestion_status::type _ingestion_status;\n    decree _last_durable_decree{0};\n    decree _expect_last_durable_decree{0};\n};\n\nclass mock_replica : public replica\n{\npublic:\n    mock_replica(replica_stub *stub,\n                 gpid gpid,\n                 const app_info &app,\n                 const char *dir,\n                 bool need_restore = false,\n                 bool is_duplication_follower = false)\n        : replica(stub, gpid, app, dir, need_restore, is_duplication_follower)\n    {\n        _app = make_unique<replication::mock_replication_app_base>(this);\n    }\n\n    void register_service()\n    {\n        _app->register_storage_engine(\"replica\",\n                                      replication_app_base::create<mock_replication_app_base>);\n    }\n\n    ~mock_replica() override\n    {\n        _config.status = partition_status::PS_INACTIVE;\n        _tracker.wait_outstanding_tasks();\n        _app.reset(nullptr);\n    }\n\n    void init_private_log(const std::string &log_dir)\n    {\n        utils::filesystem::remove_path(log_dir);\n\n        _private_log =\n            new mutation_log_private(log_dir, _options->log_private_file_size_mb, get_gpid(), this);\n\n        error_code err =\n            _private_log->open(nullptr, [this](error_code err) { dcheck_eq_replica(err, ERR_OK); });\n        dcheck_eq_replica(err, ERR_OK);\n    }\n\n    void init_private_log(mutation_log_ptr log) { _private_log = std::move(log); }\n\n    replica_duplicator_manager &get_replica_duplicator_manager() { return *_duplication_mgr; }\n\n    void as_primary() { _config.status = partition_status::PS_PRIMARY; }\n\n    void as_secondary() { _config.status = 
partition_status::PS_SECONDARY; }\n\n    void mock_max_gced_decree(decree d) { _max_gced_decree = d; }\n\n    decree max_gced_decree_no_lock() const override\n    {\n        if (_max_gced_decree == (invalid_decree - 1)) {\n            // if the value is not fake, return the real value from replica.\n            return replica::max_gced_decree_no_lock();\n        }\n        return _max_gced_decree;\n    }\n    /// helper functions\n    void set_replica_config(replica_configuration &config) { _config = config; }\n    void set_partition_status(partition_status::type status) { _config.status = status; }\n    void set_last_committed_decree(decree d) { _prepare_list->reset(d); }\n    prepare_list *get_plist() const { return _prepare_list.get(); }\n    void prepare_list_truncate(decree d) { _prepare_list->truncate(d); }\n    void prepare_list_commit_hard(decree d) { _prepare_list->commit(d, COMMIT_TO_DECREE_HARD); }\n    decree get_app_last_committed_decree() { return _app->last_committed_decree(); }\n    void set_app_last_committed_decree(decree d) { _app->_last_committed_decree = d; }\n    void set_primary_partition_configuration(partition_configuration &pconfig)\n    {\n        _primary_states.membership = pconfig;\n    }\n    partition_bulk_load_state get_secondary_bulk_load_state(const rpc_address &node)\n    {\n        return _primary_states.secondary_bulk_load_states[node];\n    }\n    void set_secondary_bulk_load_state(const rpc_address &node,\n                                       const partition_bulk_load_state &state)\n    {\n        _primary_states.secondary_bulk_load_states[node] = state;\n    }\n    void set_is_empty_prepare_sent(bool flag)\n    {\n        _primary_states.ingestion_is_empty_prepare_sent = flag;\n    }\n    bool is_ingestion() { return _is_bulk_load_ingestion; }\n    void set_is_ingestion(bool flag) { _is_bulk_load_ingestion = flag; }\n    void set_ingestion_status(ingestion_status::type status) { _app->set_ingestion_status(status); }\n    
ingestion_status::type get_ingestion_status() { return _app->get_ingestion_status(); }\n    bool is_primary_bulk_load_states_cleaned()\n    {\n        return (!_primary_states.ingestion_is_empty_prepare_sent &&\n                _primary_states.secondary_bulk_load_states.size() == 0);\n    }\n\n    // mock cold backup related function.\n    void generate_backup_checkpoint(cold_backup_context_ptr backup_context) override\n    {\n        if (backup_context->status() != ColdBackupCheckpointing) {\n            ddebug(\"%s: ignore generating backup checkpoint because backup_status = %s\",\n                   backup_context->name,\n                   cold_backup_status_to_string(backup_context->status()));\n            backup_context->ignore_checkpoint();\n            return;\n        }\n        backup_context->complete_checkpoint();\n    }\n\n    void update_last_durable_decree(decree decree)\n    {\n        dynamic_cast<mock_replication_app_base *>(_app.get())->set_last_durable_decree(decree);\n    }\n\n    void update_expect_last_durable_decree(decree decree)\n    {\n        dynamic_cast<mock_replication_app_base *>(_app.get())\n            ->set_expect_last_durable_decree(decree);\n    }\n\nprivate:\n    decree _max_gced_decree{invalid_decree - 1};\n};\ntypedef dsn::ref_ptr<mock_replica> mock_replica_ptr;\n\ninline std::unique_ptr<mock_replica> create_mock_replica(replica_stub *stub,\n                                                         int appid = 1,\n                                                         int partition_index = 1,\n                                                         const char *dir = \"./\")\n{\n    gpid gpid(appid, partition_index);\n    app_info app_info;\n    app_info.app_type = \"replica\";\n    app_info.app_name = \"temp\";\n\n    return make_unique<mock_replica>(stub, gpid, app_info, dir);\n}\n\nclass mock_replica_stub : public replica_stub\n{\npublic:\n    mock_replica_stub() = default;\n\n    ~mock_replica_stub() override = 
default;\n\n    void add_replica(replica *r) { _replicas[r->get_gpid()] = replica_ptr(r); }\n\n    mock_replica *add_primary_replica(int appid, int part_index = 1)\n    {\n        auto r = add_non_primary_replica(appid, part_index);\n        r->as_primary();\n        return r;\n    }\n\n    mock_replica *add_non_primary_replica(int appid, int part_index = 1)\n    {\n        auto r = create_mock_replica(this, appid, part_index).release();\n        add_replica(r);\n        mock_replicas[gpid(appid, part_index)] = r;\n        return r;\n    }\n\n    mock_replica *find_replica(int appid, int part_index = 1)\n    {\n        return mock_replicas[gpid(appid, part_index)];\n    }\n\n    void set_state_connected() { _state = replica_node_state::NS_Connected; }\n\n    rpc_address get_meta_server_address() const override { return rpc_address(\"127.0.0.2\", 12321); }\n\n    std::map<gpid, mock_replica *> mock_replicas;\n\n    /// helper functions\n    mock_replica_ptr\n    generate_replica_ptr(const app_info &info,\n                         gpid pid,\n                         partition_status::type status = partition_status::PS_INACTIVE,\n                         ballot b = 5,\n                         bool need_restore = false,\n                         bool is_duplication_follower = false)\n    {\n        replica_configuration config;\n        config.ballot = b;\n        config.pid = pid;\n        config.status = status;\n\n        mock_replica_ptr rep =\n            new mock_replica(this, pid, info, \"./\", need_restore, is_duplication_follower);\n        rep->set_replica_config(config);\n        _replicas[pid] = rep;\n\n        return rep;\n    }\n\n    replica *generate_replica(const app_info &info,\n                              gpid pid,\n                              partition_status::type status = partition_status::PS_INACTIVE,\n                              ballot b = 5,\n                              bool need_restore = false,\n                              bool 
is_duplication_follower = false)\n    {\n        replica_configuration config;\n        config.ballot = b;\n        config.pid = pid;\n        config.status = status;\n\n        auto data_dirs = std::vector<std::string>{\"./\"};\n        auto data_dirs_tag = std::vector<std::string>{\"tag\"};\n        initialize_fs_manager(data_dirs, data_dirs_tag);\n        auto *rep = new mock_replica(this, pid, info, \"./\", need_restore, is_duplication_follower);\n        rep->set_replica_config(config);\n        return rep;\n    }\n\n    void generate_replicas_base_dir_nodes_for_app(app_info mock_app,\n                                                  int primary_count_for_disk = 1,\n                                                  int secondary_count_for_disk = 2)\n    {\n        const auto &dir_nodes = _fs_manager._dir_nodes;\n        for (auto &dir_node : dir_nodes) {\n            const auto &replica_iter = dir_node->holding_replicas.find(mock_app.app_id);\n            if (replica_iter == dir_node->holding_replicas.end()) {\n                continue;\n            }\n            const std::set<gpid> &pids = replica_iter->second;\n            int primary_count = primary_count_for_disk;\n            int secondary_count = secondary_count_for_disk;\n            for (const gpid &pid : pids) {\n                // generate primary replica and secondary replica.\n                if (primary_count-- > 0) {\n                    add_replica(generate_replica_ptr(\n                        mock_app, pid, partition_status::PS_PRIMARY, mock_app.app_id));\n                } else if (secondary_count-- > 0) {\n                    add_replica(generate_replica_ptr(\n                        mock_app, pid, partition_status::PS_SECONDARY, mock_app.app_id));\n                }\n            }\n        }\n    }\n\n    void set_log(mutation_log_ptr log) { _log = log; }\n\n    int32_t get_bulk_load_downloading_count() const { return _bulk_load_downloading_count.load(); }\n    void 
set_bulk_load_downloading_count(int32_t count)\n    {\n        _bulk_load_downloading_count.store(count);\n    }\n\n    void set_rpc_address(const rpc_address &address) { _primary_address = address; }\n};\n\nclass mock_log_file : public log_file\n{\npublic:\n    mock_log_file(const std::string path, int index)\n        : log_file(path.c_str(), nullptr, index, 0, false)\n    {\n    }\n\n    void set_file_size(int size) { _end_offset = _start_offset + size; }\n};\ntypedef dsn::ref_ptr<mock_log_file> mock_log_file_ptr;\n\nclass mock_mutation_log_private : public mutation_log_private\n{\npublic:\n    mock_mutation_log_private(dsn::gpid pid, dsn::replication::replica *r)\n        : mutation_log_private(\"\", 10, pid, r)\n    {\n    }\n\n    dsn::task_ptr append(dsn::replication::mutation_ptr &mu,\n                         dsn::task_code callback_code,\n                         dsn::task_tracker *tracker,\n                         dsn::aio_handler &&callback,\n                         int hash = 0,\n                         int64_t *pending_size = nullptr) override\n    {\n        _mu_list.push_back(mu);\n        return nullptr;\n    }\n\n    void get_in_memory_mutations(decree start_decree,\n                                 ballot start_ballot,\n                                 std::vector<mutation_ptr> &mutation_list) const override\n    {\n        for (auto &mu : _mu_list) {\n            ballot current_ballot =\n                (start_ballot == invalid_ballot) ? 
invalid_ballot : mu->get_ballot();\n            if ((mu->get_decree() >= start_decree && start_ballot == current_ballot) ||\n                current_ballot > start_ballot) {\n                mutation_list.push_back(mu);\n            }\n        }\n    }\n\n    static error_code replay(std::vector<std::string> &log_files,\n                             replay_callback callback,\n                             /*out*/ int64_t &end_offset)\n    {\n        return dsn::ERR_OK;\n    }\n\n    void add_log_file(dsn::replication::log_file_ptr lf) { _log_files[lf->index()] = lf; }\n\nprivate:\n    std::vector<dsn::replication::mutation_ptr> _mu_list;\n};\ntypedef dsn::ref_ptr<mock_mutation_log_private> mock_mutation_log_private_ptr;\n\nclass mock_mutation_log_shared : public mutation_log_shared\n{\npublic:\n    mock_mutation_log_shared(const std::string &dir) : mutation_log_shared(dir, 1000, false) {}\n\n    ::dsn::task_ptr append(mutation_ptr &mu,\n                           dsn::task_code callback_code,\n                           dsn::task_tracker *tracker,\n                           aio_handler &&callback,\n                           int hash = 0,\n                           int64_t *pending_size = nullptr)\n    {\n        _mu_list.push_back(mu);\n        return nullptr;\n    }\n\n    void flush() {}\n    void flush_once() {}\n\nprivate:\n    std::vector<dsn::replication::mutation_ptr> _mu_list;\n};\ntypedef dsn::ref_ptr<mock_mutation_log_shared> mock_mutation_log_shared_ptr;\n\nstruct mock_mutation_duplicator : public mutation_duplicator\n{\n    explicit mock_mutation_duplicator(replica_base *r) : mutation_duplicator(r) {}\n\n    void duplicate(mutation_tuple_set mut, callback cb) override { _func(mut, cb); }\n\n    typedef std::function<void(mutation_tuple_set, callback)> duplicate_function;\n    static void mock(duplicate_function hook) { _func = std::move(hook); }\n    static duplicate_function _func;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/test/mutation_log_learn_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"replica/mutation_log.h\"\n#include \"replica_test_base.h\"\n\n#include <dsn/utility/filesystem.h>\n#include <gtest/gtest.h>\n#include <chrono>\n#include <condition_variable>\n\nnamespace dsn {\nnamespace replication {\n\nclass mutation_log_test : public replica_test_base\n{\n};\n\nTEST_F(mutation_log_test, learn)\n{\n    std::chrono::steady_clock clock;\n    gpid gpid(1, 1);\n    std::string str = \"hello, world!\";\n    std::string logp = _log_dir;\n\n    // prepare mutations\n    std::vector<mutation_ptr> mutations;\n    auto time_tic = clock.now();\n    for (int i = 0; i < 1000; i++) {\n        mutation_ptr mu(new mutation());\n        mu->data.header.ballot = 1;\n        mu->data.header.decree = i + 2;\n        
mu->data.header.pid = gpid;\n        mu->data.header.last_committed_decree = i;\n        mu->data.header.log_offset = 0;\n\n        binary_writer writer;\n        // we want an update mutation to be ~4kByte\n        for (int j = 0; j < 300; j++) {\n            writer.write(str);\n        }\n        mu->data.updates.push_back(mutation_update());\n        mu->data.updates.back().code = RPC_REPLICATION_WRITE_EMPTY;\n        mu->data.updates.back().data = writer.get_buffer();\n\n        mu->client_requests.push_back(nullptr);\n\n        mutations.push_back(mu);\n    }\n    auto time_toc = clock.now();\n    std::cout << \"prepare mutations time(us): \"\n              << std::chrono::duration_cast<std::chrono::microseconds>(time_toc - time_tic).count()\n              << std::endl;\n\n    decree learn_points[] = {584, 585, 586, 594, 595, 596, 604, 605, 606};\n\n    for (auto &lp : learn_points) {\n        // prepare dir\n        utils::filesystem::remove_path(logp);\n        utils::filesystem::create_directory(logp);\n\n        // writing logs\n        time_tic = clock.now();\n        mutation_log_ptr mlog = new mutation_log_private(logp, 32, gpid, _replica.get());\n        mlog->open(nullptr, nullptr);\n        for (auto &mu : mutations) {\n            mlog->append(mu, LPC_AIO_IMMEDIATE_CALLBACK, nullptr, nullptr, 0);\n        }\n        mlog->flush();\n        time_toc = clock.now();\n        std::cout\n            << \"learn_point[\" << lp << \"]: write time(us): \"\n            << std::chrono::duration_cast<std::chrono::microseconds>(time_toc - time_tic).count()\n            << std::endl;\n\n        // gc\n        time_tic = clock.now();\n        decree durable_decree = lp;\n        mlog->garbage_collection(gpid, durable_decree, 0, 0, 0);\n        mlog->close();\n        time_toc = clock.now();\n        std::cout\n            << \"learn_point[\" << lp << \"]: gc time(us): \"\n            << std::chrono::duration_cast<std::chrono::microseconds>(time_toc - 
time_tic).count()\n            << std::endl;\n\n        // reading logs\n        time_tic = clock.now();\n        mlog = new mutation_log_private(logp, 1, gpid, _replica.get());\n        mlog->open([](int log_length, mutation_ptr &mu) -> bool { return true; }, nullptr);\n        time_toc = clock.now();\n        std::cout\n            << \"learn_point[\" << lp << \"]: read time(us): \"\n            << std::chrono::duration_cast<std::chrono::microseconds>(time_toc - time_tic).count()\n            << std::endl;\n\n        // learning\n        time_tic = clock.now();\n        learn_state state;\n        mlog->get_learn_state(gpid, durable_decree + 1, state);\n        mlog->close();\n        mlog = nullptr;\n        time_toc = clock.now();\n        std::cout\n            << \"learn_point[\" << lp << \"]: learn time(us): \"\n            << std::chrono::duration_cast<std::chrono::microseconds>(time_toc - time_tic).count()\n            << std::endl;\n\n        // replaying\n        time_tic = clock.now();\n        int64_t offset = 0;\n        std::set<decree> learned_decress;\n        mutation_log::replay(\n            state.files,\n            [&mutations, &learned_decress, &clock](int log_length, mutation_ptr &mu) -> bool {\n                // wait for 5 usec mimicking mutation replay time\n                auto tic = clock.now();\n                while (std::chrono::duration_cast<std::chrono::microseconds>(clock.now() - tic)\n                           .count() < 5) {\n                    ;\n                }\n                learned_decress.insert(mu->data.header.decree);\n\n                mutation_ptr wmu = mutations[mu->data.header.decree - 2];\n                EXPECT_TRUE(wmu->data.header == mu->data.header);\n                EXPECT_TRUE(wmu->data.updates.size() == mu->data.updates.size());\n                EXPECT_TRUE(wmu->data.updates[0].data.length() ==\n                            mu->data.updates[0].data.length());\n                EXPECT_TRUE(memcmp((const void 
*)wmu->data.updates[0].data.data(),\n                                   (const void *)mu->data.updates[0].data.data(),\n                                   mu->data.updates[0].data.length()) == 0);\n                EXPECT_TRUE(wmu->data.updates[0].code == mu->data.updates[0].code);\n                EXPECT_TRUE(wmu->client_requests.size() == mu->client_requests.size());\n\n                return true;\n            },\n            offset);\n        time_toc = clock.now();\n        std::cout\n            << \"learn_point[\" << lp << \"]: replay time(us): \"\n            << std::chrono::duration_cast<std::chrono::microseconds>(time_toc - time_tic).count()\n            << std::endl;\n\n        // checking\n        time_tic = clock.now();\n        for (decree s = durable_decree + 1; s < 1000; s++) {\n            auto it = learned_decress.find(s);\n            ASSERT_TRUE(it != learned_decress.end());\n        }\n        time_toc = clock.now();\n        std::cout\n            << \"learn_point[\" << lp << \"]: check time(us): \"\n            << std::chrono::duration_cast<std::chrono::microseconds>(time_toc - time_tic).count()\n            << std::endl;\n\n        // clear all\n        utils::filesystem::remove_path(logp);\n    }\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/test/mutation_log_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"replica/mutation_log.h\"\n#include \"replica_test_base.h\"\n\n#include <dsn/utility/filesystem.h>\n#include <gtest/gtest.h>\n\nusing namespace ::dsn;\nusing namespace ::dsn::replication;\n\nstatic void copy_file(const char *from_file, const char *to_file, int64_t to_size = -1)\n{\n    int64_t from_size;\n    ASSERT_TRUE(dsn::utils::filesystem::file_size(from_file, from_size));\n    ASSERT_LE(to_size, from_size);\n    FILE *from = fopen(from_file, \"rb\");\n    ASSERT_TRUE(from != nullptr);\n    FILE *to = fopen(to_file, \"wb\");\n    ASSERT_TRUE(to != nullptr);\n    if (to_size == -1)\n        to_size = from_size;\n    if (to_size > 0) {\n        std::unique_ptr<char[]> buf(new char[to_size]);\n        auto n = 
fread(buf.get(), 1, to_size, from);\n        ASSERT_EQ(to_size, n);\n        n = fwrite(buf.get(), 1, to_size, to);\n        ASSERT_EQ(to_size, n);\n    }\n    int r = fclose(from);\n    ASSERT_EQ(0, r);\n    r = fclose(to);\n    ASSERT_EQ(0, r);\n}\n\nstatic void overwrite_file(const char *file, int offset, const void *buf, int size)\n{\n    FILE *f = fopen(file, \"r+b\");\n    ASSERT_TRUE(f != nullptr);\n    int r = fseek(f, offset, SEEK_SET);\n    ASSERT_EQ(0, r);\n    size_t n = fwrite(buf, 1, size, f);\n    ASSERT_EQ(size, n);\n    r = fclose(f);\n    ASSERT_EQ(0, r);\n}\n\nTEST(replication, log_file)\n{\n    replica_log_info_map mdecrees;\n    gpid gpid(1, 0);\n\n    mdecrees[gpid] = replica_log_info(3, 0);\n    std::string fpath = \"./log.1.100\";\n    int index = 1;\n    int64_t offset = 100;\n    std::string str = \"hello, world!\";\n    error_code err;\n    log_file_ptr lf = nullptr;\n\n    // write log\n    ASSERT_TRUE(!dsn::utils::filesystem::file_exists(fpath));\n    lf = log_file::create_write(\".\", index, offset);\n    ASSERT_TRUE(lf != nullptr);\n    ASSERT_EQ(fpath, lf->path());\n    ASSERT_EQ(index, lf->index());\n    ASSERT_EQ(offset, lf->start_offset());\n    ASSERT_EQ(offset, lf->end_offset());\n    for (int i = 0; i < 100; i++) {\n        auto writer = new log_block();\n\n        if (i == 0) {\n            binary_writer temp_writer;\n            lf->write_file_header(temp_writer, mdecrees);\n            writer->add(temp_writer.get_buffer());\n            ASSERT_EQ(mdecrees, lf->previous_log_max_decrees());\n            log_file_header &h = lf->header();\n            ASSERT_EQ(100, h.start_global_offset);\n        }\n\n        binary_writer temp_writer;\n        temp_writer.write(str);\n        writer->add(temp_writer.get_buffer());\n\n        aio_task_ptr task =\n            lf->commit_log_block(*writer, offset, LPC_AIO_IMMEDIATE_CALLBACK, nullptr, nullptr, 0);\n        task->wait();\n        ASSERT_EQ(ERR_OK, task->error());\n        
ASSERT_EQ(writer->size(), task->get_transferred_size());\n\n        lf->flush();\n        offset += writer->size();\n\n        delete writer;\n    }\n    lf->close();\n    lf = nullptr;\n    ASSERT_TRUE(dsn::utils::filesystem::file_exists(fpath));\n\n    // file already exist\n    offset = 100;\n    lf = log_file::create_write(\".\", index, offset);\n    ASSERT_TRUE(lf == nullptr);\n\n    // invalid file name\n    lf = log_file::open_read(\"\", err);\n    ASSERT_TRUE(lf == nullptr);\n    ASSERT_EQ(ERR_INVALID_PARAMETERS, err);\n    lf = log_file::open_read(\"a\", err);\n    ASSERT_TRUE(lf == nullptr);\n    ASSERT_EQ(ERR_INVALID_PARAMETERS, err);\n    lf = log_file::open_read(\"aaaaa\", err);\n    ASSERT_TRUE(lf == nullptr);\n    ASSERT_EQ(ERR_INVALID_PARAMETERS, err);\n    lf = log_file::open_read(\"log.1.2.aaa\", err);\n    ASSERT_TRUE(lf == nullptr);\n    ASSERT_EQ(ERR_INVALID_PARAMETERS, err);\n    lf = log_file::open_read(\"log.1.2.removed\", err);\n    ASSERT_TRUE(lf == nullptr);\n    ASSERT_EQ(ERR_INVALID_PARAMETERS, err);\n    lf = log_file::open_read(\"log.1\", err);\n    ASSERT_TRUE(lf == nullptr);\n    ASSERT_EQ(ERR_INVALID_PARAMETERS, err);\n    lf = log_file::open_read(\"log.1.\", err);\n    ASSERT_TRUE(lf == nullptr);\n    ASSERT_EQ(ERR_INVALID_PARAMETERS, err);\n    lf = log_file::open_read(\"log..2\", err);\n    ASSERT_TRUE(lf == nullptr);\n    ASSERT_EQ(ERR_INVALID_PARAMETERS, err);\n    lf = log_file::open_read(\"log.1a.2\", err);\n    ASSERT_TRUE(lf == nullptr);\n    ASSERT_EQ(ERR_INVALID_PARAMETERS, err);\n    lf = log_file::open_read(\"log.1.2a\", err);\n    ASSERT_TRUE(lf == nullptr);\n    ASSERT_EQ(ERR_INVALID_PARAMETERS, err);\n\n    // file not exist\n    lf = log_file::open_read(\"log.0.0\", err);\n    ASSERT_TRUE(lf == nullptr);\n    ASSERT_EQ(ERR_FILE_OPERATION_FAILED, err);\n\n    // bad file data: empty file\n    ASSERT_TRUE(!dsn::utils::filesystem::file_exists(\"log.1.0\"));\n    copy_file(fpath.c_str(), \"log.1.0\", 0);\n    
ASSERT_TRUE(dsn::utils::filesystem::file_exists(\"log.1.0\"));\n    lf = log_file::open_read(\"log.1.0\", err);\n    ASSERT_TRUE(lf == nullptr);\n    ASSERT_EQ(ERR_HANDLE_EOF, err);\n    ASSERT_TRUE(!dsn::utils::filesystem::file_exists(\"log.1.0\"));\n    ASSERT_TRUE(dsn::utils::filesystem::file_exists(\"log.1.0.removed\"));\n\n    // bad file data: incomplete log_block_header\n    ASSERT_TRUE(!dsn::utils::filesystem::file_exists(\"log.1.1\"));\n    copy_file(fpath.c_str(), \"log.1.1\", sizeof(log_block_header) - 1);\n    ASSERT_TRUE(dsn::utils::filesystem::file_exists(\"log.1.1\"));\n    lf = log_file::open_read(\"log.1.1\", err);\n    ASSERT_TRUE(lf == nullptr);\n    ASSERT_EQ(ERR_INCOMPLETE_DATA, err);\n    ASSERT_TRUE(!dsn::utils::filesystem::file_exists(\"log.1.1\"));\n    ASSERT_TRUE(dsn::utils::filesystem::file_exists(\"log.1.1.removed\"));\n\n    // bad file data: bad log_block_header (magic = 0xfeadbeef)\n    ASSERT_TRUE(!dsn::utils::filesystem::file_exists(\"log.1.2\"));\n    copy_file(fpath.c_str(), \"log.1.2\");\n    int32_t bad_magic = 0xfeadbeef;\n    overwrite_file(\"log.1.2\", FIELD_OFFSET(log_block_header, magic), &bad_magic, sizeof(bad_magic));\n    ASSERT_TRUE(dsn::utils::filesystem::file_exists(\"log.1.2\"));\n    lf = log_file::open_read(\"log.1.2\", err);\n    ASSERT_TRUE(lf == nullptr);\n    ASSERT_EQ(ERR_INVALID_DATA, err);\n    ASSERT_TRUE(!dsn::utils::filesystem::file_exists(\"log.1.2\"));\n    ASSERT_TRUE(dsn::utils::filesystem::file_exists(\"log.1.2.removed\"));\n\n    // bad file data: bad log_block_header (crc check failed)\n    ASSERT_TRUE(!dsn::utils::filesystem::file_exists(\"log.1.3\"));\n    copy_file(fpath.c_str(), \"log.1.3\");\n    int32_t bad_crc = 0;\n    overwrite_file(\"log.1.3\", FIELD_OFFSET(log_block_header, body_crc), &bad_crc, sizeof(bad_crc));\n    ASSERT_TRUE(dsn::utils::filesystem::file_exists(\"log.1.3\"));\n    lf = log_file::open_read(\"log.1.3\", err);\n    ASSERT_TRUE(lf == nullptr);\n    
ASSERT_EQ(ERR_INVALID_DATA, err);\n    ASSERT_TRUE(!dsn::utils::filesystem::file_exists(\"log.1.3\"));\n    ASSERT_TRUE(dsn::utils::filesystem::file_exists(\"log.1.3.removed\"));\n\n    // bad file data: incomplete block body\n    ASSERT_TRUE(!dsn::utils::filesystem::file_exists(\"log.1.4\"));\n    copy_file(fpath.c_str(), \"log.1.4\", sizeof(log_block_header) + 1);\n    ASSERT_TRUE(dsn::utils::filesystem::file_exists(\"log.1.4\"));\n    lf = log_file::open_read(\"log.1.4\", err);\n    ASSERT_TRUE(lf == nullptr);\n    ASSERT_EQ(ERR_INCOMPLETE_DATA, err);\n    ASSERT_TRUE(!dsn::utils::filesystem::file_exists(\"log.1.4\"));\n    ASSERT_TRUE(dsn::utils::filesystem::file_exists(\"log.1.4.removed\"));\n    ASSERT_TRUE(dsn::utils::filesystem::rename_path(\"log.1.4.removed\", \"log.1.4\"));\n\n    // read the file for test\n    offset = 100;\n    lf = log_file::open_read(fpath.c_str(), err);\n    ASSERT_NE(nullptr, lf);\n    EXPECT_EQ(ERR_OK, err);\n    ASSERT_EQ(1, lf->index());\n    ASSERT_EQ(100, lf->start_offset());\n    int64_t sz;\n    ASSERT_TRUE(dsn::utils::filesystem::file_size(fpath, sz));\n    ASSERT_EQ(lf->start_offset() + sz, lf->end_offset());\n\n    // read data\n    lf->reset_stream();\n    for (int i = 0; i < 100; i++) {\n        blob bb;\n        auto err2 = lf->read_next_log_block(bb);\n        ASSERT_EQ(ERR_OK, err2);\n\n        binary_reader reader(bb);\n\n        if (i == 0) {\n            lf->read_file_header(reader);\n            ASSERT_TRUE(lf->is_right_header());\n            ASSERT_EQ(100, lf->header().start_global_offset);\n        }\n\n        std::string ss;\n        reader.read(ss);\n        ASSERT_TRUE(ss == str);\n\n        offset += bb.length() + sizeof(log_block_header);\n    }\n\n    ASSERT_TRUE(offset == lf->end_offset());\n\n    blob bb;\n    err = lf->read_next_log_block(bb);\n    ASSERT_TRUE(err == ERR_HANDLE_EOF);\n\n    lf = nullptr;\n\n    utils::filesystem::remove_path(fpath);\n}\n\nnamespace dsn {\nnamespace replication 
{\n\nclass mutation_log_test : public replica_test_base\n{\npublic:\n    mutation_log_test() {}\n\n    void SetUp() override\n    {\n        utils::filesystem::remove_path(_log_dir);\n        utils::filesystem::create_directory(_log_dir);\n\n        utils::filesystem::remove_path(_log_dir + \".test\");\n    }\n\n    void TearDown() override { utils::filesystem::remove_path(_log_dir); }\n\n    mutation_ptr create_test_mutation(decree d, const std::string &data) override\n    {\n        mutation_ptr mu(new mutation());\n        mu->data.header.ballot = 1;\n        mu->data.header.decree = d;\n        mu->data.header.pid = get_gpid();\n        mu->data.header.last_committed_decree = d - 1;\n        mu->data.header.log_offset = 0;\n\n        binary_writer writer;\n        for (int j = 0; j < 100; j++) {\n            writer.write(data);\n        }\n        mu->data.updates.emplace_back(mutation_update());\n        mu->data.updates.back().code = RPC_REPLICATION_WRITE_EMPTY;\n        mu->data.updates.back().data = writer.get_buffer();\n\n        mu->client_requests.push_back(nullptr);\n\n        return mu;\n    }\n\n    static void ASSERT_BLOB_EQ(const blob &lhs, const blob &rhs)\n    {\n        ASSERT_EQ(std::string(lhs.data(), lhs.length()), std::string(rhs.data(), rhs.length()));\n    }\n\n    // return number of entries written\n    int generate_multiple_log_files(uint files_num = 3)\n    {\n        // decree ranges from [1, files_num*10]\n        for (int f = 0; f < files_num; f++) {\n            // each round mlog will replay the former logs, and create new file\n            mutation_log_ptr mlog = create_private_log();\n            for (int i = 1; i <= 10; i++) {\n                std::string msg = \"hello!\";\n                mutation_ptr mu = create_test_mutation(10 * f + i, msg);\n                mlog->append(mu, LPC_AIO_IMMEDIATE_CALLBACK, nullptr, nullptr, 0);\n            }\n            mlog->tracker()->wait_outstanding_tasks();\n            mlog->close();\n   
     }\n        return static_cast<int>(files_num * 10);\n    }\n\n    mutation_log_ptr create_private_log() { return create_private_log(1); }\n\n    mutation_log_ptr create_private_log(int private_log_size_mb, decree replay_start_decree = 0)\n    {\n        gpid id = get_gpid();\n        std::map<gpid, decree> replay_condition;\n        replay_condition[id] = replay_start_decree;\n        mutation_log::replay_callback cb = [](int, mutation_ptr &) { return true; };\n        mutation_log_ptr mlog;\n\n        int try_cnt = 0;\n        while (try_cnt < 5) {\n            try_cnt++;\n            mlog =\n                new mutation_log_private(_replica->dir(), private_log_size_mb, id, _replica.get());\n            error_code err = mlog->open(cb, nullptr, replay_condition);\n            if (err == ERR_OK) {\n                break;\n            }\n            derror_f(\"mlog open failed, encountered error: {}\", err);\n        }\n        EXPECT_NE(mlog, nullptr);\n        return mlog;\n    }\n\n    void test_replay_single_file(int num_entries)\n    {\n        std::vector<mutation_ptr> mutations;\n\n        { // writing logs\n            mutation_log_ptr mlog = create_private_log();\n\n            for (int i = 0; i < num_entries; i++) {\n                mutation_ptr mu = create_test_mutation(2 + i, \"hello!\");\n                mutations.push_back(mu);\n                mlog->append(mu, LPC_AIO_IMMEDIATE_CALLBACK, nullptr, nullptr, 0);\n            }\n        }\n\n        { // replaying logs\n            std::string log_file_path = _log_dir + \"/log.1.0\";\n\n            error_code ec;\n            log_file_ptr file = log_file::open_read(log_file_path.c_str(), ec);\n            ASSERT_EQ(ec, ERR_OK) << ec.to_string();\n\n            int64_t end_offset;\n            int mutation_index = -1;\n            ec = mutation_log::replay(\n                file,\n                [&mutations, &mutation_index](int log_length, mutation_ptr &mu) -> bool {\n                    mutation_ptr 
wmu = mutations[++mutation_index];\n                    EXPECT_EQ(wmu->data.header, mu->data.header);\n                    EXPECT_EQ(wmu->data.updates.size(), mu->data.updates.size());\n                    ASSERT_BLOB_EQ(wmu->data.updates[0].data, mu->data.updates[0].data);\n                    EXPECT_EQ(wmu->data.updates[0].code, mu->data.updates[0].code);\n                    EXPECT_EQ(wmu->client_requests.size(), mu->client_requests.size());\n                    return true;\n                },\n                end_offset);\n            ASSERT_EQ(ec, ERR_HANDLE_EOF) << ec.to_string();\n        }\n    }\n\n    void test_replay_multiple_files(int num_entries, int private_log_file_size_mb)\n    {\n        std::vector<mutation_ptr> mutations;\n\n        { // writing logs\n            mutation_log_ptr mlog = create_private_log(private_log_file_size_mb);\n            for (int i = 0; i < num_entries; i++) {\n                mutation_ptr mu = create_test_mutation(2 + i, \"hello!\");\n                mutations.push_back(mu);\n                mlog->append(mu, LPC_AIO_IMMEDIATE_CALLBACK, nullptr, nullptr, 0);\n            }\n        }\n\n        { // reading logs\n            mutation_log_ptr mlog = create_private_log(private_log_file_size_mb);\n\n            std::vector<std::string> log_files;\n            ASSERT_TRUE(utils::filesystem::get_subfiles(mlog->dir(), log_files, false));\n\n            int64_t end_offset;\n            int mutation_index = -1;\n            mutation_log::replay(\n                log_files,\n                [&mutations, &mutation_index](int log_length, mutation_ptr &mu) -> bool {\n                    mutation_ptr wmu = mutations[++mutation_index];\n                    EXPECT_EQ(wmu->data.header, mu->data.header);\n                    EXPECT_EQ(wmu->data.updates.size(), mu->data.updates.size());\n                    ASSERT_BLOB_EQ(wmu->data.updates[0].data, mu->data.updates[0].data);\n                    EXPECT_EQ(wmu->data.updates[0].code, 
mu->data.updates[0].code);\n                    EXPECT_EQ(wmu->client_requests.size(), mu->client_requests.size());\n                    return true;\n                },\n                end_offset);\n            ASSERT_EQ(mutation_index + 1, (int)mutations.size());\n\n            ASSERT_GE(log_files.size(), 1);\n        }\n    }\n};\n\nTEST_F(mutation_log_test, replay_single_file_1000) { test_replay_single_file(1000); }\n\nTEST_F(mutation_log_test, replay_single_file_2000) { test_replay_single_file(2000); }\n\nTEST_F(mutation_log_test, replay_single_file_5000) { test_replay_single_file(5000); }\n\nTEST_F(mutation_log_test, replay_single_file_10000) { test_replay_single_file(10000); }\n\nTEST_F(mutation_log_test, replay_single_file_1) { test_replay_single_file(1); }\n\nTEST_F(mutation_log_test, replay_single_file_10) { test_replay_single_file(10); }\n\n// mutation_log::open\nTEST_F(mutation_log_test, open)\n{\n    std::vector<mutation_ptr> mutations;\n\n    { // writing logs\n        mutation_log_ptr mlog = create_private_log(4);\n\n        for (int i = 0; i < 1000; i++) {\n            mutation_ptr mu = create_test_mutation(2 + i, \"hello!\");\n            mutations.push_back(mu);\n            mlog->append(mu, LPC_AIO_IMMEDIATE_CALLBACK, nullptr, nullptr, 0);\n        }\n    }\n\n    { // reading logs\n        mutation_log_ptr mlog = new mutation_log_private(_log_dir, 4, get_gpid(), _replica.get());\n\n        int mutation_index = -1;\n        mlog->open(\n            [&mutations, &mutation_index](int log_length, mutation_ptr &mu) -> bool {\n                mutation_ptr wmu = mutations[++mutation_index];\n                EXPECT_EQ(wmu->data.header, mu->data.header);\n                EXPECT_EQ(wmu->data.updates.size(), mu->data.updates.size());\n                ASSERT_BLOB_EQ(wmu->data.updates[0].data, mu->data.updates[0].data);\n                EXPECT_EQ(wmu->data.updates[0].code, mu->data.updates[0].code);\n                EXPECT_EQ(wmu->client_requests.size(), 
mu->client_requests.size());\n                return true;\n            },\n            nullptr);\n        ASSERT_EQ(mutation_index + 1, (int)mutations.size());\n    }\n}\n\nTEST_F(mutation_log_test, replay_multiple_files_10000_1mb) { test_replay_multiple_files(10000, 1); }\n\nTEST_F(mutation_log_test, replay_multiple_files_20000_1mb) { test_replay_multiple_files(20000, 1); }\n\nTEST_F(mutation_log_test, replay_multiple_files_50000_1mb) { test_replay_multiple_files(50000, 1); }\n\nTEST_F(mutation_log_test, replay_start_decree)\n{\n    // decree ranges from [1, 30]\n    generate_multiple_log_files(3);\n\n    decree replay_start_decree = 11; // start replay from second file, the first file is ignored.\n    mutation_log_ptr mlog = create_private_log(1, replay_start_decree);\n\n    // ensure the first file is not stripped out.\n    ASSERT_EQ(mlog->max_gced_decree(get_gpid()), 0);\n    ASSERT_EQ(mlog->get_log_file_map().size(), 3);\n}\n\nTEST_F(mutation_log_test, reset_from)\n{\n    std::vector<mutation_ptr> expected;\n    { // writing logs\n        mutation_log_ptr mlog = new mutation_log_private(_log_dir, 4, get_gpid(), _replica.get());\n\n        EXPECT_EQ(mlog->open(nullptr, nullptr), ERR_OK);\n\n        for (int i = 0; i < 10; i++) {\n            mutation_ptr mu = create_test_mutation(2 + i, \"hello!\");\n            expected.push_back(mu);\n            mlog->append(mu, LPC_AIO_IMMEDIATE_CALLBACK, nullptr, nullptr, 0);\n        }\n        mlog->flush();\n\n        ASSERT_TRUE(utils::filesystem::rename_path(_log_dir, _log_dir + \".tmp\"));\n    }\n\n    ASSERT_TRUE(utils::filesystem::directory_exists(_log_dir + \".tmp\"));\n    ASSERT_FALSE(utils::filesystem::directory_exists(_log_dir));\n\n    // create another set of logs\n    mutation_log_ptr mlog = new mutation_log_private(_log_dir, 4, get_gpid(), _replica.get());\n    EXPECT_EQ(mlog->open(nullptr, nullptr), ERR_OK);\n    for (int i = 0; i < 1000; i++) {\n        mutation_ptr mu = create_test_mutation(2000 + i, 
\"hello!\");\n        mlog->append(mu, LPC_AIO_IMMEDIATE_CALLBACK, nullptr, nullptr, 0);\n    }\n    mlog->flush();\n\n    // reset from the tmp log dir.\n    std::vector<mutation_ptr> actual;\n    auto err = mlog->reset_from(_log_dir + \".tmp\",\n                                [&](int, mutation_ptr &mu) -> bool {\n                                    actual.push_back(mu);\n                                    return true;\n                                },\n                                [](error_code err) { ASSERT_EQ(err, ERR_OK); });\n    ASSERT_EQ(err, ERR_OK);\n    ASSERT_EQ(actual.size(), expected.size());\n\n    // the tmp dir has been removed.\n    ASSERT_FALSE(utils::filesystem::directory_exists(_log_dir + \".tmp\"));\n    ASSERT_TRUE(utils::filesystem::directory_exists(_log_dir));\n}\n\n// multi-threaded testing. ensure reset_from will wait until\n// all previous writes complete.\nTEST_F(mutation_log_test, reset_from_while_writing)\n{\n    std::vector<mutation_ptr> expected;\n    { // writing logs\n        mutation_log_ptr mlog = new mutation_log_private(_log_dir, 4, get_gpid(), _replica.get());\n        EXPECT_EQ(mlog->open(nullptr, nullptr), ERR_OK);\n\n        for (int i = 0; i < 10; i++) {\n            mutation_ptr mu = create_test_mutation(2 + i, \"hello!\");\n            expected.push_back(mu);\n            mlog->append(mu, LPC_AIO_IMMEDIATE_CALLBACK, nullptr, nullptr, 0);\n        }\n        mlog->flush();\n\n        ASSERT_TRUE(utils::filesystem::rename_path(_log_dir, _log_dir + \".test\"));\n    }\n\n    // create another set of logs\n    mutation_log_ptr mlog = new mutation_log_private(_log_dir, 4, get_gpid(), _replica.get());\n    EXPECT_EQ(mlog->open(nullptr, nullptr), ERR_OK);\n\n    // given with a large number of mutation to ensure\n    // plog::reset_from will face many uncompleted writes.\n    for (int i = 0; i < 1000 * 100; i++) {\n        mutation_ptr mu = create_test_mutation(2000 + i, \"hello!\");\n        mlog->append(mu, 
LPC_AIO_IMMEDIATE_CALLBACK, mlog->tracker(), nullptr, 0);\n    }\n\n    // reset from the tmp log dir.\n    std::vector<mutation_ptr> actual;\n    auto err = mlog->reset_from(_log_dir + \".test\",\n                                [&](int, mutation_ptr &mu) -> bool {\n                                    actual.push_back(mu);\n                                    return true;\n                                },\n                                [](error_code err) { ASSERT_EQ(err, ERR_OK); });\n    ASSERT_EQ(err, ERR_OK);\n\n    mlog->flush();\n    ASSERT_EQ(actual.size(), expected.size());\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/test/open_replica_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n\n#include \"replica/replica.h\"\n#include \"mock_utils.h\"\n#include \"meta/meta_data.h\"\n#include \"replica_test_base.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass open_replica_test : public replica_test_base\n{\npublic:\n    open_replica_test() = default;\n    ~open_replica_test() { dsn::utils::filesystem::remove_path(\"./tmp_dir\"); }\n\n    void test_open_replica()\n    {\n        app_info app_info;\n        app_info.app_type = \"replica\";\n        app_info.is_stateful = true;\n        app_info.max_replica_count = 3;\n        app_info.partition_count = 8;\n        app_info.app_id = 1;\n\n        struct test_data\n        {\n            ballot b;\n            decree last_committed_decree;\n            bool is_in_dir_nodes;\n            bool exec_failed;\n        } tests[] = {\n            {0, 0, true, true}, {0, 0, false, false}, {5, 5, true, true}, {5, 5, false, true},\n        };\n        int i = 0;\n        for (auto tt : tests) {\n            gpid gpid(app_info.app_id, i);\n            stub->_opening_replicas[gpid] = task_ptr(nullptr);\n\n            dsn::rpc_address node;\n            
node.assign_ipv4(\"127.0.0.11\", static_cast<uint16_t>(12321 + i + 1));\n\n            if (!tt.is_in_dir_nodes) {\n                dir_node *node_disk = new dir_node(\"tag_\" + std::to_string(i), \"tmp_dir\");\n                stub->_fs_manager._dir_nodes.emplace_back(node_disk);\n                stub->_fs_manager._available_data_dirs.emplace_back(\"tmp_dir\");\n            }\n\n            _replica->register_service();\n            mock_mutation_log_shared_ptr shared_log_mock =\n                new mock_mutation_log_shared(\"./tmp_dir\");\n            stub->set_log(shared_log_mock);\n            partition_configuration config;\n            config.pid = gpid;\n            config.ballot = tt.b;\n            config.last_committed_decree = tt.last_committed_decree;\n            std::shared_ptr<app_state> _the_app = app_state::create(app_info);\n\n            configuration_update_request fake_request;\n            fake_request.info = *_the_app;\n            fake_request.config = config;\n            fake_request.type = config_type::CT_ASSIGN_PRIMARY;\n            fake_request.node = node;\n\n            std::shared_ptr<configuration_update_request> req2(new configuration_update_request);\n            *req2 = fake_request;\n            if (tt.exec_failed) {\n                ASSERT_DEATH(stub->open_replica(app_info, gpid, nullptr, req2), \"\");\n            } else {\n                stub->open_replica(app_info, gpid, nullptr, req2);\n            }\n            ++i;\n        }\n    }\n};\n\nTEST_F(open_replica_test, open_replica_add_decree_and_ballot_check) { test_open_replica(); }\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/test/replica_disk_migrate_test.cpp",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#include <gtest/gtest.h>\n#include <dsn/utility/fail_point.h>\n\n#include \"replica/test/replica_disk_test_base.h\"\n#include \"replica/replica_disk_migrator.h\"\n\nnamespace dsn {\nnamespace replication {\nusing disk_migrate_rpc = rpc_holder<replica_disk_migrate_request, replica_disk_migrate_response>;\n\n// this test is based the node disk mock of replica_disk_test_base, please see the mock disk\n// information in replica_disk_test_base\nclass replica_disk_migrate_test : public replica_disk_test_base\n{\npublic:\n    replica_disk_migrate_rpc fake_migrate_rpc;\n\npublic:\n    void SetUp() override { generate_fake_rpc(); }\n\n    replica_ptr get_replica(const dsn::gpid &pid) const\n    {\n        replica_ptr rep = stub->get_replica(pid);\n        return rep;\n    }\n\n    void set_replica_status(const dsn::gpid &pid, partition_status::type status) const\n    {\n        get_replica(pid)->_config.status = status;\n    }\n\n    void set_migration_status(const dsn::gpid &pid, const disk_migration_status::type &status)\n    {\n        replica_ptr rep = get_replica(pid);\n        ASSERT_TRUE(rep);\n        
rep->disk_migrator()->set_status(status);\n    }\n\n    void set_replica_dir(const dsn::gpid &pid, const std::string &dir)\n    {\n        replica_ptr rep = get_replica(pid);\n        ASSERT_TRUE(rep);\n        rep->_dir = dir;\n    }\n\n    void set_replica_target_dir(const dsn::gpid &pid, const std::string &dir)\n    {\n        replica_ptr rep = get_replica(pid);\n        ASSERT_TRUE(rep);\n        rep->disk_migrator()->_target_replica_dir = dir;\n    }\n\n    void check_migration_args(replica_disk_migrate_rpc &rpc)\n    {\n        replica_ptr rep = get_replica(rpc.request().pid);\n        ASSERT_TRUE(rep);\n        rep->disk_migrator()->check_migration_args(fake_migrate_rpc);\n    }\n\n    void init_migration_target_dir(replica_disk_migrate_rpc &rpc)\n    {\n        replica_ptr rep = get_replica(rpc.request().pid);\n        ASSERT_TRUE(rep);\n        rep->disk_migrator()->init_target_dir(rpc.request());\n    }\n\n    void migrate_replica_checkpoint(replica_disk_migrate_rpc &rpc)\n    {\n        replica_ptr rep = get_replica(rpc.request().pid);\n        ASSERT_TRUE(rep);\n        rep->disk_migrator()->migrate_replica_checkpoint(rpc.request());\n    }\n\n    void migrate_replica_app_info(replica_disk_migrate_rpc &rpc)\n    {\n        replica_ptr rep = get_replica(rpc.request().pid);\n        ASSERT_TRUE(rep);\n        rep->disk_migrator()->migrate_replica_app_info(rpc.request());\n    }\n\n    dsn::task_ptr close_current_replica(replica_disk_migrate_rpc &rpc)\n    {\n        replica_ptr rep = get_replica(rpc.request().pid);\n        return rep->disk_migrator()->close_current_replica(rpc.request());\n    }\n\n    void update_replica_dir(replica_disk_migrate_rpc &rpc)\n    {\n        replica_ptr rep = get_replica(rpc.request().pid);\n        rep->disk_migrator()->update_replica_dir();\n    }\n\n    void open_replica(const app_info &app, gpid id)\n    {\n        stub->open_replica(app, id, nullptr, nullptr);\n    }\n\nprivate:\n    void generate_fake_rpc()\n    {\n   
     // create RPC_REPLICA_DISK_MIGRATE fake request\n        auto migrate_request = dsn::make_unique<replica_disk_migrate_request>();\n        fake_migrate_rpc = disk_migrate_rpc(std::move(migrate_request), RPC_REPLICA_DISK_MIGRATE);\n    }\n};\n\nTEST_F(replica_disk_migrate_test, on_migrate_replica)\n{\n    auto &request = *fake_migrate_rpc.mutable_request();\n    auto &response = fake_migrate_rpc.response();\n\n    // replica not existed\n    request.pid = dsn::gpid(app_info_1.app_id, 100);\n    request.origin_disk = \"tag_1\";\n    request.target_disk = \"tag_2\";\n    stub->on_disk_migrate(fake_migrate_rpc);\n    ASSERT_EQ(response.err, ERR_OBJECT_NOT_FOUND);\n\n    request.pid = dsn::gpid(app_info_1.app_id, 2);\n    request.origin_disk = \"tag_1\";\n    request.target_disk = \"tag_2\";\n    stub->on_disk_migrate(fake_migrate_rpc);\n    get_replica(request.pid)->tracker()->wait_outstanding_tasks();\n    ASSERT_EQ(response.err, ERR_OK);\n}\n\nTEST_F(replica_disk_migrate_test, migrate_disk_replica_check)\n{\n    auto &request = *fake_migrate_rpc.mutable_request();\n    auto &response = fake_migrate_rpc.response();\n\n    request.pid = dsn::gpid(app_info_1.app_id, 1);\n    request.origin_disk = \"tag_1\";\n    request.target_disk = \"tag_2\";\n\n    // check existed task\n    set_migration_status(request.pid, disk_migration_status::MOVING);\n    check_migration_args(fake_migrate_rpc);\n    ASSERT_EQ(response.err, ERR_BUSY);\n    set_migration_status(fake_migrate_rpc.request().pid,\n                         disk_migration_status::IDLE); // revert IDLE status\n\n    // check invalid partition status\n    check_migration_args(fake_migrate_rpc);\n    ASSERT_EQ(response.err, ERR_INVALID_STATE);\n\n    // check same disk\n    request.pid = dsn::gpid(app_info_1.app_id, 2);\n    request.origin_disk = \"tag_1\";\n    request.target_disk = \"tag_1\";\n    check_migration_args(fake_migrate_rpc);\n    ASSERT_EQ(response.err, ERR_INVALID_PARAMETERS);\n\n    // check invalid 
origin disk\n    request.origin_disk = \"tag_100\";\n    request.target_disk = \"tag_0\";\n    check_migration_args(fake_migrate_rpc);\n    ASSERT_EQ(response.err, ERR_OBJECT_NOT_FOUND);\n    // check invalid target disk\n    request.origin_disk = \"tag_1\";\n    request.target_disk = \"tag_200\";\n    check_migration_args(fake_migrate_rpc);\n    ASSERT_EQ(response.err, ERR_OBJECT_NOT_FOUND);\n\n    // check replica doesn't existed origin disk\n    request.origin_disk = \"tag_empty_1\";\n    request.target_disk = \"tag_6\";\n    check_migration_args(fake_migrate_rpc);\n    ASSERT_EQ(response.err, ERR_OBJECT_NOT_FOUND);\n    // check replica has existed on target disk\n    request.origin_disk = \"tag_1\";\n    request.target_disk = \"tag_new\";\n    generate_mock_dir_node(app_info_1, request.pid, request.target_disk);\n    check_migration_args(fake_migrate_rpc);\n    ASSERT_EQ(response.err, ERR_PATH_ALREADY_EXIST);\n    remove_mock_dir_node(request.target_disk);\n\n    // check passed\n    request.origin_disk = \"tag_1\";\n    request.target_disk = \"tag_empty_1\";\n    ASSERT_EQ(get_replica(request.pid)->disk_migrator()->status(), disk_migration_status::IDLE);\n    check_migration_args(fake_migrate_rpc);\n    ASSERT_EQ(response.err, ERR_OK);\n}\n\nTEST_F(replica_disk_migrate_test, disk_migrate_replica_run)\n{\n    auto &request = *fake_migrate_rpc.mutable_request();\n\n    request.pid = dsn::gpid(app_info_1.app_id, 2);\n    request.origin_disk = \"tag_1\";\n    request.target_disk = \"tag_empty_1\";\n    set_replica_dir(request.pid,\n                    fmt::format(\"./{}/{}.replica\", request.origin_disk, request.pid.to_string()));\n    set_migration_status(request.pid, disk_migration_status::MOVING);\n\n    const std::string kTargetReplicaDir = fmt::format(\n        \"./{}/{}.replica.disk.migrate.tmp/\", request.target_disk, request.pid.to_string());\n\n    const std::string kTargetDataDir = fmt::format(\n        \"./{}/{}.replica.disk.migrate.tmp/data/rdb/\", 
request.target_disk, request.pid.to_string());\n    const std::string kTargetCheckPointFile =\n        fmt::format(\"./{}/{}.replica.disk.migrate.tmp/data/rdb/checkpoint.file\",\n                    request.target_disk,\n                    request.pid.to_string());\n    const std::string kTargetInitInfoFile = fmt::format(\"./{}/{}.replica.disk.migrate.tmp/{}\",\n                                                        request.target_disk,\n                                                        request.pid.to_string(),\n                                                        replica_init_info::kInitInfo);\n    const std::string kTargetAppInfoFile = fmt::format(\"./{}/{}.replica.disk.migrate.tmp/{}\",\n                                                       request.target_disk,\n                                                       request.pid.to_string(),\n                                                       replica::kAppInfo);\n\n    init_migration_target_dir(fake_migrate_rpc);\n    ASSERT_TRUE(utils::filesystem::directory_exists(kTargetDataDir));\n\n    migrate_replica_checkpoint(fake_migrate_rpc);\n    ASSERT_TRUE(utils::filesystem::file_exists(kTargetCheckPointFile));\n\n    migrate_replica_app_info(fake_migrate_rpc);\n    ASSERT_TRUE(utils::filesystem::file_exists(kTargetInitInfoFile));\n    ASSERT_TRUE(utils::filesystem::file_exists(kTargetAppInfoFile));\n\n    // remove test tmp path\n    utils::filesystem::remove_path(kTargetReplicaDir);\n\n    fail::cfg(\"init_target_dir\", \"return()\");\n    fail::cfg(\"migrate_replica_checkpoint\", \"return()\");\n    fail::cfg(\"migrate_replica_app_info\", \"return()\");\n\n    const auto replica_ptr = get_replica(request.pid);\n\n    set_migration_status(request.pid, disk_migration_status::MOVING);\n    init_migration_target_dir(fake_migrate_rpc);\n    ASSERT_FALSE(utils::filesystem::directory_exists(kTargetDataDir));\n    ASSERT_EQ(replica_ptr->disk_migrator()->status(), disk_migration_status::IDLE);\n\n    
set_migration_status(request.pid, disk_migration_status::MOVING);\n    migrate_replica_checkpoint(fake_migrate_rpc);\n    ASSERT_FALSE(utils::filesystem::file_exists(kTargetCheckPointFile));\n    ASSERT_EQ(replica_ptr->disk_migrator()->status(), disk_migration_status::IDLE);\n\n    set_migration_status(request.pid, disk_migration_status::MOVING);\n    migrate_replica_app_info(fake_migrate_rpc);\n    ASSERT_FALSE(utils::filesystem::file_exists(kTargetInitInfoFile));\n    ASSERT_FALSE(utils::filesystem::file_exists(kTargetAppInfoFile));\n    ASSERT_EQ(replica_ptr->disk_migrator()->status(), disk_migration_status::IDLE);\n}\n\nTEST_F(replica_disk_migrate_test, disk_migrate_replica_close)\n{\n    auto &request = *fake_migrate_rpc.mutable_request();\n    request.pid = dsn::gpid(app_info_1.app_id, 2);\n\n    // test invalid replica status\n    set_replica_status(request.pid, partition_status::PS_PRIMARY);\n    ASSERT_FALSE(close_current_replica(fake_migrate_rpc));\n\n    // test valid replica status\n    set_migration_status(request.pid, disk_migration_status::MOVED);\n    set_replica_status(request.pid, partition_status::PS_SECONDARY);\n    ASSERT_TRUE(close_current_replica(fake_migrate_rpc));\n}\n\nTEST_F(replica_disk_migrate_test, disk_migrate_replica_update)\n{\n    auto &request = *fake_migrate_rpc.mutable_request();\n    request.pid = dsn::gpid(app_info_1.app_id, 3);\n    request.origin_disk = \"tag_1\";\n    request.target_disk = \"tag_empty_1\";\n\n    const std::string kReplicaOriginDir =\n        fmt::format(\"./{}/{}.replica\", request.origin_disk, request.pid.to_string());\n    const std::string kReplicaNewTempDir = fmt::format(\n        \"./{}/{}.replica.disk.migrate.tmp/\", request.target_disk, request.pid.to_string());\n    const std::string kReplicaOriginSuffixDir = fmt::format(\n        \"./{}/{}.replica.disk.migrate.ori/\", request.origin_disk, request.pid.to_string());\n    const std::string kReplicaNewDir =\n        fmt::format(\"./{}/{}.replica/\", 
request.target_disk, request.pid.to_string());\n\n    utils::filesystem::create_directory(kReplicaOriginDir);\n    utils::filesystem::create_directory(kReplicaNewTempDir);\n\n    // replica dir is error, rename origin dir to \"*.ori\" failed\n    set_replica_dir(request.pid, \"error\");\n    update_replica_dir(fake_migrate_rpc);\n    ASSERT_EQ(get_replica(request.pid)->disk_migrator()->status(), disk_migration_status::IDLE);\n\n    // replica target dir is error, rename \"*.tmp\" dir failed\n    set_replica_dir(request.pid, kReplicaOriginDir);\n    set_replica_target_dir(request.pid, \"error\");\n    update_replica_dir(fake_migrate_rpc);\n    ASSERT_EQ(get_replica(request.pid)->disk_migrator()->status(), disk_migration_status::IDLE);\n    ASSERT_TRUE(utils::filesystem::directory_exists(kReplicaOriginDir));\n    ASSERT_FALSE(utils::filesystem::directory_exists(kReplicaOriginSuffixDir));\n\n    // update success\n    set_replica_target_dir(request.pid, kReplicaNewTempDir);\n    update_replica_dir(fake_migrate_rpc);\n    ASSERT_TRUE(utils::filesystem::directory_exists(kReplicaOriginSuffixDir));\n    ASSERT_TRUE(utils::filesystem::directory_exists(kReplicaNewDir));\n    utils::filesystem::remove_path(fmt::format(\"./{}/\", request.origin_disk));\n    utils::filesystem::remove_path(fmt::format(\"./{}/\", request.target_disk));\n    for (const auto &node_disk : get_dir_nodes()) {\n        if (node_disk->tag == request.origin_disk) {\n            auto gpids = node_disk->holding_replicas[app_info_1.app_id];\n            ASSERT_TRUE(gpids.find(request.pid) == gpids.end());\n            continue;\n        }\n\n        if (node_disk->tag == request.target_disk) {\n            auto gpids = node_disk->holding_replicas[app_info_1.app_id];\n            ASSERT_TRUE(gpids.find(request.pid) != gpids.end());\n            continue;\n        }\n    }\n}\n\nTEST_F(replica_disk_migrate_test, disk_migrate_replica_open)\n{\n    auto &request = *fake_migrate_rpc.mutable_request();\n    
request.pid = dsn::gpid(app_info_1.app_id, 4);\n    request.origin_disk = \"tag_2\";\n    request.target_disk = \"tag_empty_1\";\n\n    remove_mock_dir_node(request.origin_disk);\n    const std::string kReplicaOriginSuffixDir = fmt::format(\n        \"./{}/{}.replica.disk.migrate.ori/\", request.origin_disk, request.pid.to_string());\n    const std::string kReplicaNewDir =\n        fmt::format(\"./{}/{}.replica/\", request.target_disk, request.pid.to_string());\n    utils::filesystem::create_directory(kReplicaOriginSuffixDir);\n    utils::filesystem::create_directory(kReplicaNewDir);\n\n    fail::cfg(\"mock_replica_load\", \"return()\");\n    const std::string kReplicaOriginDir =\n        fmt::format(\"./{}/{}.replica\", request.origin_disk, request.pid.to_string());\n    const std::string kReplicaGarDir =\n        fmt::format(\"./{}/{}.replica.gar\", request.target_disk, request.pid.to_string());\n    open_replica(app_info_1, request.pid);\n\n    ASSERT_TRUE(utils::filesystem::directory_exists(kReplicaOriginDir));\n    ASSERT_TRUE(utils::filesystem::directory_exists(kReplicaGarDir));\n\n    utils::filesystem::remove_path(kReplicaOriginDir);\n    utils::filesystem::remove_path(kReplicaGarDir);\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/test/replica_disk_test.cpp",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#include <gtest/gtest.h>\n#include <dsn/utility/fail_point.h>\n\n#include \"replica_disk_test_base.h\"\n#include \"replica/disk_cleaner.h\"\n\nnamespace dsn {\nnamespace replication {\n\nusing query_disk_info_rpc = rpc_holder<query_disk_info_request, query_disk_info_response>;\n\nclass replica_disk_test : public replica_disk_test_base\n{\npublic:\n    query_disk_info_rpc fake_query_disk_rpc;\n\npublic:\n    void SetUp() override {}\n\n    void generate_fake_rpc()\n    {\n        // create RPC_QUERY_DISK_INFO fake request\n        auto query_request = dsn::make_unique<query_disk_info_request>();\n        fake_query_disk_rpc = query_disk_info_rpc(std::move(query_request), RPC_QUERY_DISK_INFO);\n    }\n\n    error_code send_add_new_disk_rpc(const std::string disk_str)\n    {\n        auto add_disk_request = dsn::make_unique<add_new_disk_request>();\n        add_disk_request->disk_str = disk_str;\n        auto rpc = add_new_disk_rpc(std::move(add_disk_request), RPC_QUERY_DISK_INFO);\n        stub->on_add_new_disk(rpc);\n        error_code err = rpc.response().err;\n        if (err != ERR_OK) {\n            ddebug_f(\"error msg: {}\", 
rpc.response().err_hint);\n        }\n        return err;\n    }\n};\n\nTEST_F(replica_disk_test, on_query_disk_info_all_app)\n{\n    generate_fake_rpc();\n    stub->on_query_disk_info(fake_query_disk_rpc);\n\n    query_disk_info_response &disk_info_response = fake_query_disk_rpc.response();\n    // test response disk_info\n    ASSERT_EQ(disk_info_response.total_capacity_mb, 2500);\n    ASSERT_EQ(disk_info_response.total_available_mb, 750);\n\n    auto &disk_infos = disk_info_response.disk_infos;\n    ASSERT_EQ(disk_infos.size(), 6);\n\n    int info_size = disk_infos.size();\n    int app_id_1_partition_index = 1;\n    int app_id_2_partition_index = 1;\n    for (int i = 0; i < info_size; i++) {\n        if (disk_infos[i].tag == \"tag_empty_1\") {\n            continue;\n        }\n        ASSERT_EQ(disk_infos[i].tag, \"tag_\" + std::to_string(i + 1));\n        ASSERT_EQ(disk_infos[i].full_dir, \"./tag_\" + std::to_string(i + 1));\n        ASSERT_EQ(disk_infos[i].disk_capacity_mb, 500);\n        ASSERT_EQ(disk_infos[i].disk_available_mb, (i + 1) * 50);\n        // `holding_primary_replicas` and `holding_secondary_replicas` is std::map<app_id,\n        // std::set<::dsn::gpid>>\n        ASSERT_EQ(disk_infos[i].holding_primary_replicas.size(), 2);\n        ASSERT_EQ(disk_infos[i].holding_secondary_replicas.size(), 2);\n\n        // test the gpid of app_id_1\n        // test primary\n        ASSERT_EQ(disk_infos[i].holding_primary_replicas[app_info_1.app_id].size(),\n                  app_id_1_primary_count_for_disk);\n        for (std::set<gpid>::iterator it =\n                 disk_infos[i].holding_primary_replicas[app_info_1.app_id].begin();\n             it != disk_infos[i].holding_primary_replicas[app_info_1.app_id].end();\n             it++) {\n            ASSERT_EQ(it->get_app_id(), app_info_1.app_id);\n            ASSERT_EQ(it->get_partition_index(), app_id_1_partition_index++);\n        }\n        // test secondary\n        
ASSERT_EQ(disk_infos[i].holding_secondary_replicas[app_info_1.app_id].size(),\n                  app_id_1_secondary_count_for_disk);\n        for (std::set<gpid>::iterator it =\n                 disk_infos[i].holding_secondary_replicas[app_info_1.app_id].begin();\n             it != disk_infos[i].holding_secondary_replicas[app_info_1.app_id].end();\n             it++) {\n            ASSERT_EQ(it->get_app_id(), app_info_1.app_id);\n            ASSERT_EQ(it->get_partition_index(), app_id_1_partition_index++);\n        }\n\n        // test the gpid of app_id_2\n        // test primary\n        ASSERT_EQ(disk_infos[i].holding_primary_replicas[app_info_2.app_id].size(),\n                  app_id_2_primary_count_for_disk);\n        for (std::set<gpid>::iterator it =\n                 disk_infos[i].holding_primary_replicas[app_info_2.app_id].begin();\n             it != disk_infos[i].holding_primary_replicas[app_info_2.app_id].end();\n             it++) {\n            ASSERT_EQ(it->get_app_id(), app_info_2.app_id);\n            ASSERT_EQ(it->get_partition_index(), app_id_2_partition_index++);\n        }\n        // test secondary\n        ASSERT_EQ(disk_infos[i].holding_secondary_replicas[app_info_2.app_id].size(),\n                  app_id_2_secondary_count_for_disk);\n        for (std::set<gpid>::iterator it =\n                 disk_infos[i].holding_secondary_replicas[app_info_2.app_id].begin();\n             it != disk_infos[i].holding_secondary_replicas[app_info_2.app_id].end();\n             it++) {\n            ASSERT_EQ(it->get_app_id(), app_info_2.app_id);\n            ASSERT_EQ(it->get_partition_index(), app_id_2_partition_index++);\n        }\n    }\n}\n\nTEST_F(replica_disk_test, on_query_disk_info_app_not_existed)\n{\n    generate_fake_rpc();\n    query_disk_info_request &request = *fake_query_disk_rpc.mutable_request();\n    request.app_name = \"not_existed_app\";\n    stub->on_query_disk_info(fake_query_disk_rpc);\n    
ASSERT_EQ(fake_query_disk_rpc.response().err, ERR_OBJECT_NOT_FOUND);\n}\n\nTEST_F(replica_disk_test, on_query_disk_info_one_app)\n{\n    generate_fake_rpc();\n    query_disk_info_request &request = *fake_query_disk_rpc.mutable_request();\n\n    request.app_name = app_info_1.app_name;\n    stub->on_query_disk_info(fake_query_disk_rpc);\n\n    auto &disk_infos_with_app_1 = fake_query_disk_rpc.response().disk_infos;\n    int info_size = disk_infos_with_app_1.size();\n    for (int i = 0; i < info_size; i++) {\n        if (disk_infos_with_app_1[i].tag == \"tag_empty_1\") {\n            continue;\n        }\n        // `holding_primary_replicas` and `holding_secondary_replicas` is std::map<app_id,\n        // std::set<::dsn::gpid>>\n        ASSERT_EQ(disk_infos_with_app_1[i].holding_primary_replicas.size(), 1);\n        ASSERT_EQ(disk_infos_with_app_1[i].holding_secondary_replicas.size(), 1);\n        ASSERT_EQ(disk_infos_with_app_1[i].holding_primary_replicas[app_info_1.app_id].size(),\n                  app_id_1_primary_count_for_disk);\n        ASSERT_EQ(disk_infos_with_app_1[i].holding_secondary_replicas[app_info_1.app_id].size(),\n                  app_id_1_secondary_count_for_disk);\n        ASSERT_TRUE(disk_infos_with_app_1[i].holding_primary_replicas.find(app_info_2.app_id) ==\n                    disk_infos_with_app_1[i].holding_primary_replicas.end());\n        ASSERT_TRUE(disk_infos_with_app_1[i].holding_secondary_replicas.find(app_info_2.app_id) ==\n                    disk_infos_with_app_1[i].holding_secondary_replicas.end());\n    }\n}\n\nTEST_F(replica_disk_test, gc_disk_useless_dir)\n{\n    FLAGS_gc_disk_error_replica_interval_seconds = 1;\n    FLAGS_gc_disk_garbage_replica_interval_seconds = 1;\n    FLAGS_gc_disk_migration_origin_replica_interval_seconds = 1;\n    FLAGS_gc_disk_migration_tmp_replica_interval_seconds = 1;\n\n    std::vector<std::string> tests{\n        \"./replica1.err\",\n        \"./replica2.err\",\n        \"./replica.gar\",\n        
\"./replica.tmp\",\n        \"./replica.ori\",\n        \"./replica.bak\",\n        \"./replica.1.1\",\n    };\n\n    for (const auto &test : tests) {\n        utils::filesystem::create_directory(test);\n        ASSERT_TRUE(utils::filesystem::directory_exists(test));\n    }\n\n    sleep(5);\n\n    std::vector<std::string> data_dirs{\"./\"};\n    disk_cleaning_report report{};\n    dsn::replication::disk_remove_useless_dirs(data_dirs, report);\n\n    for (const auto &test : tests) {\n        if (!dsn::replication::is_data_dir_removable(test)) {\n            ASSERT_TRUE(utils::filesystem::directory_exists(test));\n            continue;\n        }\n        ASSERT_FALSE(utils::filesystem::directory_exists(test));\n    }\n\n    ASSERT_EQ(report.remove_dir_count, 5);\n    ASSERT_EQ(report.disk_migrate_origin_count, 1);\n    ASSERT_EQ(report.disk_migrate_tmp_count, 1);\n    ASSERT_EQ(report.garbage_replica_count, 1);\n    ASSERT_EQ(report.error_replica_count, 2);\n}\n\nTEST_F(replica_disk_test, disk_status_test)\n{\n    int32_t node_index = 0;\n    struct disk_status_test\n    {\n        disk_status::type old_status;\n        disk_status::type new_status;\n    } tests[]{{disk_status::NORMAL, disk_status::NORMAL},\n              {disk_status::NORMAL, disk_status::SPACE_INSUFFICIENT},\n              {disk_status::SPACE_INSUFFICIENT, disk_status::SPACE_INSUFFICIENT},\n              {disk_status::SPACE_INSUFFICIENT, disk_status::NORMAL}};\n    for (const auto &test : tests) {\n        auto node = get_dir_nodes()[node_index];\n        mock_node_status(node_index, test.old_status, test.new_status);\n        update_disks_status();\n        for (auto &kv : node->holding_replicas) {\n            for (auto &pid : kv.second) {\n                bool flag;\n                ASSERT_EQ(replica_disk_space_insufficient(pid, flag), ERR_OK);\n                ASSERT_EQ(flag, test.new_status == disk_status::SPACE_INSUFFICIENT);\n            }\n        }\n    }\n    mock_node_status(node_index, 
disk_status::NORMAL, disk_status::NORMAL);\n}\n\nTEST_F(replica_disk_test, broken_disk_test)\n{\n    // Test cases:\n    // create: true, check_rw: true\n    // create: true, check_rw: false\n    // create: false\n    struct broken_disk_test\n    {\n        std::string mock_create_dir;\n        std::string mock_rw_flag;\n        int32_t data_dir_size;\n    } tests[]{{\"true\", \"true\", 3}, {\"true\", \"false\", 2}, {\"false\", \"false\", 2}};\n    for (const auto &test : tests) {\n        ASSERT_EQ(test.data_dir_size,\n                  ignore_broken_disk_test(test.mock_create_dir, test.mock_rw_flag));\n    }\n}\n\nTEST_F(replica_disk_test, add_new_disk_test)\n{\n    // Test case:\n    // - invalid params\n    // - dir is available dir\n    // - dir is not empty\n    // - create dir failed\n    // - dir can't read or write\n    // - succeed\n    struct add_disk_test\n    {\n        std::string disk_str;\n        std::string create_dir;\n        std::string rw_flag;\n        error_code expected_err;\n    } tests[]{{\"\", \"true\", \"true\", ERR_INVALID_PARAMETERS},\n              {\"wrong_format\", \"true\", \"true\", ERR_INVALID_PARAMETERS},\n              {\"add_new_exist_tag:add_new_exist_disk0\", \"true\", \"true\", ERR_NODE_ALREADY_EXIST},\n              {\"add_new_exist_tag0:add_new_exist_disk\", \"true\", \"true\", ERR_NODE_ALREADY_EXIST},\n              {\"add_new_not_empty_tag:add_new_not_empty_disk\", \"true\", \"true\", ERR_DIR_NOT_EMPTY},\n              {\"new_tag1:new_disk1\", \"false\", \"true\", ERR_FILE_OPERATION_FAILED},\n              {\"new_tag1:new_disk1\", \"true\", \"false\", ERR_FILE_OPERATION_FAILED},\n              {\"new_tag:new_disk\", \"true\", \"true\", ERR_OK}};\n    for (const auto &test : tests) {\n        prepare_before_add_new_disk_test(test.create_dir, test.rw_flag);\n        ASSERT_EQ(send_add_new_disk_rpc(test.disk_str), test.expected_err);\n        reset_after_add_new_disk_test();\n    }\n}\n\n} // namespace replication\n} // 
namespace dsn\n"
  },
  {
    "path": "src/replica/test/replica_disk_test_base.h",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#include <gtest/gtest.h>\n#include <dsn/utility/fail_point.h>\n#include <dsn/dist/fmt_logging.h>\n\n#include \"replica/test/replica_test_base.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_disk_test_base : public replica_test_base\n{\npublic:\n    // create `dir_nodes_count`(tag_1~tag_5) mock disk:\n    // capacity info\n    // node_disk     disk_capacity  disk_available_mb  disk_available_ratio\n    //  tag_1            100*5             50*1              10%\n    //  tag_2            100*5             50*2              20%\n    //  tag_3            100*5             50*3              30%\n    //  tag_4            100*5             50*4              40%\n    //  tag_5            100*5             50*5              50%\n    //  total            2500              750               30%\n    // replica info, for example:\n    //   dir_node             primary/secondary\n    //\n    //   tag_empty_1\n    //   tag_1                1.1 | 1.2,1.3\n    //                        2.1,2.2 | 2.3,2.4,2.5,2.6\n    //\n    //   tag_2                1.4 | 1.5,1.6\n    //                        2.7,2.8 | 2.9,2.10,2.11,2.12,2.13\n    
//            ...\n    //            ...\n    replica_disk_test_base()\n    {\n        fail::setup();\n\n        fail::cfg(\"update_disk_stat\", \"return()\");\n        generate_mock_app_info();\n\n        generate_mock_dir_nodes(dir_nodes_count);\n        generate_mock_empty_dir_node(empty_dir_nodes_count);\n\n        stub->generate_replicas_base_dir_nodes_for_app(\n            app_info_1, app_id_1_primary_count_for_disk, app_id_1_secondary_count_for_disk);\n\n        stub->generate_replicas_base_dir_nodes_for_app(\n            app_info_2, app_id_2_primary_count_for_disk, app_id_2_secondary_count_for_disk);\n        stub->on_disk_stat();\n    }\n\n    ~replica_disk_test_base() { fail::teardown(); }\n\n    void update_disk_replica() { stub->on_disk_stat(); }\n\n    void update_disks_status() { stub->update_disks_status(); }\n\n    std::vector<std::shared_ptr<dir_node>> get_dir_nodes() { return stub->_fs_manager._dir_nodes; }\n\n    void generate_mock_dir_node(const app_info &app,\n                                const gpid pid,\n                                const std::string &tag,\n                                const std::string &full_dir = \"full_dir\")\n    {\n        dir_node *node_disk = new dir_node(tag, full_dir);\n        node_disk->holding_replicas[app.app_id].emplace(pid);\n        stub->_fs_manager._dir_nodes.emplace_back(node_disk);\n        stub->_fs_manager._available_data_dirs.emplace_back(full_dir);\n    }\n\n    void remove_mock_dir_node(const std::string &tag)\n    {\n        for (auto iter = stub->_fs_manager._dir_nodes.begin();\n             iter != stub->_fs_manager._dir_nodes.end();\n             iter++) {\n            if ((*iter)->tag == tag) {\n                stub->_fs_manager._dir_nodes.erase(iter);\n                break;\n            }\n        }\n    }\n\n    void\n    mock_node_status(int32_t node_index, disk_status::type old_status, disk_status::type new_status)\n    {\n        auto node = get_dir_nodes()[node_index];\n        for 
(const auto &kv : node->holding_replicas) {\n            for (const auto &pid : kv.second) {\n                update_replica_disk_status(pid, old_status);\n            }\n        }\n        stub->_fs_manager._status_updated_dir_nodes.clear();\n        if (old_status != new_status) {\n            node->status = new_status;\n            stub->_fs_manager._status_updated_dir_nodes.emplace_back(node);\n        }\n    }\n\n    error_code replica_disk_space_insufficient(const gpid &pid, bool &flag)\n    {\n        replica_ptr replica = stub->get_replica(pid);\n        if (replica == nullptr) {\n            return ERR_OBJECT_NOT_FOUND;\n        }\n        flag = replica->disk_space_insufficient();\n        return ERR_OK;\n    }\n\n    int32_t ignore_broken_disk_test(const std::string &mock_create_directory,\n                                    const std::string &mock_check_rw)\n    {\n        std::vector<std::string> data_dirs = {\"disk1\", \"disk2\", \"disk3\"};\n        std::vector<std::string> data_dir_tags = {\"tag1\", \"tag2\", \"tag3\"};\n        auto test_stub = make_unique<mock_replica_stub>();\n        fail::cfg(\"filesystem_create_directory\", \"return(\" + mock_create_directory + \")\");\n        fail::cfg(\"filesystem_check_dir_rw\", \"return(\" + mock_check_rw + \")\");\n        fail::cfg(\"update_disk_stat\", \"return()\");\n        test_stub->initialize_fs_manager(data_dirs, data_dir_tags);\n        int32_t dir_size = test_stub->_fs_manager.get_available_data_dirs().size();\n        test_stub.reset();\n        return dir_size;\n    }\n\n    void prepare_before_add_new_disk_test(const std::string &create_dir,\n                                          const std::string &check_rw)\n    {\n        stub->_fs_manager.add_new_dir_node(\"add_new_exist_disk/replica/reps\", \"add_new_exist_tag\");\n        std::string dir_name = \"add_new_not_empty_disk/replica/reps\";\n        utils::filesystem::create_directory(dir_name);\n        
utils::filesystem::create_file(dir_name + \"/test_file\");\n        fail::cfg(\"filesystem_create_directory\", \"return(\" + create_dir + \")\");\n        fail::cfg(\"filesystem_check_dir_rw\", \"return(\" + check_rw + \")\");\n    }\n\n    void reset_after_add_new_disk_test()\n    {\n        stub->_fs_manager._dir_nodes.clear();\n        stub->_fs_manager._available_data_dirs.clear();\n        dsn::utils::filesystem::remove_path(\"add_new_not_empty_disk\");\n    }\n\npublic:\n    int empty_dir_nodes_count = 1;\n    int dir_nodes_count = 5;\n\n    dsn::app_info app_info_1;\n    int app_id_1_primary_count_for_disk = 1;\n    int app_id_1_secondary_count_for_disk = 2;\n\n    dsn::app_info app_info_2;\n    int app_id_2_primary_count_for_disk = 2;\n    int app_id_2_secondary_count_for_disk = 4;\n\nprivate:\n    void generate_mock_app_info()\n    {\n        app_info_1.app_id = 1;\n        app_info_1.app_name = \"disk_test_1\";\n        app_info_1.app_type = \"replica\";\n        app_info_1.is_stateful = true;\n        app_info_1.max_replica_count = 3;\n        app_info_1.partition_count = 8;\n\n        app_info_2.app_id = 2;\n        app_info_2.app_name = \"disk_test_2\";\n        app_info_2.app_type = \"replica\";\n        app_info_2.is_stateful = true;\n        app_info_2.max_replica_count = 3;\n        app_info_2.partition_count = 16;\n    }\n\n    void generate_mock_empty_dir_node(int num)\n    {\n        while (num > 0) {\n            dir_node *node_disk =\n                new dir_node(fmt::format(\"tag_empty_{}\", num), fmt::format(\"./tag_empty_{}\", num));\n            stub->_fs_manager._dir_nodes.emplace_back(node_disk);\n            stub->_fs_manager._available_data_dirs.emplace_back(node_disk->full_dir);\n            utils::filesystem::create_directory(node_disk->full_dir);\n            num--;\n        }\n    }\n\n    void generate_mock_dir_nodes(int num)\n    {\n        int app_id_1_disk_holding_replica_count =\n            app_id_1_primary_count_for_disk + 
app_id_1_secondary_count_for_disk;\n        int app_id_2_disk_holding_replica_count =\n            app_id_2_primary_count_for_disk + app_id_2_secondary_count_for_disk;\n\n        int app_id_1_partition_index = 1;\n        int app_id_2_partition_index = 1;\n\n        int64_t disk_capacity_mb = num * 100;\n        int count = 0;\n        while (count++ < num) {\n            int64_t disk_available_mb = count * 50;\n            int disk_available_ratio =\n                static_cast<int>(std::round((double)100 * disk_available_mb / disk_capacity_mb));\n            // create one mock dir_node and make sure disk_capacity_mb_ > disk_available_mb_\n            dir_node *node_disk = new dir_node(\"tag_\" + std::to_string(count),\n                                               \"./tag_\" + std::to_string(count),\n                                               disk_capacity_mb,\n                                               disk_available_mb,\n                                               disk_available_ratio);\n\n            stub->_options.data_dirs.push_back(\n                node_disk->full_dir); // open replica need the options\n            utils::filesystem::create_directory(node_disk->full_dir);\n\n            int app_1_replica_count_per_disk = app_id_1_disk_holding_replica_count;\n            while (app_1_replica_count_per_disk-- > 0) {\n                node_disk->holding_replicas[app_info_1.app_id].emplace(\n                    gpid(app_info_1.app_id, app_id_1_partition_index++));\n            }\n\n            int app_2_replica_count_per_disk = app_id_2_disk_holding_replica_count;\n            while (app_2_replica_count_per_disk-- > 0) {\n                node_disk->holding_replicas[app_info_2.app_id].emplace(\n                    gpid(app_info_2.app_id, app_id_2_partition_index++));\n            }\n\n            stub->_fs_manager._dir_nodes.emplace_back(node_disk);\n            stub->_fs_manager._available_data_dirs.emplace_back(node_disk->full_dir);\n        }\n    
}\n\n    void update_replica_disk_status(const gpid &pid, const disk_status::type status)\n    {\n        replica_ptr replica = stub->get_replica(pid);\n        if (replica == nullptr) {\n            return;\n        }\n        replica->set_disk_status(status);\n    }\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/test/replica_learn_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n\n#include \"replica/replica.h\"\n#include \"mock_utils.h\"\n#include \"replica/duplication/test/duplication_test_base.h\"\n\nnamespace dsn {\nnamespace replication {\n\n/*static*/ mock_mutation_duplicator::duplicate_function mock_mutation_duplicator::_func;\n\nclass replica_learn_test : public duplication_test_base\n{\npublic:\n    replica_learn_test() = default;\n\n    std::unique_ptr<mock_replica> create_duplicating_replica()\n    {\n        gpid gpid(1, 1);\n        app_info app_info;\n        app_info.app_type = \"replica\";\n        app_info.duplicating = true;\n        auto r = make_unique<mock_replica>(stub.get(), gpid, app_info, \"./\");\n        r->as_primary();\n        return r;\n    }\n\n    void test_get_learn_start_decree()\n    {\n        { // no duplication\n            learn_request req;\n            req.last_committed_decree_in_app = 5;\n            req.max_gced_decree = 3;\n\n            // local_committed_decree = 5\n            _replica->mock_max_gced_decree(0);\n            _replica->_prepare_list->reset(5);\n\n            ASSERT_EQ(_replica->get_learn_start_decree(req), 6);\n        }\n        
struct test_data\n        {\n            decree learner_last_committed_decree;\n            decree learner_max_gced_decree;\n            decree learnee_local_committed_decree;\n            decree learnee_max_gced_decree;\n            decree min_confirmed_decree;\n\n            decree wlearn_start_decree;\n        } tests[] = {\n            // min_confirmed_decree(3) >= 0\n            // learn_start_decree_for_dup(4) < learn_start_decree_no_dup(6)\n            // request.max_gced_decree == invalid_decree\n            {5, invalid_decree, 5, 0, 3, 4},\n\n            // min_confirmed_decree(3) >= 0\n            // learn_start_decree_for_dup(4) < learn_start_decree_no_dup(6)\n            // learn_start_decree_for_dup(4) <= request.max_gced_decree(4)\n            {5, 4, 5, 0, 3, 4},\n\n            // min_confirmed_decree(3) >= 0\n            // learn_start_decree_for_dup(4) < learn_start_decree_no_dup(6)\n            // learn_start_decree_for_dup(4) > request.max_gced_decree(0)\n            {5, 0, 5, 0, 3, 6},\n\n            // min_confirmed_decree(3) >= 0\n            // learn_start_decree_for_dup(4) > learn_start_decree_no_dup(1)\n            {0, 4, 5, 0, 3, 1},\n\n            // min_confirmed_decree == invalid_decree\n            // local_gced == invalid_decree\n            // abnormal case\n            {5, invalid_decree, 5, invalid_decree, invalid_decree, 6},\n\n            // min_confirmed_decree == invalid_decree\n            // local_gced(2) != invalid_decree\n            // learn_start_decree_for_dup(3) < learn_start_decree_no_dup(6)\n            // request.max_gced_decree == invalid_decree\n            {5, invalid_decree, 5, 2, invalid_decree, 3},\n\n            // min_confirmed_decree == invalid_decree\n            // local_gced(2) != invalid_decree\n            // learn_start_decree_for_dup(3) < learn_start_decree_no_dup(6)\n            // learn_start_decree_for_dup(3) <= request.max_gced_decree(3)\n            {5, 3, 5, 2, invalid_decree, 3},\n            // 
local_gced(0) != invalid_decree\n            // learn_start_decree_for_dup(1) < learn_start_decree_no_dup(6)\n            // learn_start_decree_for_dup(1) <= request.max_gced_decree(3)\n            {5, 3, 5, 0, invalid_decree, 1},\n\n            // min_confirmed_decree == invalid_decree\n            // local_gced(2) != invalid_decree\n            // learn_start_decree_for_dup(3) < learn_start_decree_no_dup(6)\n            // learn_start_decree_for_dup(3) > request.max_gced_decree(0)\n            {5, 0, 5, 2, invalid_decree, 6},\n\n            // min_confirmed_decree == invalid_decree\n            // local_gced(2) != invalid_decree\n            // learn_start_decree_for_dup(3) > learn_start_decree_no_dup(1)\n            {0, invalid_decree, 5, 2, invalid_decree, 1},\n            // learn_start_decree_for_dup(3) > learn_start_decree_no_dup(2)\n            {1, invalid_decree, 5, 2, invalid_decree, 2},\n\n        };\n\n        int id = 1;\n        for (auto tt : tests) {\n            _replica = create_duplicating_replica();\n            _replica->mock_max_gced_decree(tt.learnee_max_gced_decree);\n\n            learn_request req;\n            req.last_committed_decree_in_app = tt.learner_last_committed_decree;\n            req.max_gced_decree = tt.learner_max_gced_decree;\n\n            _replica->_prepare_list->reset(tt.learnee_local_committed_decree);\n\n            _replica->init_private_log(_log_dir);\n            auto dup = create_test_duplicator(tt.min_confirmed_decree);\n            add_dup(_replica.get(), std::move(dup));\n\n            ASSERT_EQ(_replica->get_learn_start_decree(req), tt.wlearn_start_decree) << \"case #\"\n                                                                                     << id;\n            id++;\n        }\n    }\n\n    void test_get_max_gced_decree_for_learn()\n    {\n        struct test_data\n        {\n            decree first_learn_start_decree;\n            decree plog_max_gced_decree;\n\n            decree want;\n        
} tests[] = {{invalid_decree, 10, 10},\n                     {invalid_decree, invalid_decree, invalid_decree},\n                     {10, 20, 9},\n                     {10, invalid_decree, 9},\n                     {10, 5, 5}};\n        for (auto tt : tests) {\n            _replica = create_duplicating_replica();\n            _replica->mock_max_gced_decree(tt.plog_max_gced_decree);\n            _replica->_potential_secondary_states.first_learn_start_decree =\n                tt.first_learn_start_decree;\n            ASSERT_EQ(_replica->get_max_gced_decree_for_learn(), tt.want);\n        }\n    }\n};\n\nTEST_F(replica_learn_test, get_learn_start_decree) { test_get_learn_start_decree(); }\n\nTEST_F(replica_learn_test, get_max_gced_decree_for_learn) { test_get_max_gced_decree_for_learn(); }\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/test/replica_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/dist/replication/replica_envs.h>\n#include <dsn/utility/defer.h>\n#include <gtest/gtest.h>\n#include <dsn/utility/filesystem.h>\n#include \"runtime/rpc/network.sim.h\"\n\n#include \"common/backup_common.h\"\n#include \"replica_test_base.h\"\n#include \"replica/replica.h\"\n#include \"replica/replica_http_service.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_test : public replica_test_base\n{\npublic:\n    replica_test()\n        : pid(gpid(2, 1)),\n          _backup_id(dsn_now_ms()),\n          _provider_name(\"local_service\"),\n          _policy_name(\"mock_policy\")\n    {\n    }\n\n    void SetUp() override\n    {\n        FLAGS_enable_http_server = false;\n        stub->install_perf_counters();\n        mock_app_info();\n        _mock_replica = stub->generate_replica_ptr(_app_info, pid, partition_status::PS_PRIMARY, 1);\n\n        // set cold_backup_root manually.\n        // `cold_backup_root` is set by configuration \"replication.cold_backup_root\",\n        // which is usually the cluster_name of production clusters.\n        _mock_replica->_options->cold_backup_root = \"test_cluster\";\n    }\n\n    
int get_write_size_exceed_threshold_count()\n    {\n        return stub->_counter_recent_write_size_exceed_threshold_count->get_value();\n    }\n\n    int get_table_level_backup_request_qps()\n    {\n        return _mock_replica->_counter_backup_request_qps->get_integer_value();\n    }\n\n    bool get_validate_partition_hash() const { return _mock_replica->_validate_partition_hash; }\n\n    void reset_validate_partition_hash() { _mock_replica->_validate_partition_hash = false; }\n\n    void update_validate_partition_hash(bool old_value, bool set_in_map, std::string new_value)\n    {\n        _mock_replica->_validate_partition_hash = old_value;\n        std::map<std::string, std::string> envs;\n        if (set_in_map) {\n            envs[replica_envs::SPLIT_VALIDATE_PARTITION_HASH] = new_value;\n        }\n        _mock_replica->update_bool_envs(envs,\n                                        replica_envs::SPLIT_VALIDATE_PARTITION_HASH,\n                                        _mock_replica->_validate_partition_hash);\n    }\n\n    bool get_allow_ingest_behind() const { return _mock_replica->_allow_ingest_behind; }\n\n    void reset_allow_ingest_behind() { _mock_replica->_allow_ingest_behind = false; }\n\n    void update_allow_ingest_behind(bool old_value, bool set_in_map, std::string new_value)\n    {\n        _mock_replica->_allow_ingest_behind = old_value;\n        std::map<std::string, std::string> envs;\n        if (set_in_map) {\n            envs[replica_envs::ROCKSDB_ALLOW_INGEST_BEHIND] = new_value;\n        }\n        _mock_replica->update_bool_envs(\n            envs, replica_envs::ROCKSDB_ALLOW_INGEST_BEHIND, _mock_replica->_allow_ingest_behind);\n    }\n\n    const deny_client &update_deny_client(const std::string &env_name, std::string env_value)\n    {\n        std::map<std::string, std::string> envs;\n        envs[env_name] = std::move(env_value);\n        _mock_replica->update_deny_client(envs);\n        return _mock_replica->_deny_client;\n    }\n\n  
  void mock_app_info()\n    {\n        _app_info.app_id = 2;\n        _app_info.app_name = \"replica_test\";\n        _app_info.app_type = \"replica\";\n        _app_info.is_stateful = true;\n        _app_info.max_replica_count = 3;\n        _app_info.partition_count = 8;\n    }\n\n    void test_on_cold_backup(const std::string user_specified_path = \"\")\n    {\n        backup_request req;\n        req.pid = pid;\n        policy_info backup_policy_info;\n        backup_policy_info.__set_backup_provider_type(_provider_name);\n        backup_policy_info.__set_policy_name(_policy_name);\n        req.policy = backup_policy_info;\n        req.app_name = _app_info.app_name;\n        req.backup_id = _backup_id;\n        if (!user_specified_path.empty()) {\n            req.__set_backup_path(user_specified_path);\n        }\n\n        // test cold backup could complete.\n        backup_response resp;\n        do {\n            _mock_replica->on_cold_backup(req, resp);\n        } while (resp.err == ERR_BUSY);\n        ASSERT_EQ(ERR_OK, resp.err);\n\n        // test checkpoint files have been uploaded successfully.\n        std::string backup_root = dsn::utils::filesystem::path_combine(\n            user_specified_path, _mock_replica->_options->cold_backup_root);\n        std::string current_chkpt_file =\n            cold_backup::get_current_chkpt_file(backup_root, req.app_name, req.pid, req.backup_id);\n        ASSERT_TRUE(dsn::utils::filesystem::file_exists(current_chkpt_file));\n        int64_t size = 0;\n        dsn::utils::filesystem::file_size(current_chkpt_file, size);\n        ASSERT_LT(0, size);\n    }\n\n    error_code test_find_valid_checkpoint(const std::string user_specified_path = \"\")\n    {\n        configuration_restore_request req;\n        req.app_id = _app_info.app_id;\n        req.app_name = _app_info.app_name;\n        req.backup_provider_name = _provider_name;\n        req.cluster_name = _mock_replica->_options->cold_backup_root;\n        
req.time_stamp = _backup_id;\n        if (!user_specified_path.empty()) {\n            req.__set_restore_path(user_specified_path);\n        }\n\n        std::string remote_chkpt_dir;\n        return _mock_replica->find_valid_checkpoint(req, remote_chkpt_dir);\n    }\n\n    void force_update_checkpointing(bool running)\n    {\n        _mock_replica->_is_manual_emergency_checkpointing = running;\n    }\n\n    bool is_checkpointing() { return _mock_replica->_is_manual_emergency_checkpointing; }\n\n    replica *call_clear_on_failure(replica_stub *stub,\n                                   replica *rep,\n                                   const std::string &path,\n                                   const gpid &gpid)\n    {\n        return replica::clear_on_failure(stub, rep, path, gpid);\n    }\n\n    bool has_gpid(gpid &gpid) const\n    {\n        for (const auto &node : stub->_fs_manager._dir_nodes) {\n            if (node->has(gpid)) {\n                return true;\n            }\n        }\n        return false;\n    }\n\n    void test_update_app_max_replica_count()\n    {\n        const auto reserved_max_replica_count = _app_info.max_replica_count;\n        const int32_t target_max_replica_count = 5;\n        dassert_f(target_max_replica_count != reserved_max_replica_count,\n                  \"target_max_replica_count should not be equal to reserved_max_replica_count:\"\n                  \"target_max_replica_count={}, reserved_max_replica_count={}\",\n                  target_max_replica_count,\n                  reserved_max_replica_count);\n\n        // store new max_replica_count into file\n        _mock_replica->update_app_max_replica_count(target_max_replica_count);\n        _app_info.max_replica_count = target_max_replica_count;\n\n        dsn::app_info info;\n        replica_app_info replica_info(&info);\n\n        auto path = dsn::utils::filesystem::path_combine(_mock_replica->_dir,\n                                                         
dsn::replication::replica::kAppInfo);\n        std::cout << \"the path of .app-info file is \" << path << std::endl;\n\n        // load new max_replica_count from file\n        auto err = replica_info.load(path);\n        ASSERT_EQ(err, ERR_OK);\n        ASSERT_EQ(info, _mock_replica->_app_info);\n        std::cout << \"the loaded new app_info is \" << info << std::endl;\n\n        // recover original max_replica_count\n        _mock_replica->update_app_max_replica_count(reserved_max_replica_count);\n        _app_info.max_replica_count = reserved_max_replica_count;\n\n        // load original max_replica_count from file\n        err = replica_info.load(path);\n        ASSERT_EQ(err, ERR_OK);\n        ASSERT_EQ(info, _mock_replica->_app_info);\n        std::cout << \"the loaded original app_info is \" << info << std::endl;\n    }\n\npublic:\n    dsn::app_info _app_info;\n    dsn::gpid pid;\n    mock_replica_ptr _mock_replica;\n\nprivate:\n    const int64_t _backup_id;\n    const std::string _provider_name;\n    const std::string _policy_name;\n};\n\nTEST_F(replica_test, write_size_limited)\n{\n    int count = 100;\n    task_code default_code;\n    struct dsn::message_header header;\n    header.body_length = 10000000;\n\n    auto write_request = dsn::message_ex::create_request(default_code);\n    auto cleanup = dsn::defer([=]() { delete write_request; });\n    write_request->header = &header;\n    std::unique_ptr<tools::sim_network_provider> sim_net(\n        new tools::sim_network_provider(nullptr, nullptr));\n    write_request->io_session = sim_net->create_client_session(rpc_address());\n\n    for (int i = 0; i < count; i++) {\n        stub->on_client_write(pid, write_request);\n    }\n\n    ASSERT_EQ(get_write_size_exceed_threshold_count(), count);\n}\n\nTEST_F(replica_test, backup_request_qps)\n{\n    // create backup request\n    struct dsn::message_header header;\n    header.context.u.is_backup_request = true;\n    message_ptr backup_request = 
dsn::message_ex::create_request(task_code());\n    backup_request->header = &header;\n    std::unique_ptr<tools::sim_network_provider> sim_net(\n        new tools::sim_network_provider(nullptr, nullptr));\n    backup_request->io_session = sim_net->create_client_session(rpc_address());\n\n    _mock_replica->on_client_read(backup_request);\n\n    // We have to sleep >= 0.1s, or the value this perf-counter will be 0, according to the\n    // implementation of perf-counter which type is COUNTER_TYPE_RATE.\n    usleep(1e5);\n    ASSERT_GT(get_table_level_backup_request_qps(), 0);\n}\n\nTEST_F(replica_test, query_data_version_test)\n{\n    replica_http_service http_svc(stub.get());\n    struct query_data_version_test\n    {\n        std::string app_id;\n        http_status_code expected_code;\n        std::string expected_response_json;\n    } tests[] = {{\"\", http_status_code::bad_request, \"app_id should not be empty\"},\n                 {\"wrong\", http_status_code::bad_request, \"invalid app_id=wrong\"},\n                 {\"2\",\n                  http_status_code::ok,\n                  R\"({\"1\":{\"data_version\":\"1\"}})\"},\n                 {\"4\", http_status_code::not_found, \"app_id=4 not found\"}};\n    for (const auto &test : tests) {\n        http_request req;\n        http_response resp;\n        if (!test.app_id.empty()) {\n            req.query_args[\"app_id\"] = test.app_id;\n        }\n        http_svc.query_app_data_version_handler(req, resp);\n        ASSERT_EQ(resp.status_code, test.expected_code);\n        std::string expected_json = test.expected_response_json;\n        ASSERT_EQ(resp.body, expected_json);\n    }\n}\n\nTEST_F(replica_test, query_compaction_test)\n{\n    replica_http_service http_svc(stub.get());\n    struct query_compaction_test\n    {\n        std::string app_id;\n        http_status_code expected_code;\n        std::string expected_response_json;\n    } tests[] = {{\"\", http_status_code::bad_request, \"app_id should not be 
empty\"},\n                 {\"xxx\", http_status_code::bad_request, \"invalid app_id=xxx\"},\n                 {\"2\",\n                  http_status_code::ok,\n                  R\"({\"status\":{\"finished\":0,\"idle\":1,\"queuing\":0,\"running\":0}})\"},\n                 {\"4\",\n                  http_status_code::ok,\n                  R\"({\"status\":{\"finished\":0,\"idle\":0,\"queuing\":0,\"running\":0}})\"}};\n    for (const auto &test : tests) {\n        http_request req;\n        http_response resp;\n        if (!test.app_id.empty()) {\n            req.query_args[\"app_id\"] = test.app_id;\n        }\n        http_svc.query_manual_compaction_handler(req, resp);\n        ASSERT_EQ(resp.status_code, test.expected_code);\n        ASSERT_EQ(resp.body, test.expected_response_json);\n    }\n}\n\nTEST_F(replica_test, update_validate_partition_hash_test)\n{\n    struct update_validate_partition_hash_test\n    {\n        bool set_in_map;\n        bool old_value;\n        std::string new_value;\n        bool expected_value;\n    } tests[]{{true, false, \"false\", false},\n              {true, false, \"true\", true},\n              {true, true, \"true\", true},\n              {true, true, \"false\", false},\n              {false, false, \"\", false},\n              {false, true, \"\", false},\n              {true, true, \"flase\", true},\n              {true, false, \"ture\", false}};\n    for (const auto &test : tests) {\n        update_validate_partition_hash(test.old_value, test.set_in_map, test.new_value);\n        ASSERT_EQ(get_validate_partition_hash(), test.expected_value);\n        reset_validate_partition_hash();\n    }\n}\n\nTEST_F(replica_test, update_allow_ingest_behind_test)\n{\n    struct update_allow_ingest_behind_test\n    {\n        bool set_in_map;\n        bool old_value;\n        std::string new_value;\n        bool expected_value;\n    } tests[]{{true, false, \"false\", false},\n              {true, false, \"true\", true},\n              
{true, true, \"true\", true},\n              {true, true, \"false\", false},\n              {false, false, \"\", false},\n              {false, true, \"\", false},\n              {true, true, \"flase\", true},\n              {true, false, \"ture\", false}};\n    for (const auto &test : tests) {\n        update_allow_ingest_behind(test.old_value, test.set_in_map, test.new_value);\n        ASSERT_EQ(get_allow_ingest_behind(), test.expected_value);\n        reset_allow_ingest_behind();\n    }\n}\n\nTEST_F(replica_test, test_replica_backup_and_restore)\n{\n    test_on_cold_backup();\n    auto err = test_find_valid_checkpoint();\n    ASSERT_EQ(ERR_OK, err);\n}\n\nTEST_F(replica_test, test_replica_backup_and_restore_with_specific_path)\n{\n    std::string user_specified_path = \"test/backup\";\n    test_on_cold_backup(user_specified_path);\n    auto err = test_find_valid_checkpoint(user_specified_path);\n    ASSERT_EQ(ERR_OK, err);\n}\n\nTEST_F(replica_test, test_trigger_manual_emergency_checkpoint)\n{\n    ASSERT_EQ(_mock_replica->trigger_manual_emergency_checkpoint(100), ERR_OK);\n    ASSERT_TRUE(is_checkpointing());\n    _mock_replica->update_last_durable_decree(100);\n\n    // test no need start checkpoint because `old_decree` < `last_durable`\n    ASSERT_EQ(_mock_replica->trigger_manual_emergency_checkpoint(100), ERR_OK);\n    ASSERT_FALSE(is_checkpointing());\n\n    // test has existed running task\n    force_update_checkpointing(true);\n    ASSERT_EQ(_mock_replica->trigger_manual_emergency_checkpoint(101), ERR_BUSY);\n    ASSERT_TRUE(is_checkpointing());\n    // test running task completed\n    _mock_replica->tracker()->wait_outstanding_tasks();\n    ASSERT_FALSE(is_checkpointing());\n\n    // test exceed max concurrent count\n    ASSERT_EQ(_mock_replica->trigger_manual_emergency_checkpoint(101), ERR_OK);\n    force_update_checkpointing(false);\n    FLAGS_max_concurrent_manual_emergency_checkpointing_count = 1;\n    
ASSERT_EQ(_mock_replica->trigger_manual_emergency_checkpoint(101), ERR_TRY_AGAIN);\n    ASSERT_FALSE(is_checkpointing());\n    _mock_replica->tracker()->wait_outstanding_tasks();\n}\n\nTEST_F(replica_test, test_query_last_checkpoint_info)\n{\n    // test no exist gpid\n    auto req = std::make_unique<learn_request>();\n    req->pid = gpid(100, 100);\n    query_last_checkpoint_info_rpc rpc =\n        query_last_checkpoint_info_rpc(std::move(req), RPC_QUERY_LAST_CHECKPOINT_INFO);\n    stub->on_query_last_checkpoint(rpc);\n    ASSERT_EQ(rpc.response().err, ERR_OBJECT_NOT_FOUND);\n\n    learn_response resp;\n    // last_checkpoint hasn't exist\n    _mock_replica->on_query_last_checkpoint(resp);\n    ASSERT_EQ(resp.err, ERR_PATH_NOT_FOUND);\n\n    // query ok\n    _mock_replica->update_last_durable_decree(100);\n    _mock_replica->set_last_committed_decree(200);\n    _mock_replica->on_query_last_checkpoint(resp);\n    ASSERT_EQ(resp.last_committed_decree, 200);\n    ASSERT_EQ(resp.base_local_dir, \"./data/checkpoint.100\");\n}\n\nTEST_F(replica_test, test_clear_on_failer)\n{\n    replica *rep =\n        stub->generate_replica(_app_info, pid, partition_status::PS_PRIMARY, 1, false, true);\n    auto path = stub->get_replica_dir(_app_info.app_type.c_str(), pid);\n    dsn::utils::filesystem::create_directory(path);\n    ASSERT_TRUE(dsn::utils::filesystem::path_exists(path));\n    ASSERT_TRUE(has_gpid(pid));\n\n    ASSERT_FALSE(call_clear_on_failure(stub.get(), rep, path, pid));\n\n    ASSERT_FALSE(dsn::utils::filesystem::path_exists(path));\n    ASSERT_FALSE(has_gpid(pid));\n}\n\nTEST_F(replica_test, update_deny_client_test)\n{\n    struct update_deny_client_test\n    {\n        std::string env_name;\n        std::string env_value;\n        deny_client expected;\n    } tests[]{{\"invalid\", \"invalid\", {false, false, false}},\n              {\"replica.deny_client_request\", \"reconfig*all\", {true, true, true}},\n              {\"replica.deny_client_request\", 
\"reconfig*write\", {false, true, true}},\n              {\"replica.deny_client_request\", \"reconfig*read\", {true, false, true}},\n              {\"replica.deny_client_request\", \"timeout*all\", {true, true, false}},\n              {\"replica.deny_client_request\", \"timeout*write\", {false, true, false}},\n              {\"replica.deny_client_request\", \"timeout*read\", {true, false, false}}};\n    for (const auto &test : tests) {\n        ASSERT_EQ(update_deny_client(test.env_name, test.env_value), test.expected);\n    }\n}\n\nTEST_F(replica_test, test_update_app_max_replica_count) { test_update_app_max_replica_count(); }\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/test/replica_test_base.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/smart_pointers.h>\n#include <dsn/dist/replication/replication_app_base.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/errors.h>\n#include <gtest/gtest.h>\n\n#include \"replica/replica_stub.h\"\n\n#include \"mock_utils.h\"\n\nnamespace dsn {\nnamespace replication {\n\nclass replica_stub_test_base : public ::testing::Test\n{\npublic:\n    replica_stub_test_base() { stub = make_unique<mock_replica_stub>(); }\n\n    ~replica_stub_test_base() { stub.reset(); }\n\n    std::unique_ptr<mock_replica_stub> stub;\n};\n\nclass replica_test_base : public replica_stub_test_base\n{\npublic:\n    std::unique_ptr<mock_replica> _replica;\n    const std::string _log_dir{\"./test-log\"};\n\n    
replica_test_base() { _replica = create_mock_replica(stub.get(), 1, 1, _log_dir.c_str()); }\n\n    virtual mutation_ptr create_test_mutation(int64_t decree, const std::string &data)\n    {\n        mutation_ptr mu(new mutation());\n        mu->data.header.ballot = 1;\n        mu->data.header.decree = decree;\n        mu->data.header.pid = _replica->get_gpid();\n        mu->data.header.last_committed_decree = decree - 1;\n        mu->data.header.log_offset = 0;\n        mu->data.header.timestamp = decree;\n\n        mu->data.updates.emplace_back(mutation_update());\n        mu->data.updates.back().code =\n            RPC_COLD_BACKUP; // whatever code it is, but never be WRITE_EMPTY\n        mu->data.updates.back().data = blob::create_from_bytes(std::string(data));\n        mu->client_requests.push_back(nullptr);\n\n        // replica_duplicator always loads from hard disk,\n        // so it must be logged.\n        mu->set_logged();\n\n        return mu;\n    }\n\n    gpid get_gpid() const { return _replica->get_gpid(); }\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/replica/test/replication_service_test_app.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/dist/replication/replication_service_app.h>\nusing ::dsn::replication::replication_service_app;\nusing ::dsn::error_code;\n\nclass replication_service_test_app : public replication_service_app\n{\npublic:\n    replication_service_test_app(const dsn::service_app_info *info) : replication_service_app(info)\n    {\n    }\n    virtual error_code start(const std::vector<std::string> &args) override;\n    virtual dsn::error_code stop(bool /*cleanup*/) { return dsn::ERR_OK; }\n\n    // test for cold_backup_context\n    void check_backup_on_remote_test();\n    void read_current_chkpt_file_test();\n    void remote_chkpt_dir_exist_test();\n\n    void upload_checkpoint_to_remote_test();\n    void 
read_backup_metadata_test();\n    void on_upload_chkpt_dir_test();\n    void write_backup_metadata_test();\n    void write_current_chkpt_file_test();\n};\n"
  },
  {
    "path": "src/replica/test/run.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nif [ -z \"${REPORT_DIR}\" ]; then\n    REPORT_DIR=\".\"\nfi\n\n./clear.sh\noutput_xml=\"${REPORT_DIR}/dsn.replica.test.1.xml\"\nGTEST_OUTPUT=\"xml:${output_xml}\" ./dsn.replica.test\n"
  },
  {
    "path": "src/replica/test/throttling_controller_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"utils/throttling_controller.h\"\n\n#include <gtest/gtest.h>\n\nnamespace dsn {\nnamespace replication {\n\nclass throttling_controller_test : public ::testing::Test\n{\npublic:\n    void test_parse_env_basic()\n    {\n        throttling_controller cntl;\n        std::string parse_err;\n        bool env_changed = false;\n        std::string old_value;\n        ASSERT_TRUE(cntl.parse_from_env(\"20000*delay*100\", 4, parse_err, env_changed, old_value));\n        ASSERT_EQ(cntl._cur_units, 0);\n        ASSERT_EQ(cntl._enabled, true);\n        ASSERT_EQ(cntl._delay_ms, 100);\n        ASSERT_EQ(cntl._delay_units, 5000 + 1);\n        ASSERT_EQ(cntl._reject_delay_ms, 0);\n        ASSERT_EQ(cntl._reject_units, 0);\n        ASSERT_EQ(cntl._env_value, \"20000*delay*100\");\n        ASSERT_EQ(cntl._partition_count, 4);\n        ASSERT_EQ(env_changed, true);\n        ASSERT_EQ(old_value, \"\");\n        ASSERT_EQ(parse_err, \"\");\n\n        ASSERT_TRUE(cntl.parse_from_env(\n            \"20000*delay*100,20000*reject*100\", 4, parse_err, env_changed, old_value));\n        ASSERT_EQ(cntl._cur_units, 0);\n        ASSERT_EQ(cntl._enabled, true);\n 
       ASSERT_EQ(cntl._delay_ms, 100);\n        ASSERT_EQ(cntl._delay_units, 5000 + 1);\n        ASSERT_EQ(cntl._reject_delay_ms, 100);\n        ASSERT_EQ(cntl._reject_units, 5000 + 1);\n        ASSERT_EQ(cntl._env_value, \"20000*delay*100,20000*reject*100\");\n        ASSERT_EQ(cntl._partition_count, 4);\n        ASSERT_EQ(env_changed, true);\n        ASSERT_EQ(old_value, \"20000*delay*100\");\n        ASSERT_EQ(parse_err, \"\");\n\n        // invalid argument\n\n        ASSERT_FALSE(cntl.parse_from_env(\"*delay*100\", 4, parse_err, env_changed, old_value));\n        ASSERT_EQ(env_changed, false);\n        ASSERT_NE(parse_err, \"\");\n        ASSERT_EQ(cntl._enabled, true); // ensure invalid env won't stop throttling\n        ASSERT_EQ(cntl._delay_ms, 100);\n        ASSERT_EQ(cntl._delay_units, 5000 + 1);\n        ASSERT_EQ(cntl._reject_delay_ms, 100);\n        ASSERT_EQ(cntl._reject_units, 5000 + 1);\n\n        ASSERT_FALSE(cntl.parse_from_env(\"\", 4, parse_err, env_changed, old_value));\n        ASSERT_EQ(env_changed, false);\n        ASSERT_NE(parse_err, \"\");\n        ASSERT_EQ(cntl._enabled, true);\n    }\n\n    void test_parse_env_multiplier()\n    {\n        throttling_controller cntl;\n        std::string parse_err;\n        bool env_changed = false;\n        std::string old_value;\n\n        struct test_case_1\n        {\n            std::string env;\n\n            int64_t delay_units;\n            int64_t delay_ms;\n            int64_t reject_units;\n            int64_t reject_ms;\n        } test_cases_1[] = {\n            {\"20K*delay*100\", 5000 + 1, 100, 0, 0},\n            {\"20M*delay*100\", 5000 * 1000 + 1, 100, 0, 0},\n            {\"20M*delay*100,20M*reject*100\", 5000 * 1000 + 1, 100, 5000 * 1000 + 1, 100},\n\n            // throttling size exceeds int32_t max value\n            {\"80000M*delay*100\", int64_t(20) * 1000 * 1000 * 1000 + 1, 100, 0, 0},\n        };\n        for (const auto &tc : test_cases_1) {\n            
ASSERT_TRUE(cntl.parse_from_env(tc.env, 4, parse_err, env_changed, old_value));\n            ASSERT_EQ(cntl._enabled, true);\n            ASSERT_EQ(cntl._delay_units, tc.delay_units) << tc.env;\n            ASSERT_EQ(cntl._delay_ms, tc.delay_ms) << tc.env;\n            ASSERT_EQ(cntl._reject_units, tc.reject_units) << tc.env;\n            ASSERT_EQ(cntl._reject_delay_ms, tc.reject_ms) << tc.env;\n            ASSERT_EQ(env_changed, true);\n            ASSERT_EQ(parse_err, \"\");\n        }\n\n        // invalid argument\n\n        std::string test_cases_2[] = {\n            \"20m*delay*100\", \"20B*delay*100\", \"20KB*delay*100\", \"20Mb*delay*100\", \"20MB*delay*100\",\n        };\n        for (const std::string &tc : test_cases_2) {\n            ASSERT_FALSE(cntl.parse_from_env(tc, 4, parse_err, env_changed, old_value));\n            ASSERT_EQ(cntl._enabled, true);\n            ASSERT_EQ(env_changed, false);\n            ASSERT_NE(parse_err, \"\");\n        }\n    }\n};\n\nTEST_F(throttling_controller_test, parse_env_basic) { test_parse_env_basic(); }\n\nTEST_F(throttling_controller_test, parse_env_multiplier) { test_parse_env_multiplier(); }\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/CMakeLists.txt",
    "content": "add_subdirectory(test)\nadd_subdirectory(rpc)\nadd_subdirectory(task)\nadd_subdirectory(security)\n\n# TODO(zlw) remove perf_counter from dsn_runtime after the refactor by WuTao\nadd_library(dsn_runtime STATIC\n        $<TARGET_OBJECTS:dsn.security>\n        $<TARGET_OBJECTS:dsn.rpc>\n        $<TARGET_OBJECTS:dsn.task>\n        $<TARGET_OBJECTS:dsn.perf_counter>\n        core_main.cpp\n        dsn.layer2_types.cpp\n        env.sim.cpp\n        fault_injector.cpp\n        global_config.cpp\n        message_utils.cpp\n        nativerun.cpp\n        profiler.cpp\n        providers.common.cpp\n        scheduler.cpp\n        service_api_c.cpp\n        service_engine.cpp\n        simulator.cpp\n        threadpool_code.cpp\n        tool_api.cpp\n        tracer.cpp\n        zlocks.cpp\n        )\ntarget_link_libraries(dsn_runtime PRIVATE dsn_utils sasl2 gssapi_krb5 krb5)\ndefine_file_basename_for_sources(dsn_runtime)\ninstall(TARGETS dsn_runtime DESTINATION \"lib\")\n"
  },
  {
    "path": "src/runtime/build_config.h",
    "content": "// Copyright (c) 2012 The Chromium Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style license that can be\n// found in the LICENSE file.\n\n// This file adds defines about the platform we're currently building on.\n//  Operating System:\n//    OS_WIN / OS_MACOSX / OS_LINUX / OS_POSIX (MACOSX or LINUX) /\n//    OS_NACL (NACL_SFI or NACL_NONSFI) / OS_NACL_SFI / OS_NACL_NONSFI\n//  Compiler:\n//    COMPILER_MSVC / COMPILER_GCC\n//  Processor:\n//    ARCH_CPU_X86 / ARCH_CPU_X86_64 / ARCH_CPU_X86_FAMILY (X86 or X86_64)\n//    ARCH_CPU_32_BITS / ARCH_CPU_64_BITS\n\n#pragma once\n\n// A set of macros to use for platform detection.\n#if defined(__native_client__)\n// __native_client__ must be first, so that other OS_ defines are not set.\n#define OS_NACL 1\n// OS_NACL comes in two sandboxing technology flavors, SFI or Non-SFI.\n// PNaCl toolchain defines __native_client_nonsfi__ macro in Non-SFI build\n// mode, while it does not in SFI build mode.\n#if defined(__native_client_nonsfi__)\n#define OS_NACL_NONSFI\n#else\n#define OS_NACL_SFI\n#endif\n#elif defined(ANDROID)\n#define OS_ANDROID 1\n#elif defined(__APPLE__)\n// only include TargetConditions after testing ANDROID as some android builds\n// on mac don't have this header available and it's not needed unless the target\n// is really mac/ios.\n#include <TargetConditionals.h>\n#define OS_MACOSX 1\n#if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE\n#define OS_IOS 1\n#endif // defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE\n#elif defined(__linux__)\n#define OS_LINUX 1\n// include a system header to pull in features.h for glibc/uclibc macros.\n#include <unistd.h>\n#if defined(__GLIBC__) && !defined(__UCLIBC__)\n// we really are using glibc, not uClibc pretending to be glibc\n#define LIBC_GLIBC 1\n#endif\n#elif defined(_WIN32)\n#define OS_WIN 1\n#define TOOLKIT_VIEWS 1\n#elif defined(__FreeBSD__)\n#define OS_FREEBSD 1\n#elif defined(__OpenBSD__)\n#define OS_OPENBSD 1\n#elif 
defined(__sun)\n#define OS_SOLARIS 1\n#elif defined(__QNXNTO__)\n#define OS_QNX 1\n#else\n#error Please add support for your platform in butil/build_config.h\n#endif\n\n#if defined(USE_OPENSSL_CERTS) && defined(USE_NSS_CERTS)\n#error Cannot use both OpenSSL and NSS for certificates\n#endif\n\n// For access to standard BSD features, use OS_BSD instead of a\n// more specific macro.\n#if defined(OS_FREEBSD) || defined(OS_OPENBSD)\n#define OS_BSD 1\n#endif\n\n// For access to standard POSIXish features, use OS_POSIX instead of a\n// more specific macro.\n#if defined(OS_MACOSX) || defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_OPENBSD) ||       \\\n    defined(OS_SOLARIS) || defined(OS_ANDROID) || defined(OS_NACL) || defined(OS_QNX)\n#define OS_POSIX 1\n#endif\n\n// Use tcmalloc\n#if (defined(OS_WIN) || defined(OS_LINUX) || defined(OS_ANDROID)) && !defined(NO_TCMALLOC)\n#define USE_TCMALLOC 1\n#endif\n\n// Compiler detection.\n#if defined(__GNUC__)\n#define COMPILER_GCC 1\n#elif defined(_MSC_VER)\n#define COMPILER_MSVC 1\n#else\n#error Please add support for your compiler in butil/build_config.h\n#endif\n\n// Processor architecture detection.  
For more info on what's defined, see:\n//   http://msdn.microsoft.com/en-us/library/b0084kay.aspx\n//   http://www.agner.org/optimize/calling_conventions.pdf\n//   or with gcc, run: \"echo | gcc -E -dM -\"\n#if defined(_M_X64) || defined(__x86_64__)\n#define ARCH_CPU_X86_FAMILY 1\n#define ARCH_CPU_X86_64 1\n#define ARCH_CPU_64_BITS 1\n#define ARCH_CPU_LITTLE_ENDIAN 1\n#elif defined(_M_IX86) || defined(__i386__)\n#define ARCH_CPU_X86_FAMILY 1\n#define ARCH_CPU_X86 1\n#define ARCH_CPU_32_BITS 1\n#define ARCH_CPU_LITTLE_ENDIAN 1\n#elif defined(__ARMEL__)\n#define ARCH_CPU_ARM_FAMILY 1\n#define ARCH_CPU_ARMEL 1\n#define ARCH_CPU_32_BITS 1\n#define ARCH_CPU_LITTLE_ENDIAN 1\n#elif defined(__aarch64__)\n#define ARCH_CPU_ARM_FAMILY 1\n#define ARCH_CPU_ARM64 1\n#define ARCH_CPU_64_BITS 1\n#define ARCH_CPU_LITTLE_ENDIAN 1\n#elif defined(__pnacl__)\n#define ARCH_CPU_32_BITS 1\n#define ARCH_CPU_LITTLE_ENDIAN 1\n#elif defined(__MIPSEL__)\n#if defined(__LP64__)\n#define ARCH_CPU_MIPS64_FAMILY 1\n#define ARCH_CPU_MIPS64EL 1\n#define ARCH_CPU_64_BITS 1\n#define ARCH_CPU_LITTLE_ENDIAN 1\n#else\n#define ARCH_CPU_MIPS_FAMILY 1\n#define ARCH_CPU_MIPSEL 1\n#define ARCH_CPU_32_BITS 1\n#define ARCH_CPU_LITTLE_ENDIAN 1\n#endif\n#else\n#error Please add support for your architecture in butil/build_config.h\n#endif\n\n// Type detection for wchar_t.\n#if defined(OS_WIN)\n#define WCHAR_T_IS_UTF16\n#elif defined(OS_POSIX) && defined(COMPILER_GCC) && defined(__WCHAR_MAX__) &&                      \\\n    (__WCHAR_MAX__ == 0x7fffffff || __WCHAR_MAX__ == 0xffffffff)\n#define WCHAR_T_IS_UTF32\n#elif defined(OS_POSIX) && defined(COMPILER_GCC) && defined(__WCHAR_MAX__) &&                      \\\n    (__WCHAR_MAX__ == 0x7fff || __WCHAR_MAX__ == 0xffff)\n// On Posix, we'll detect short wchar_t, but projects aren't guaranteed to\n// compile in this mode (in particular, Chrome doesn't). 
This is intended for\n// other projects using base who manage their own dependencies and make sure\n// short wchar works for them.\n#define WCHAR_T_IS_UTF16\n#else\n#error Please add support for your compiler in butil/build_config.h\n#endif\n\n#if defined(OS_ANDROID)\n// The compiler thinks std::string::const_iterator and \"const char*\" are\n// equivalent types.\n#define STD_STRING_ITERATOR_IS_CHAR_POINTER\n// The compiler thinks butil::string16::const_iterator and \"char16*\" are\n// equivalent types.\n#define BUTIL_STRING16_ITERATOR_IS_CHAR16_POINTER\n#endif\n\n#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L\n#define BUTIL_CXX11_ENABLED 1\n#endif\n\n#if !defined(BUTIL_CXX11_ENABLED)\n#define nullptr NULL\n#endif\n\n#define HAVE_DLADDR\n"
  },
  {
    "path": "src/runtime/core_main.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/tool/simulator.h>\n#include <dsn/tool/nativerun.h>\n#include <dsn/toollet/tracer.h>\n#include <dsn/toollet/profiler.h>\n#include <dsn/toollet/fault_injector.h>\n\n#include <dsn/tool/providers.common.h>\n\nvoid dsn_core_init()\n{\n    // register all providers\n    dsn::tools::register_common_providers();\n\n    // register all possible tools and toollets\n    dsn::tools::register_tool<dsn::tools::nativerun>(\"nativerun\");\n    dsn::tools::register_tool<dsn::tools::simulator>(\"simulator\");\n    dsn::tools::register_toollet<dsn::tools::tracer>(\"tracer\");\n    dsn::tools::register_toollet<dsn::tools::profiler>(\"profiler\");\n    dsn::tools::register_toollet<dsn::tools::fault_injector>(\"fault_injector\");\n}\n"
  },
  {
    "path": "src/runtime/dsn.layer2_types.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/**\n * Autogenerated by Thrift Compiler (0.9.3)\n *\n * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING\n *  @generated\n */\n#include <dsn/cpp/serialization_helper/dsn.layer2_types.h>\n\n#include <algorithm>\n#include <ostream>\n\n#include <thrift/TToString.h>\n\nnamespace dsn {\n\nint _kapp_statusValues[] = {app_status::AS_INVALID,\n                            app_status::AS_AVAILABLE,\n                            app_status::AS_CREATING,\n                            app_status::AS_CREATE_FAILED,\n                            app_status::AS_DROPPING,\n                            app_status::AS_DROP_FAILED,\n                            app_status::AS_DROPPED,\n                            
app_status::AS_RECALLING};\nconst char *_kapp_statusNames[] = {\"AS_INVALID\",\n                                   \"AS_AVAILABLE\",\n                                   \"AS_CREATING\",\n                                   \"AS_CREATE_FAILED\",\n                                   \"AS_DROPPING\",\n                                   \"AS_DROP_FAILED\",\n                                   \"AS_DROPPED\",\n                                   \"AS_RECALLING\"};\nconst std::map<int, const char *> _app_status_VALUES_TO_NAMES(\n    ::apache::thrift::TEnumIterator(8, _kapp_statusValues, _kapp_statusNames),\n    ::apache::thrift::TEnumIterator(-1, NULL, NULL));\n\npartition_configuration::~partition_configuration() throw() {}\n\nvoid partition_configuration::__set_pid(const ::dsn::gpid &val) { this->pid = val; }\n\nvoid partition_configuration::__set_ballot(const int64_t val) { this->ballot = val; }\n\nvoid partition_configuration::__set_max_replica_count(const int32_t val)\n{\n    this->max_replica_count = val;\n}\n\nvoid partition_configuration::__set_primary(const ::dsn::rpc_address &val) { this->primary = val; }\n\nvoid partition_configuration::__set_secondaries(const std::vector<::dsn::rpc_address> &val)\n{\n    this->secondaries = val;\n}\n\nvoid partition_configuration::__set_last_drops(const std::vector<::dsn::rpc_address> &val)\n{\n    this->last_drops = val;\n}\n\nvoid partition_configuration::__set_last_committed_decree(const int64_t val)\n{\n    this->last_committed_decree = val;\n}\n\nvoid partition_configuration::__set_partition_flags(const int32_t val)\n{\n    this->partition_flags = val;\n}\n\nuint32_t partition_configuration::read(::apache::thrift::protocol::TProtocol *iprot)\n{\n\n    apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);\n    uint32_t xfer = 0;\n    std::string fname;\n    ::apache::thrift::protocol::TType ftype;\n    int16_t fid;\n\n    xfer += iprot->readStructBegin(fname);\n\n    using 
::apache::thrift::protocol::TProtocolException;\n\n    while (true) {\n        xfer += iprot->readFieldBegin(fname, ftype, fid);\n        if (ftype == ::apache::thrift::protocol::T_STOP) {\n            break;\n        }\n        switch (fid) {\n        case 1:\n            if (ftype == ::apache::thrift::protocol::T_STRUCT) {\n                xfer += this->pid.read(iprot);\n                this->__isset.pid = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 2:\n            if (ftype == ::apache::thrift::protocol::T_I64) {\n                xfer += iprot->readI64(this->ballot);\n                this->__isset.ballot = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 3:\n            if (ftype == ::apache::thrift::protocol::T_I32) {\n                xfer += iprot->readI32(this->max_replica_count);\n                this->__isset.max_replica_count = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 4:\n            if (ftype == ::apache::thrift::protocol::T_STRUCT) {\n                xfer += this->primary.read(iprot);\n                this->__isset.primary = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 5:\n            if (ftype == ::apache::thrift::protocol::T_LIST) {\n                {\n                    this->secondaries.clear();\n                    uint32_t _size0;\n                    ::apache::thrift::protocol::TType _etype3;\n                    xfer += iprot->readListBegin(_etype3, _size0);\n                    this->secondaries.resize(_size0);\n                    uint32_t _i4;\n                    for (_i4 = 0; _i4 < _size0; ++_i4) {\n                        xfer += this->secondaries[_i4].read(iprot);\n                    }\n                    xfer += 
iprot->readListEnd();\n                }\n                this->__isset.secondaries = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 6:\n            if (ftype == ::apache::thrift::protocol::T_LIST) {\n                {\n                    this->last_drops.clear();\n                    uint32_t _size5;\n                    ::apache::thrift::protocol::TType _etype8;\n                    xfer += iprot->readListBegin(_etype8, _size5);\n                    this->last_drops.resize(_size5);\n                    uint32_t _i9;\n                    for (_i9 = 0; _i9 < _size5; ++_i9) {\n                        xfer += this->last_drops[_i9].read(iprot);\n                    }\n                    xfer += iprot->readListEnd();\n                }\n                this->__isset.last_drops = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 7:\n            if (ftype == ::apache::thrift::protocol::T_I64) {\n                xfer += iprot->readI64(this->last_committed_decree);\n                this->__isset.last_committed_decree = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 8:\n            if (ftype == ::apache::thrift::protocol::T_I32) {\n                xfer += iprot->readI32(this->partition_flags);\n                this->__isset.partition_flags = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        default:\n            xfer += iprot->skip(ftype);\n            break;\n        }\n        xfer += iprot->readFieldEnd();\n    }\n\n    xfer += iprot->readStructEnd();\n\n    return xfer;\n}\n\nuint32_t partition_configuration::write(::apache::thrift::protocol::TProtocol *oprot) const\n{\n    uint32_t xfer = 0;\n    apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);\n    xfer += 
oprot->writeStructBegin(\"partition_configuration\");\n\n    xfer += oprot->writeFieldBegin(\"pid\", ::apache::thrift::protocol::T_STRUCT, 1);\n    xfer += this->pid.write(oprot);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"ballot\", ::apache::thrift::protocol::T_I64, 2);\n    xfer += oprot->writeI64(this->ballot);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"max_replica_count\", ::apache::thrift::protocol::T_I32, 3);\n    xfer += oprot->writeI32(this->max_replica_count);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"primary\", ::apache::thrift::protocol::T_STRUCT, 4);\n    xfer += this->primary.write(oprot);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"secondaries\", ::apache::thrift::protocol::T_LIST, 5);\n    {\n        xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT,\n                                      static_cast<uint32_t>(this->secondaries.size()));\n        std::vector<::dsn::rpc_address>::const_iterator _iter10;\n        for (_iter10 = this->secondaries.begin(); _iter10 != this->secondaries.end(); ++_iter10) {\n            xfer += (*_iter10).write(oprot);\n        }\n        xfer += oprot->writeListEnd();\n    }\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"last_drops\", ::apache::thrift::protocol::T_LIST, 6);\n    {\n        xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT,\n                                      static_cast<uint32_t>(this->last_drops.size()));\n        std::vector<::dsn::rpc_address>::const_iterator _iter11;\n        for (_iter11 = this->last_drops.begin(); _iter11 != this->last_drops.end(); ++_iter11) {\n            xfer += (*_iter11).write(oprot);\n        }\n        xfer += oprot->writeListEnd();\n    }\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"last_committed_decree\", ::apache::thrift::protocol::T_I64, 
7);\n    xfer += oprot->writeI64(this->last_committed_decree);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"partition_flags\", ::apache::thrift::protocol::T_I32, 8);\n    xfer += oprot->writeI32(this->partition_flags);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldStop();\n    xfer += oprot->writeStructEnd();\n    return xfer;\n}\n\nvoid swap(partition_configuration &a, partition_configuration &b)\n{\n    using ::std::swap;\n    swap(a.pid, b.pid);\n    swap(a.ballot, b.ballot);\n    swap(a.max_replica_count, b.max_replica_count);\n    swap(a.primary, b.primary);\n    swap(a.secondaries, b.secondaries);\n    swap(a.last_drops, b.last_drops);\n    swap(a.last_committed_decree, b.last_committed_decree);\n    swap(a.partition_flags, b.partition_flags);\n    swap(a.__isset, b.__isset);\n}\n\npartition_configuration::partition_configuration(const partition_configuration &other12)\n{\n    pid = other12.pid;\n    ballot = other12.ballot;\n    max_replica_count = other12.max_replica_count;\n    primary = other12.primary;\n    secondaries = other12.secondaries;\n    last_drops = other12.last_drops;\n    last_committed_decree = other12.last_committed_decree;\n    partition_flags = other12.partition_flags;\n    __isset = other12.__isset;\n}\npartition_configuration::partition_configuration(partition_configuration &&other13)\n{\n    pid = std::move(other13.pid);\n    ballot = std::move(other13.ballot);\n    max_replica_count = std::move(other13.max_replica_count);\n    primary = std::move(other13.primary);\n    secondaries = std::move(other13.secondaries);\n    last_drops = std::move(other13.last_drops);\n    last_committed_decree = std::move(other13.last_committed_decree);\n    partition_flags = std::move(other13.partition_flags);\n    __isset = std::move(other13.__isset);\n}\npartition_configuration &partition_configuration::operator=(const partition_configuration &other14)\n{\n    pid = other14.pid;\n    ballot = 
other14.ballot;\n    max_replica_count = other14.max_replica_count;\n    primary = other14.primary;\n    secondaries = other14.secondaries;\n    last_drops = other14.last_drops;\n    last_committed_decree = other14.last_committed_decree;\n    partition_flags = other14.partition_flags;\n    __isset = other14.__isset;\n    return *this;\n}\npartition_configuration &partition_configuration::operator=(partition_configuration &&other15)\n{\n    pid = std::move(other15.pid);\n    ballot = std::move(other15.ballot);\n    max_replica_count = std::move(other15.max_replica_count);\n    primary = std::move(other15.primary);\n    secondaries = std::move(other15.secondaries);\n    last_drops = std::move(other15.last_drops);\n    last_committed_decree = std::move(other15.last_committed_decree);\n    partition_flags = std::move(other15.partition_flags);\n    __isset = std::move(other15.__isset);\n    return *this;\n}\nvoid partition_configuration::printTo(std::ostream &out) const\n{\n    using ::apache::thrift::to_string;\n    out << \"partition_configuration(\";\n    out << \"pid=\" << to_string(pid);\n    out << \", \"\n        << \"ballot=\" << to_string(ballot);\n    out << \", \"\n        << \"max_replica_count=\" << to_string(max_replica_count);\n    out << \", \"\n        << \"primary=\" << to_string(primary);\n    out << \", \"\n        << \"secondaries=\" << to_string(secondaries);\n    out << \", \"\n        << \"last_drops=\" << to_string(last_drops);\n    out << \", \"\n        << \"last_committed_decree=\" << to_string(last_committed_decree);\n    out << \", \"\n        << \"partition_flags=\" << to_string(partition_flags);\n    out << \")\";\n}\n\nconfiguration_query_by_index_request::~configuration_query_by_index_request() throw() {}\n\nvoid configuration_query_by_index_request::__set_app_name(const std::string &val)\n{\n    this->app_name = val;\n}\n\nvoid configuration_query_by_index_request::__set_partition_indices(const std::vector<int32_t> &val)\n{\n    
this->partition_indices = val;\n}\n\nuint32_t configuration_query_by_index_request::read(::apache::thrift::protocol::TProtocol *iprot)\n{\n\n    apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);\n    uint32_t xfer = 0;\n    std::string fname;\n    ::apache::thrift::protocol::TType ftype;\n    int16_t fid;\n\n    xfer += iprot->readStructBegin(fname);\n\n    using ::apache::thrift::protocol::TProtocolException;\n\n    while (true) {\n        xfer += iprot->readFieldBegin(fname, ftype, fid);\n        if (ftype == ::apache::thrift::protocol::T_STOP) {\n            break;\n        }\n        switch (fid) {\n        case 1:\n            if (ftype == ::apache::thrift::protocol::T_STRING) {\n                xfer += iprot->readString(this->app_name);\n                this->__isset.app_name = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 2:\n            if (ftype == ::apache::thrift::protocol::T_LIST) {\n                {\n                    this->partition_indices.clear();\n                    uint32_t _size16;\n                    ::apache::thrift::protocol::TType _etype19;\n                    xfer += iprot->readListBegin(_etype19, _size16);\n                    this->partition_indices.resize(_size16);\n                    uint32_t _i20;\n                    for (_i20 = 0; _i20 < _size16; ++_i20) {\n                        xfer += iprot->readI32(this->partition_indices[_i20]);\n                    }\n                    xfer += iprot->readListEnd();\n                }\n                this->__isset.partition_indices = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        default:\n            xfer += iprot->skip(ftype);\n            break;\n        }\n        xfer += iprot->readFieldEnd();\n    }\n\n    xfer += iprot->readStructEnd();\n\n    return 
xfer;\n}\n\nuint32_t\nconfiguration_query_by_index_request::write(::apache::thrift::protocol::TProtocol *oprot) const\n{\n    uint32_t xfer = 0;\n    apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);\n    xfer += oprot->writeStructBegin(\"configuration_query_by_index_request\");\n\n    xfer += oprot->writeFieldBegin(\"app_name\", ::apache::thrift::protocol::T_STRING, 1);\n    xfer += oprot->writeString(this->app_name);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"partition_indices\", ::apache::thrift::protocol::T_LIST, 2);\n    {\n        xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32,\n                                      static_cast<uint32_t>(this->partition_indices.size()));\n        std::vector<int32_t>::const_iterator _iter21;\n        for (_iter21 = this->partition_indices.begin(); _iter21 != this->partition_indices.end();\n             ++_iter21) {\n            xfer += oprot->writeI32((*_iter21));\n        }\n        xfer += oprot->writeListEnd();\n    }\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldStop();\n    xfer += oprot->writeStructEnd();\n    return xfer;\n}\n\nvoid swap(configuration_query_by_index_request &a, configuration_query_by_index_request &b)\n{\n    using ::std::swap;\n    swap(a.app_name, b.app_name);\n    swap(a.partition_indices, b.partition_indices);\n    swap(a.__isset, b.__isset);\n}\n\nconfiguration_query_by_index_request::configuration_query_by_index_request(\n    const configuration_query_by_index_request &other22)\n{\n    app_name = other22.app_name;\n    partition_indices = other22.partition_indices;\n    __isset = other22.__isset;\n}\nconfiguration_query_by_index_request::configuration_query_by_index_request(\n    configuration_query_by_index_request &&other23)\n{\n    app_name = std::move(other23.app_name);\n    partition_indices = std::move(other23.partition_indices);\n    __isset = 
std::move(other23.__isset);\n}\nconfiguration_query_by_index_request &configuration_query_by_index_request::\noperator=(const configuration_query_by_index_request &other24)\n{\n    app_name = other24.app_name;\n    partition_indices = other24.partition_indices;\n    __isset = other24.__isset;\n    return *this;\n}\nconfiguration_query_by_index_request &configuration_query_by_index_request::\noperator=(configuration_query_by_index_request &&other25)\n{\n    app_name = std::move(other25.app_name);\n    partition_indices = std::move(other25.partition_indices);\n    __isset = std::move(other25.__isset);\n    return *this;\n}\nvoid configuration_query_by_index_request::printTo(std::ostream &out) const\n{\n    using ::apache::thrift::to_string;\n    out << \"configuration_query_by_index_request(\";\n    out << \"app_name=\" << to_string(app_name);\n    out << \", \"\n        << \"partition_indices=\" << to_string(partition_indices);\n    out << \")\";\n}\n\nconfiguration_query_by_index_response::~configuration_query_by_index_response() throw() {}\n\nvoid configuration_query_by_index_response::__set_err(const ::dsn::error_code &val)\n{\n    this->err = val;\n}\n\nvoid configuration_query_by_index_response::__set_app_id(const int32_t val) { this->app_id = val; }\n\nvoid configuration_query_by_index_response::__set_partition_count(const int32_t val)\n{\n    this->partition_count = val;\n}\n\nvoid configuration_query_by_index_response::__set_is_stateful(const bool val)\n{\n    this->is_stateful = val;\n}\n\nvoid configuration_query_by_index_response::__set_partitions(\n    const std::vector<partition_configuration> &val)\n{\n    this->partitions = val;\n}\n\nuint32_t configuration_query_by_index_response::read(::apache::thrift::protocol::TProtocol *iprot)\n{\n\n    apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);\n    uint32_t xfer = 0;\n    std::string fname;\n    ::apache::thrift::protocol::TType ftype;\n    int16_t fid;\n\n    xfer += 
iprot->readStructBegin(fname);\n\n    using ::apache::thrift::protocol::TProtocolException;\n\n    while (true) {\n        xfer += iprot->readFieldBegin(fname, ftype, fid);\n        if (ftype == ::apache::thrift::protocol::T_STOP) {\n            break;\n        }\n        switch (fid) {\n        case 1:\n            if (ftype == ::apache::thrift::protocol::T_STRUCT) {\n                xfer += this->err.read(iprot);\n                this->__isset.err = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 2:\n            if (ftype == ::apache::thrift::protocol::T_I32) {\n                xfer += iprot->readI32(this->app_id);\n                this->__isset.app_id = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 3:\n            if (ftype == ::apache::thrift::protocol::T_I32) {\n                xfer += iprot->readI32(this->partition_count);\n                this->__isset.partition_count = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 4:\n            if (ftype == ::apache::thrift::protocol::T_BOOL) {\n                xfer += iprot->readBool(this->is_stateful);\n                this->__isset.is_stateful = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 5:\n            if (ftype == ::apache::thrift::protocol::T_LIST) {\n                {\n                    this->partitions.clear();\n                    uint32_t _size26;\n                    ::apache::thrift::protocol::TType _etype29;\n                    xfer += iprot->readListBegin(_etype29, _size26);\n                    this->partitions.resize(_size26);\n                    uint32_t _i30;\n                    for (_i30 = 0; _i30 < _size26; ++_i30) {\n                        xfer += this->partitions[_i30].read(iprot);\n             
       }\n                    xfer += iprot->readListEnd();\n                }\n                this->__isset.partitions = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        default:\n            xfer += iprot->skip(ftype);\n            break;\n        }\n        xfer += iprot->readFieldEnd();\n    }\n\n    xfer += iprot->readStructEnd();\n\n    return xfer;\n}\n\nuint32_t\nconfiguration_query_by_index_response::write(::apache::thrift::protocol::TProtocol *oprot) const\n{\n    uint32_t xfer = 0;\n    apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);\n    xfer += oprot->writeStructBegin(\"configuration_query_by_index_response\");\n\n    xfer += oprot->writeFieldBegin(\"err\", ::apache::thrift::protocol::T_STRUCT, 1);\n    xfer += this->err.write(oprot);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"app_id\", ::apache::thrift::protocol::T_I32, 2);\n    xfer += oprot->writeI32(this->app_id);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"partition_count\", ::apache::thrift::protocol::T_I32, 3);\n    xfer += oprot->writeI32(this->partition_count);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"is_stateful\", ::apache::thrift::protocol::T_BOOL, 4);\n    xfer += oprot->writeBool(this->is_stateful);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"partitions\", ::apache::thrift::protocol::T_LIST, 5);\n    {\n        xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT,\n                                      static_cast<uint32_t>(this->partitions.size()));\n        std::vector<partition_configuration>::const_iterator _iter31;\n        for (_iter31 = this->partitions.begin(); _iter31 != this->partitions.end(); ++_iter31) {\n            xfer += (*_iter31).write(oprot);\n        }\n        xfer += oprot->writeListEnd();\n    }\n    xfer += 
oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldStop();\n    xfer += oprot->writeStructEnd();\n    return xfer;\n}\n\nvoid swap(configuration_query_by_index_response &a, configuration_query_by_index_response &b)\n{\n    using ::std::swap;\n    swap(a.err, b.err);\n    swap(a.app_id, b.app_id);\n    swap(a.partition_count, b.partition_count);\n    swap(a.is_stateful, b.is_stateful);\n    swap(a.partitions, b.partitions);\n    swap(a.__isset, b.__isset);\n}\n\nconfiguration_query_by_index_response::configuration_query_by_index_response(\n    const configuration_query_by_index_response &other32)\n{\n    err = other32.err;\n    app_id = other32.app_id;\n    partition_count = other32.partition_count;\n    is_stateful = other32.is_stateful;\n    partitions = other32.partitions;\n    __isset = other32.__isset;\n}\nconfiguration_query_by_index_response::configuration_query_by_index_response(\n    configuration_query_by_index_response &&other33)\n{\n    err = std::move(other33.err);\n    app_id = std::move(other33.app_id);\n    partition_count = std::move(other33.partition_count);\n    is_stateful = std::move(other33.is_stateful);\n    partitions = std::move(other33.partitions);\n    __isset = std::move(other33.__isset);\n}\nconfiguration_query_by_index_response &configuration_query_by_index_response::\noperator=(const configuration_query_by_index_response &other34)\n{\n    err = other34.err;\n    app_id = other34.app_id;\n    partition_count = other34.partition_count;\n    is_stateful = other34.is_stateful;\n    partitions = other34.partitions;\n    __isset = other34.__isset;\n    return *this;\n}\nconfiguration_query_by_index_response &configuration_query_by_index_response::\noperator=(configuration_query_by_index_response &&other35)\n{\n    err = std::move(other35.err);\n    app_id = std::move(other35.app_id);\n    partition_count = std::move(other35.partition_count);\n    is_stateful = std::move(other35.is_stateful);\n    partitions = 
std::move(other35.partitions);\n    __isset = std::move(other35.__isset);\n    return *this;\n}\nvoid configuration_query_by_index_response::printTo(std::ostream &out) const\n{\n    using ::apache::thrift::to_string;\n    out << \"configuration_query_by_index_response(\";\n    out << \"err=\" << to_string(err);\n    out << \", \"\n        << \"app_id=\" << to_string(app_id);\n    out << \", \"\n        << \"partition_count=\" << to_string(partition_count);\n    out << \", \"\n        << \"is_stateful=\" << to_string(is_stateful);\n    out << \", \"\n        << \"partitions=\" << to_string(partitions);\n    out << \")\";\n}\n\napp_info::~app_info() throw() {}\n\nvoid app_info::__set_status(const app_status::type val) { this->status = val; }\n\nvoid app_info::__set_app_type(const std::string &val) { this->app_type = val; }\n\nvoid app_info::__set_app_name(const std::string &val) { this->app_name = val; }\n\nvoid app_info::__set_app_id(const int32_t val) { this->app_id = val; }\n\nvoid app_info::__set_partition_count(const int32_t val) { this->partition_count = val; }\n\nvoid app_info::__set_envs(const std::map<std::string, std::string> &val) { this->envs = val; }\n\nvoid app_info::__set_is_stateful(const bool val) { this->is_stateful = val; }\n\nvoid app_info::__set_max_replica_count(const int32_t val) { this->max_replica_count = val; }\n\nvoid app_info::__set_expire_second(const int64_t val) { this->expire_second = val; }\n\nvoid app_info::__set_create_second(const int64_t val) { this->create_second = val; }\n\nvoid app_info::__set_drop_second(const int64_t val) { this->drop_second = val; }\n\nvoid app_info::__set_duplicating(const bool val)\n{\n    this->duplicating = val;\n    __isset.duplicating = true;\n}\n\nvoid app_info::__set_init_partition_count(const int32_t val) { this->init_partition_count = val; }\n\nvoid app_info::__set_is_bulk_loading(const bool val)\n{\n    this->is_bulk_loading = val;\n    __isset.is_bulk_loading = true;\n}\n\nuint32_t 
app_info::read(::apache::thrift::protocol::TProtocol *iprot)\n{\n\n    apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);\n    uint32_t xfer = 0;\n    std::string fname;\n    ::apache::thrift::protocol::TType ftype;\n    int16_t fid;\n\n    xfer += iprot->readStructBegin(fname);\n\n    using ::apache::thrift::protocol::TProtocolException;\n\n    while (true) {\n        xfer += iprot->readFieldBegin(fname, ftype, fid);\n        if (ftype == ::apache::thrift::protocol::T_STOP) {\n            break;\n        }\n        switch (fid) {\n        case 1:\n            if (ftype == ::apache::thrift::protocol::T_I32) {\n                int32_t ecast36;\n                xfer += iprot->readI32(ecast36);\n                this->status = (app_status::type)ecast36;\n                this->__isset.status = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 2:\n            if (ftype == ::apache::thrift::protocol::T_STRING) {\n                xfer += iprot->readString(this->app_type);\n                this->__isset.app_type = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 3:\n            if (ftype == ::apache::thrift::protocol::T_STRING) {\n                xfer += iprot->readString(this->app_name);\n                this->__isset.app_name = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 4:\n            if (ftype == ::apache::thrift::protocol::T_I32) {\n                xfer += iprot->readI32(this->app_id);\n                this->__isset.app_id = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 5:\n            if (ftype == ::apache::thrift::protocol::T_I32) {\n                xfer += iprot->readI32(this->partition_count);\n                this->__isset.partition_count = true;\n        
    } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 6:\n            if (ftype == ::apache::thrift::protocol::T_MAP) {\n                {\n                    this->envs.clear();\n                    uint32_t _size37;\n                    ::apache::thrift::protocol::TType _ktype38;\n                    ::apache::thrift::protocol::TType _vtype39;\n                    xfer += iprot->readMapBegin(_ktype38, _vtype39, _size37);\n                    uint32_t _i41;\n                    for (_i41 = 0; _i41 < _size37; ++_i41) {\n                        std::string _key42;\n                        xfer += iprot->readString(_key42);\n                        std::string &_val43 = this->envs[_key42];\n                        xfer += iprot->readString(_val43);\n                    }\n                    xfer += iprot->readMapEnd();\n                }\n                this->__isset.envs = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 7:\n            if (ftype == ::apache::thrift::protocol::T_BOOL) {\n                xfer += iprot->readBool(this->is_stateful);\n                this->__isset.is_stateful = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 8:\n            if (ftype == ::apache::thrift::protocol::T_I32) {\n                xfer += iprot->readI32(this->max_replica_count);\n                this->__isset.max_replica_count = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 9:\n            if (ftype == ::apache::thrift::protocol::T_I64) {\n                xfer += iprot->readI64(this->expire_second);\n                this->__isset.expire_second = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 10:\n            if (ftype == 
::apache::thrift::protocol::T_I64) {\n                xfer += iprot->readI64(this->create_second);\n                this->__isset.create_second = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 11:\n            if (ftype == ::apache::thrift::protocol::T_I64) {\n                xfer += iprot->readI64(this->drop_second);\n                this->__isset.drop_second = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 12:\n            if (ftype == ::apache::thrift::protocol::T_BOOL) {\n                xfer += iprot->readBool(this->duplicating);\n                this->__isset.duplicating = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 13:\n            if (ftype == ::apache::thrift::protocol::T_I32) {\n                xfer += iprot->readI32(this->init_partition_count);\n                this->__isset.init_partition_count = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        case 14:\n            if (ftype == ::apache::thrift::protocol::T_BOOL) {\n                xfer += iprot->readBool(this->is_bulk_loading);\n                this->__isset.is_bulk_loading = true;\n            } else {\n                xfer += iprot->skip(ftype);\n            }\n            break;\n        default:\n            xfer += iprot->skip(ftype);\n            break;\n        }\n        xfer += iprot->readFieldEnd();\n    }\n\n    xfer += iprot->readStructEnd();\n\n    return xfer;\n}\n\nuint32_t app_info::write(::apache::thrift::protocol::TProtocol *oprot) const\n{\n    uint32_t xfer = 0;\n    apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);\n    xfer += oprot->writeStructBegin(\"app_info\");\n\n    xfer += oprot->writeFieldBegin(\"status\", ::apache::thrift::protocol::T_I32, 1);\n    xfer += 
oprot->writeI32((int32_t)this->status);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"app_type\", ::apache::thrift::protocol::T_STRING, 2);\n    xfer += oprot->writeString(this->app_type);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"app_name\", ::apache::thrift::protocol::T_STRING, 3);\n    xfer += oprot->writeString(this->app_name);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"app_id\", ::apache::thrift::protocol::T_I32, 4);\n    xfer += oprot->writeI32(this->app_id);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"partition_count\", ::apache::thrift::protocol::T_I32, 5);\n    xfer += oprot->writeI32(this->partition_count);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"envs\", ::apache::thrift::protocol::T_MAP, 6);\n    {\n        xfer += oprot->writeMapBegin(::apache::thrift::protocol::T_STRING,\n                                     ::apache::thrift::protocol::T_STRING,\n                                     static_cast<uint32_t>(this->envs.size()));\n        std::map<std::string, std::string>::const_iterator _iter44;\n        for (_iter44 = this->envs.begin(); _iter44 != this->envs.end(); ++_iter44) {\n            xfer += oprot->writeString(_iter44->first);\n            xfer += oprot->writeString(_iter44->second);\n        }\n        xfer += oprot->writeMapEnd();\n    }\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"is_stateful\", ::apache::thrift::protocol::T_BOOL, 7);\n    xfer += oprot->writeBool(this->is_stateful);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"max_replica_count\", ::apache::thrift::protocol::T_I32, 8);\n    xfer += oprot->writeI32(this->max_replica_count);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"expire_second\", ::apache::thrift::protocol::T_I64, 9);\n    xfer += 
oprot->writeI64(this->expire_second);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"create_second\", ::apache::thrift::protocol::T_I64, 10);\n    xfer += oprot->writeI64(this->create_second);\n    xfer += oprot->writeFieldEnd();\n\n    xfer += oprot->writeFieldBegin(\"drop_second\", ::apache::thrift::protocol::T_I64, 11);\n    xfer += oprot->writeI64(this->drop_second);\n    xfer += oprot->writeFieldEnd();\n\n    if (this->__isset.duplicating) {\n        xfer += oprot->writeFieldBegin(\"duplicating\", ::apache::thrift::protocol::T_BOOL, 12);\n        xfer += oprot->writeBool(this->duplicating);\n        xfer += oprot->writeFieldEnd();\n    }\n    xfer += oprot->writeFieldBegin(\"init_partition_count\", ::apache::thrift::protocol::T_I32, 13);\n    xfer += oprot->writeI32(this->init_partition_count);\n    xfer += oprot->writeFieldEnd();\n\n    if (this->__isset.is_bulk_loading) {\n        xfer += oprot->writeFieldBegin(\"is_bulk_loading\", ::apache::thrift::protocol::T_BOOL, 14);\n        xfer += oprot->writeBool(this->is_bulk_loading);\n        xfer += oprot->writeFieldEnd();\n    }\n    xfer += oprot->writeFieldStop();\n    xfer += oprot->writeStructEnd();\n    return xfer;\n}\n\nvoid swap(app_info &a, app_info &b)\n{\n    using ::std::swap;\n    swap(a.status, b.status);\n    swap(a.app_type, b.app_type);\n    swap(a.app_name, b.app_name);\n    swap(a.app_id, b.app_id);\n    swap(a.partition_count, b.partition_count);\n    swap(a.envs, b.envs);\n    swap(a.is_stateful, b.is_stateful);\n    swap(a.max_replica_count, b.max_replica_count);\n    swap(a.expire_second, b.expire_second);\n    swap(a.create_second, b.create_second);\n    swap(a.drop_second, b.drop_second);\n    swap(a.duplicating, b.duplicating);\n    swap(a.init_partition_count, b.init_partition_count);\n    swap(a.is_bulk_loading, b.is_bulk_loading);\n    swap(a.__isset, b.__isset);\n}\n\napp_info::app_info(const app_info &other45)\n{\n    status = other45.status;\n    
app_type = other45.app_type;\n    app_name = other45.app_name;\n    app_id = other45.app_id;\n    partition_count = other45.partition_count;\n    envs = other45.envs;\n    is_stateful = other45.is_stateful;\n    max_replica_count = other45.max_replica_count;\n    expire_second = other45.expire_second;\n    create_second = other45.create_second;\n    drop_second = other45.drop_second;\n    duplicating = other45.duplicating;\n    init_partition_count = other45.init_partition_count;\n    is_bulk_loading = other45.is_bulk_loading;\n    __isset = other45.__isset;\n}\napp_info::app_info(app_info &&other46)\n{\n    status = std::move(other46.status);\n    app_type = std::move(other46.app_type);\n    app_name = std::move(other46.app_name);\n    app_id = std::move(other46.app_id);\n    partition_count = std::move(other46.partition_count);\n    envs = std::move(other46.envs);\n    is_stateful = std::move(other46.is_stateful);\n    max_replica_count = std::move(other46.max_replica_count);\n    expire_second = std::move(other46.expire_second);\n    create_second = std::move(other46.create_second);\n    drop_second = std::move(other46.drop_second);\n    duplicating = std::move(other46.duplicating);\n    init_partition_count = std::move(other46.init_partition_count);\n    is_bulk_loading = std::move(other46.is_bulk_loading);\n    __isset = std::move(other46.__isset);\n}\napp_info &app_info::operator=(const app_info &other47)\n{\n    status = other47.status;\n    app_type = other47.app_type;\n    app_name = other47.app_name;\n    app_id = other47.app_id;\n    partition_count = other47.partition_count;\n    envs = other47.envs;\n    is_stateful = other47.is_stateful;\n    max_replica_count = other47.max_replica_count;\n    expire_second = other47.expire_second;\n    create_second = other47.create_second;\n    drop_second = other47.drop_second;\n    duplicating = other47.duplicating;\n    init_partition_count = other47.init_partition_count;\n    is_bulk_loading = 
other47.is_bulk_loading;\n    __isset = other47.__isset;\n    return *this;\n}\napp_info &app_info::operator=(app_info &&other48)\n{\n    status = std::move(other48.status);\n    app_type = std::move(other48.app_type);\n    app_name = std::move(other48.app_name);\n    app_id = std::move(other48.app_id);\n    partition_count = std::move(other48.partition_count);\n    envs = std::move(other48.envs);\n    is_stateful = std::move(other48.is_stateful);\n    max_replica_count = std::move(other48.max_replica_count);\n    expire_second = std::move(other48.expire_second);\n    create_second = std::move(other48.create_second);\n    drop_second = std::move(other48.drop_second);\n    duplicating = std::move(other48.duplicating);\n    init_partition_count = std::move(other48.init_partition_count);\n    is_bulk_loading = std::move(other48.is_bulk_loading);\n    __isset = std::move(other48.__isset);\n    return *this;\n}\nvoid app_info::printTo(std::ostream &out) const\n{\n    using ::apache::thrift::to_string;\n    out << \"app_info(\";\n    out << \"status=\" << to_string(status);\n    out << \", \"\n        << \"app_type=\" << to_string(app_type);\n    out << \", \"\n        << \"app_name=\" << to_string(app_name);\n    out << \", \"\n        << \"app_id=\" << to_string(app_id);\n    out << \", \"\n        << \"partition_count=\" << to_string(partition_count);\n    out << \", \"\n        << \"envs=\" << to_string(envs);\n    out << \", \"\n        << \"is_stateful=\" << to_string(is_stateful);\n    out << \", \"\n        << \"max_replica_count=\" << to_string(max_replica_count);\n    out << \", \"\n        << \"expire_second=\" << to_string(expire_second);\n    out << \", \"\n        << \"create_second=\" << to_string(create_second);\n    out << \", \"\n        << \"drop_second=\" << to_string(drop_second);\n    out << \", \"\n        << \"duplicating=\";\n    (__isset.duplicating ? 
(out << to_string(duplicating)) : (out << \"<null>\"));\n    out << \", \"\n        << \"init_partition_count=\" << to_string(init_partition_count);\n    out << \", \"\n        << \"is_bulk_loading=\";\n    (__isset.is_bulk_loading ? (out << to_string(is_bulk_loading)) : (out << \"<null>\"));\n    out << \")\";\n}\n\n} // namespace\n"
  },
  {
    "path": "src/runtime/env.sim.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"env.sim.h\"\n#include \"scheduler.h\"\n\n#include <dsn/utility/rand.h>\n\nnamespace dsn {\nnamespace tools {\n\n/*static*/ int sim_env_provider::_seed;\n\nvoid sim_env_provider::on_worker_start(task_worker *worker)\n{\n    rand::reseed_thread_local_rng(\n        (_seed + worker->index() + worker->index() * worker->pool_spec().pool_code) ^\n        worker->index());\n}\n\nsim_env_provider::sim_env_provider(env_provider *inner_provider) : env_provider(inner_provider)\n{\n    task_worker::on_start.put_front(on_worker_start, 
\"sim_env_provider::on_worker_start\");\n\n    _seed =\n        (int)dsn_config_get_value_uint64(\"tools.simulator\",\n                                         \"random_seed\",\n                                         0,\n                                         \"random seed for the simulator, 0 for random random seed\");\n    if (_seed == 0) {\n        _seed = std::random_device{}();\n    }\n\n    derror(\"simulation.random seed for this round is %d\", _seed);\n}\n\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/env.sim.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool_api.h>\n\nnamespace dsn {\nnamespace tools {\n\nclass sim_env_provider : public env_provider\n{\npublic:\n    sim_env_provider(env_provider *inner_provider);\n    static int seed() { return _seed; }\n\nprivate:\n    static void on_worker_start(task_worker *worker);\n    static int _seed;\n};\n}\n} // end namespace\n"
  },
  {
    "path": "src/runtime/fault_injector.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     inject failure through join points to mimic all network/disk/slow execution etc. 
failures\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/toollet/fault_injector.h>\n#include <dsn/service_api_c.h>\n#include <dsn/utility/rand.h>\n#include <dsn/tool-api/aio_task.h>\n\nnamespace dsn {\nnamespace tools {\n\nstruct fj_opt\n{\n    bool fault_injection_enabled;\n\n    // io failure\n    double rpc_request_data_corrupted_ratio;\n    double rpc_response_data_corrupted_ratio;\n    std::string rpc_message_data_corrupted_type;\n\n    double rpc_request_drop_ratio;\n    double rpc_response_drop_ratio;\n    double rpc_request_delay_ratio;\n    double rpc_response_delay_ratio;\n    double disk_read_fail_ratio;\n    double disk_write_fail_ratio;\n\n    // delay\n    uint32_t rpc_message_delay_ms_min;\n    uint32_t rpc_message_delay_ms_max;\n    uint32_t disk_io_delay_ms_min;\n    uint32_t disk_io_delay_ms_max;\n    uint32_t execution_extra_delay_us_max;\n    uint32_t execution_extra_delay_us_min;\n\n    // node crash\n    uint32_t node_crash_minutes_min;\n    uint32_t node_crash_minutes_max;\n    uint32_t node_crash_minutes_recover_min;\n    uint32_t node_crash_minutes_recover_max;\n    bool node_crashed;\n};\n\nCONFIG_BEGIN(fj_opt)\nCONFIG_FLD(bool, bool, fault_injection_enabled, true, \"whether enable fault injection\")\n\nCONFIG_FLD(double,\n           double,\n           rpc_request_data_corrupted_ratio,\n           0,\n           \"data corrupted ratio for rpc request message\")\nCONFIG_FLD(double,\n           double,\n           rpc_response_data_corrupted_ratio,\n           0,\n           \"data corrupted ratio for rpc response message\")\nCONFIG_FLD_STRING(rpc_message_data_corrupted_type,\n                  \"random\",\n                  \"data corrupted type: random/header/body\")\n\nCONFIG_FLD(double, double, rpc_request_drop_ratio, 0, \"drop ratio for rpc request messages\")\nCONFIG_FLD(double, double, rpc_response_drop_ratio, 0, \"drop ratio for 
rpc response messages\")\nCONFIG_FLD(double, double, rpc_request_delay_ratio, 0, \"delay ratio for rpc request messages\")\nCONFIG_FLD(double, double, rpc_response_delay_ratio, 0, \"delay ratio for rpc response messages\")\nCONFIG_FLD(double, double, disk_read_fail_ratio, 0.000001, \"failure ratio for disk read operations\")\nCONFIG_FLD(\n    double, double, disk_write_fail_ratio, 0.000001, \"failure ratio for disk write operations\")\n\nCONFIG_FLD(\n    uint32_t, uint64, rpc_message_delay_ms_min, 0, \"miminum message delay (ms) for rpc messages\")\nCONFIG_FLD(\n    uint32_t, uint64, rpc_message_delay_ms_max, 1000, \"maximum message delay (ms) for rpc messages\")\nCONFIG_FLD(uint32_t, uint64, disk_io_delay_ms_min, 1, \"miminum disk operation delay (ms)\")\nCONFIG_FLD(uint32_t, uint64, disk_io_delay_ms_max, 12, \"maximum disk operation delay (ms)\")\nCONFIG_FLD(uint32_t,\n           uint64,\n           execution_extra_delay_us_min,\n           0,\n           \"extra execution time delay (us) for this task\")\nCONFIG_FLD(uint32_t,\n           uint64,\n           execution_extra_delay_us_max,\n           0,\n           \"extra execution time delay (us) for this task\")\n\nCONFIG_FLD(uint32_t,\n           uint64,\n           node_crash_minutes_min,\n           40,\n           \"every minimum period (mins) the node should crash\")\nCONFIG_FLD(uint32_t,\n           uint64,\n           node_crash_minutes_max,\n           60,\n           \"every maximum period (mins) the node should crash\")\nCONFIG_FLD(\n    uint32_t, uint64, node_crash_minutes_recover_min, 1, \"minimum recovery time (ms) for the node\")\nCONFIG_FLD(\n    uint32_t, uint64, node_crash_minutes_recover_max, 4, \"minimum recovery time (ms) for the node\")\nCONFIG_FLD(bool, bool, node_crashed, false, \"whether to enable node crash\")\nCONFIG_END\n\nstatic fj_opt *s_fj_opts = nullptr;\n\ntypedef uint64_extension_helper<fj_opt, task> task_ext_for_fj;\n\nstatic void fault_on_task_enqueue(task *caller, task 
*callee) {}\n\nstatic void fault_on_task_begin(task *this_)\n{\n    fj_opt &opt = s_fj_opts[this_->spec().code];\n    if (opt.execution_extra_delay_us_max > 0) {\n        auto d = rand::next_u32(0, opt.execution_extra_delay_us_max);\n        ddebug(\n            \"fault inject %s at %s with delay %u us\", this_->spec().name.c_str(), __FUNCTION__, d);\n        std::this_thread::sleep_for(std::chrono::microseconds(d));\n    }\n}\n\nstatic void fault_on_task_end(task *this_) {}\n\nstatic void fault_on_task_cancelled(task *this_) {}\n\nstatic void fault_on_task_wait_pre(task *caller, task *callee, uint32_t timeout_ms) {}\n\nstatic void fault_on_task_wait_post(task *caller, task *callee, bool succ) {}\n\nstatic void fault_on_task_cancel_post(task *caller, task *callee, bool succ) {}\n\n// return true means continue, otherwise early terminate with task::set_error_code\nstatic bool fault_on_aio_call(task *caller, aio_task *callee)\n{\n    switch (callee->get_aio_context()->type) {\n    case AIO_Read:\n        if (rand::next_double01() < s_fj_opts[callee->spec().code].disk_read_fail_ratio) {\n            ddebug(\"fault inject %s at %s\", callee->spec().name.c_str(), __FUNCTION__);\n            callee->set_error_code(ERR_FILE_OPERATION_FAILED);\n            return false;\n        }\n        break;\n    case AIO_Write:\n        if (rand::next_double01() < s_fj_opts[callee->spec().code].disk_write_fail_ratio) {\n            ddebug(\"fault inject %s at %s\", callee->spec().name.c_str(), __FUNCTION__);\n            callee->set_error_code(ERR_FILE_OPERATION_FAILED);\n            return false;\n        }\n        break;\n    default:\n        break;\n    }\n\n    return true;\n}\n\nstatic void fault_on_aio_enqueue(aio_task *this_)\n{\n    fj_opt &opt = s_fj_opts[this_->spec().code];\n    if (this_->delay_milliseconds() == 0 && task_ext_for_fj::get(this_) == 0) {\n        this_->set_delay(rand::next_u32(opt.disk_io_delay_ms_min, opt.disk_io_delay_ms_max));\n        ddebug(\"fault 
inject %s at %s with delay %u ms\",\n               this_->spec().name.c_str(),\n               __FUNCTION__,\n               this_->delay_milliseconds());\n        task_ext_for_fj::get(this_) = 1; // ensure only fd once\n    }\n}\n\nstatic void replace_value(std::vector<blob> &buffer_list, unsigned int offset)\n{\n    for (blob &bb : buffer_list) {\n        if (offset < bb.length()) {\n            (const_cast<char *>(bb.data()))[offset]++;\n            break;\n        } else\n            offset -= bb.length();\n    }\n}\n\nstatic void corrupt_data(message_ex *request, const std::string &corrupt_type)\n{\n    if (corrupt_type == \"header\")\n        replace_value(request->buffers, rand::next_u32(0, sizeof(message_header) - 1));\n    else if (corrupt_type == \"body\")\n        replace_value(request->buffers,\n                      rand::next_u32(0, request->body_size() - 1) + sizeof(message_header));\n    else if (corrupt_type == \"random\")\n        replace_value(request->buffers,\n                      rand::next_u32(0, request->body_size() + sizeof(message_header) - 1));\n    else {\n        derror(\"try to inject an unknown data corrupt type: %s\", corrupt_type.c_str());\n    }\n}\n\n// return true means continue, otherwise early terminate with task::set_error_code\nstatic bool fault_on_rpc_call(task *caller, message_ex *req, rpc_response_task *callee)\n{\n    fj_opt &opt = s_fj_opts[req->local_rpc_code];\n    if (rand::next_double01() < opt.rpc_request_drop_ratio) {\n        ddebug(\"fault inject %s at %s: %s => %s\",\n               req->header->rpc_name,\n               __FUNCTION__,\n               req->header->from_address.to_string(),\n               req->to_address.to_string());\n        return false;\n    } else {\n        if (rand::next_double01() < opt.rpc_request_data_corrupted_ratio) {\n            ddebug(\"corrupt the rpc call message from: %s, type: %s\",\n                   req->header->from_address.to_string(),\n                   
opt.rpc_message_data_corrupted_type.c_str());\n            corrupt_data(req, opt.rpc_message_data_corrupted_type);\n        }\n        return true;\n    }\n}\n\nstatic void fault_on_rpc_request_enqueue(rpc_request_task *callee)\n{\n    fj_opt &opt = s_fj_opts[callee->spec().code];\n    if (callee->delay_milliseconds() == 0 && task_ext_for_fj::get(callee) == 0) {\n        if (rand::next_double01() < opt.rpc_request_delay_ratio) {\n            callee->set_delay(\n                rand::next_u32(opt.rpc_message_delay_ms_min, opt.rpc_message_delay_ms_max));\n            ddebug(\"fault inject %s at %s with delay %u ms\",\n                   callee->spec().name.c_str(),\n                   __FUNCTION__,\n                   callee->delay_milliseconds());\n            task_ext_for_fj::get(callee) = 1; // ensure only fd once\n        }\n    }\n}\n\n// return true means continue, otherwise early terminate with task::set_error_code\nstatic bool fault_on_rpc_reply(task *caller, message_ex *msg)\n{\n    fj_opt &opt = s_fj_opts[msg->local_rpc_code];\n    if (rand::next_double01() < opt.rpc_response_drop_ratio) {\n        ddebug(\"fault inject %s at %s: %s => %s\",\n               msg->header->rpc_name,\n               __FUNCTION__,\n               msg->header->from_address.to_string(),\n               msg->to_address.to_string());\n        return false;\n    } else {\n        if (rand::next_double01() < opt.rpc_response_data_corrupted_ratio) {\n            ddebug(\"fault injector corrupt the rpc reply message from: %s, type: %s\",\n                   msg->header->from_address.to_string(),\n                   opt.rpc_message_data_corrupted_type.c_str());\n            corrupt_data(msg, opt.rpc_message_data_corrupted_type);\n        }\n        return true;\n    }\n}\n\nstatic void fault_on_rpc_response_enqueue(rpc_response_task *resp)\n{\n    fj_opt &opt = s_fj_opts[resp->spec().code];\n    if (resp->delay_milliseconds() == 0 && task_ext_for_fj::get(resp) == 0) {\n        if 
(rand::next_double01() < opt.rpc_response_delay_ratio) {\n            resp->set_delay(\n                rand::next_u32(opt.rpc_message_delay_ms_min, opt.rpc_message_delay_ms_max));\n            ddebug(\"fault inject %s at %s with delay %u ms\",\n                   resp->spec().name.c_str(),\n                   __FUNCTION__,\n                   resp->delay_milliseconds());\n            task_ext_for_fj::get(resp) = 1; // ensure only fd once\n        }\n    }\n}\n\nvoid fault_injector::install(service_spec &spec)\n{\n    task_ext_for_fj::register_ext();\n\n    s_fj_opts = new fj_opt[dsn::task_code::max() + 1];\n    fj_opt default_opt;\n    read_config(\"task..default\", default_opt);\n\n    for (int i = 0; i <= dsn::task_code::max(); i++) {\n        if (i == TASK_CODE_INVALID)\n            continue;\n\n        std::string section_name =\n            std::string(\"task.\") + std::string(dsn::task_code(i).to_string());\n        task_spec *spec = task_spec::get(i);\n        dassert(spec != nullptr, \"task_spec cannot be null\");\n\n        fj_opt &lopt = s_fj_opts[i];\n        read_config(section_name.c_str(), lopt, &default_opt);\n\n        if (!lopt.fault_injection_enabled)\n            continue;\n\n        spec->on_task_enqueue.put_back(fault_on_task_enqueue, \"fault_injector\");\n        spec->on_task_begin.put_back(fault_on_task_begin, \"fault_injector\");\n        spec->on_task_end.put_back(fault_on_task_end, \"fault_injector\");\n        spec->on_task_cancelled.put_back(fault_on_task_cancelled, \"fault_injector\");\n        spec->on_task_wait_pre.put_back(fault_on_task_wait_pre, \"fault_injector\");\n        spec->on_task_wait_post.put_back(fault_on_task_wait_post, \"fault_injector\");\n        spec->on_task_cancel_post.put_back(fault_on_task_cancel_post, \"fault_injector\");\n        spec->on_aio_call.put_native(fault_on_aio_call);\n        spec->on_aio_enqueue.put_back(fault_on_aio_enqueue, \"fault_injector\");\n        
spec->on_rpc_call.put_native(fault_on_rpc_call);\n        spec->on_rpc_request_enqueue.put_back(fault_on_rpc_request_enqueue, \"fault_injector\");\n        spec->on_rpc_reply.put_native(fault_on_rpc_reply);\n        spec->on_rpc_response_enqueue.put_back(fault_on_rpc_response_enqueue, \"fault_injector\");\n    }\n\n    if (default_opt.node_crash_minutes_max > 0) {\n        // TODO:\n    }\n}\n\nfault_injector::fault_injector(const char *name) : toollet(name) {}\n}\n}\n"
  },
  {
    "path": "src/runtime/global_config.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     configuration of rDSN\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <thread>\n\n#include <dsn/utility/singleton_store.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/config_api.h>\n#include <dsn/utility/flags.h>\n\n#include <dsn/tool-api/global_config.h>\n#include <dsn/tool-api/task_spec.h>\n#include <dsn/tool-api/network.h>\n#include <dsn/cpp/service_app.h>\n\nnamespace dsn {\n\nstatic bool build_client_network_confs(const char *section,\n                                       /*out*/ network_client_configs &nss,\n                                       network_client_configs 
*default_spec)\n{\n    nss.clear();\n\n    std::vector<const char *> keys;\n    dsn_config_get_all_keys(section, keys);\n\n    for (const char *item : keys) {\n        std::string k(item);\n        if (k.length() <= strlen(\"network.client.\"))\n            continue;\n\n        if (k.substr(0, strlen(\"network.client.\")) != std::string(\"network.client.\"))\n            continue;\n\n        auto k2 = k.substr(strlen(\"network.client.\"));\n        if (rpc_channel::is_exist(k2.c_str())) {\n            /*\n            ;channel = network_provider_name,buffer_block_size\n            network.client.RPC_CHANNEL_TCP = dsn::tools::asio_network_provider,65536\n            network.client.RPC_CHANNEL_UDP = dsn::tools::asio_network_provider,65536\n            */\n\n            rpc_channel ch = rpc_channel::from_string(k2.c_str(), RPC_CHANNEL_TCP);\n\n            // dsn::tools::asio_network_provider,65536\n            std::list<std::string> vs;\n            std::string v = dsn_config_get_value_string(\n                section,\n                k.c_str(),\n                \"\",\n                \"network channel configuration, e.g., dsn::tools::asio_network_provider,65536\");\n            utils::split_args(v.c_str(), vs, ',');\n\n            if (vs.size() != 2) {\n                printf(\"invalid client network specification '%s', should be \"\n                       \"'$network-factory,$msg-buffer-size'\\n\",\n                       v.c_str());\n                return false;\n            }\n\n            network_client_config ns;\n            ns.factory_name = vs.begin()->c_str();\n            ns.message_buffer_block_size = atoi(vs.rbegin()->c_str());\n\n            if (ns.message_buffer_block_size == 0) {\n                printf(\"invalid message buffer size specified: '%s'\\n\", vs.rbegin()->c_str());\n                return false;\n            }\n\n            nss[ch] = ns;\n        } else {\n            printf(\"invalid rpc channel type: %s\\n\", k2.c_str());\n            
return false;\n        }\n    }\n\n    if (default_spec) {\n        for (auto &kv : *default_spec) {\n            if (nss.find(kv.first) == nss.end()) {\n                nss[kv.first] = kv.second;\n            }\n        }\n    }\n\n    return true;\n}\n\nstatic bool build_server_network_confs(const char *section,\n                                       /*out*/ network_server_configs &nss,\n                                       network_server_configs *default_spec,\n                                       const std::vector<int> &ports,\n                                       bool is_template)\n{\n    nss.clear();\n\n    std::vector<const char *> keys;\n    dsn_config_get_all_keys(section, keys);\n\n    for (const char *item : keys) {\n        std::string k(item);\n        if (k.length() <= strlen(\"network.server.\"))\n            continue;\n\n        if (k.substr(0, strlen(\"network.server.\")) != std::string(\"network.server.\"))\n            continue;\n\n        auto k2 = k.substr(strlen(\"network.server.\"));\n        std::list<std::string> ks;\n        utils::split_args(k2.c_str(), ks, '.');\n        if (ks.size() != 2) {\n            printf(\"invalid network server config '%s', should be like \"\n                   \"'network.server.12345.RPC_CHANNEL_TCP' instead\\n\",\n                   k.c_str());\n            return false;\n        }\n\n        int port = atoi(ks.begin()->c_str());\n        auto k3 = *ks.rbegin();\n\n        if (is_template) {\n            if (port != 0) {\n                printf(\"invalid network server configuration '%s'\\n\", k.c_str());\n                printf(\"port must be zero in [apps..default]\\n\");\n                printf(\" e.g., network.server.0.RPC_CHANNEL_TCP = NET_HDR_DSN, \"\n                       \"dsn::tools::asio_network_provider,65536\\n\");\n                return false;\n            }\n        } else {\n            if (std::find(ports.begin(), ports.end(), port) == ports.end()) {\n                continue;\n        
    }\n        }\n\n        if (rpc_channel::is_exist(k3.c_str())) {\n            /*\n            port = 0 for default setting in [apps..default]\n            port.channel = network_provider_name,buffer_block_size\n            network.server.port().RPC_CHANNEL_TCP = dsn::tools::asio_network_provider,65536\n            network.server.port().RPC_CHANNEL_UDP = dsn::tools::asio_network_provider,65536\n            */\n\n            rpc_channel ch = rpc_channel::from_string(k3.c_str(), RPC_CHANNEL_TCP);\n\n            // dsn::tools::asio_network_provider,65536\n            std::list<std::string> vs;\n            std::string v = dsn_config_get_value_string(\n                section,\n                k.c_str(),\n                \"\",\n                \"network channel configuration, e.g., dsn::tools::asio_network_provider,65536\");\n            utils::split_args(v.c_str(), vs, ',');\n\n            if (vs.size() != 2) {\n                printf(\"invalid server network specification '%s', should be \"\n                       \"'$network-factory,$msg-buffer-size'\\n\",\n                       v.c_str());\n                return false;\n            }\n\n            network_server_config ns(port, ch);\n            ns.factory_name = vs.begin()->c_str();\n            ns.message_buffer_block_size = atoi(vs.rbegin()->c_str());\n\n            if (ns.message_buffer_block_size == 0) {\n                printf(\"invalid message buffer size specified: '%s'\\n\", vs.rbegin()->c_str());\n                return false;\n            }\n\n            nss[ns] = ns;\n        } else {\n            printf(\"invalid rpc channel type: %s\\n\", k3.c_str());\n            return false;\n        }\n    }\n\n    if (default_spec) {\n        for (auto &kv : *default_spec) {\n            network_server_config cs = kv.second;\n            for (auto &port : ports) {\n                cs.port = port;\n                if (nss.find(cs) == nss.end()) {\n                    nss[cs] = cs;\n                }\n       
     }\n\n            if (is_template) {\n                cs.port = 0;\n                if (nss.find(cs) == nss.end()) {\n                    nss[cs] = cs;\n                }\n            }\n        }\n    }\n\n    return true;\n}\n\nbool service_app_spec::init(const char *section,\n                            const char *role_name_,\n                            service_app_spec *default_value,\n                            network_client_configs *default_client_nets,\n                            network_server_configs *default_server_nets)\n{\n    id = 0;\n    index = 0;\n    role_name = std::string(role_name_);\n    full_name = role_name;\n    config_section = std::string(section);\n\n    if (!read_config(section, *this, default_value))\n        return false;\n\n    std::sort(ports.begin(), ports.end());\n    ports_gap = ports.size() > 0 ? (*ports.rbegin() + 1 - *ports.begin()) : 0;\n\n    if (!build_client_network_confs(section,\n                                    this->network_client_confs,\n                                    default_value ? &default_value->network_client_confs\n                                                  : default_client_nets))\n        return false;\n\n    if (!build_server_network_confs(section,\n                                    this->network_server_confs,\n                                    default_value ? 
&default_value->network_server_confs\n                                                  : default_server_nets,\n                                    ports,\n                                    default_value == nullptr))\n        return false;\n\n    return true;\n}\n\nnetwork_client_config::network_client_config()\n{\n    factory_name = \"dsn::tools::asio_network_provider\";\n    message_buffer_block_size = 65536;\n}\n\nnetwork_server_config::network_server_config() : port(0), channel(RPC_CHANNEL_TCP)\n{\n    factory_name = \"dsn::tools::asio_network_provider\";\n    message_buffer_block_size = 65536;\n}\n\nnetwork_server_config::network_server_config(int p, rpc_channel c) : port(p), channel(c)\n{\n    factory_name = \"dsn::tools::asio_network_provider\";\n    message_buffer_block_size = 65536;\n}\n\nnetwork_server_config::network_server_config(const network_server_config &r)\n    : port(r.port), channel(r.channel)\n{\n    factory_name = r.factory_name;\n    message_buffer_block_size = r.message_buffer_block_size;\n}\n\nbool network_server_config::operator<(const network_server_config &r) const\n{\n    return port < r.port || (port == r.port && channel < r.channel);\n}\n\nbool service_spec::init()\n{\n    // init common spec\n    if (!read_config(\"core\", *this, nullptr))\n        return false;\n\n    // init thread pools\n    if (!threadpool_spec::init(threadpool_specs))\n        return false;\n\n    // init task specs\n    if (!task_spec::init())\n        return false;\n\n    return true;\n}\n\nbool service_spec::init_app_specs()\n{\n    std::vector<std::string> all_section_names;\n    dsn_config_get_all_sections(all_section_names);\n\n    // check mimic app\n    const char *mimic_app_role_name = \"dsn.app.mimic\";\n    service_app::register_factory<service_app>(mimic_app_role_name);\n    if (enable_default_app_mimic) {\n        std::string mimic_section_name(\"apps.mimic\");\n        if (std::find(all_section_names.begin(), all_section_names.end(), 
mimic_section_name) ==\n            all_section_names.end()) {\n            dsn_config_set(\"apps.mimic\", \"type\", mimic_app_role_name, \"\");\n            dsn_config_set(\"apps.mimic\", \"pools\", \"THREAD_POOL_DEFAULT\", \"\");\n            all_section_names.push_back(\"apps.mimic\");\n        } else {\n            auto type = dsn_config_get_value_string(\"apps.mimic\", \"type\", \"\", \"\");\n            if (strcmp(type, mimic_app_role_name) != 0) {\n                printf(\"invalid config value '%s' for [apps.mimic] type\", type);\n                return false;\n            }\n        }\n    }\n\n    // init all apps\n    service_app_spec default_app;\n    if (!default_app.init(\"apps..default\",\n                          \".default\",\n                          nullptr,\n                          &this->network_default_client_cfs,\n                          &this->network_default_server_cfs))\n        return false;\n\n    int app_id = 0;\n    for (auto it = all_section_names.begin(); it != all_section_names.end(); ++it) {\n        if (it->substr(0, strlen(\"apps.\")) == std::string(\"apps.\") &&\n            *it != std::string(\"apps..default\")) {\n            service_app_spec app;\n            if (!app.init((*it).c_str(), it->substr(5).c_str(), &default_app))\n                return false;\n\n            // fix ports_gap when necessary\n            int ports_gap = app.ports_gap;\n\n            auto ports = app.ports;\n            auto nsc = app.network_server_confs;\n            for (int i = 1; i <= app.count; i++) {\n                char buf[16];\n                sprintf(buf, \"%u\", i);\n                app.full_name = (app.count > 1 ? 
(app.role_name + buf) : app.role_name);\n                app.id = ++app_id;\n                app.index = i;\n                app.data_dir = utils::filesystem::path_combine(data_dir, app.full_name);\n\n                // add app\n                app_specs.push_back(app);\n                dassert((int)app_specs.size() == app.id, \"incorrect app id\");\n\n                // for next instance\n                app.ports.clear();\n                for (auto &p : ports) {\n                    app.ports.push_back(p + i * ports_gap);\n                }\n\n                app.network_server_confs.clear();\n                for (auto sc : nsc) {\n                    sc.second.port += i * ports_gap;\n                    app.network_server_confs[sc.second] = sc.second;\n                }\n            }\n        }\n    }\n\n    return true;\n}\n} // end namespace dsn\n"
  },
  {
    "path": "src/runtime/message_utils.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/cpp/message_utils.h>\n\nnamespace dsn {\n\n/*extern*/ dsn::message_ex *from_blob_to_received_msg(dsn::task_code rpc_code,\n                                                      const blob &bb,\n                                                      int thread_hash,\n                                                      uint64_t partition_hash,\n                                                      dsn_msg_serialize_format serialization_type)\n{\n    auto msg = ::dsn::message_ex::create_receive_message_with_standalone_header(bb);\n    msg->local_rpc_code = rpc_code;\n    const char *name = rpc_code.to_string();\n    strncpy(msg->header->rpc_name, name, sizeof(msg->header->rpc_name) - 1);\n    
msg->header->rpc_name[sizeof(msg->header->rpc_name) - 1] = '\\0';\n\n    msg->header->client.thread_hash = thread_hash;\n    msg->header->client.partition_hash = partition_hash;\n    msg->header->context.u.serialize_format = serialization_type;\n    return msg;\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/nativerun.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/tool/nativerun.h>\n\nnamespace dsn {\nnamespace tools {\n\nvoid nativerun::install(service_spec &spec)\n{\n    if (spec.env_factory_name == \"\")\n        spec.env_factory_name = (\"dsn::env_provider\");\n\n    if (spec.timer_factory_name == \"\")\n        spec.timer_factory_name = (\"dsn::tools::simple_timer_service\");\n    {\n        network_client_config cs;\n        cs.factory_name = \"dsn::tools::asio_network_provider\";\n        cs.message_buffer_block_size = 1024 * 64;\n        spec.network_default_client_cfs[RPC_CHANNEL_TCP] = cs;\n    }\n 
   {\n        network_server_config cs2;\n        cs2.port = 0;\n        cs2.channel = RPC_CHANNEL_TCP;\n        cs2.factory_name = \"dsn::tools::asio_network_provider\";\n        cs2.message_buffer_block_size = 1024 * 64;\n        spec.network_default_server_cfs[cs2] = cs2;\n    }\n    {\n        network_client_config cs;\n        cs.factory_name = \"dsn::tools::asio_udp_provider\";\n        cs.message_buffer_block_size = 1024 * 64;\n        spec.network_default_client_cfs[RPC_CHANNEL_UDP] = cs;\n    }\n    {\n        network_server_config cs2;\n        cs2.port = 0;\n        cs2.channel = RPC_CHANNEL_UDP;\n        cs2.factory_name = \"dsn::tools::asio_udp_provider\";\n        cs2.message_buffer_block_size = 1024 * 64;\n        spec.network_default_server_cfs[cs2] = cs2;\n    }\n\n    if (spec.logging_factory_name == \"\")\n        spec.logging_factory_name = \"dsn::tools::simple_logger\";\n\n    if (spec.lock_factory_name == \"\")\n        spec.lock_factory_name = (\"dsn::tools::std_lock_provider\");\n\n    if (spec.lock_nr_factory_name == \"\")\n        spec.lock_nr_factory_name = (\"dsn::tools::std_lock_nr_provider\");\n\n    if (spec.rwlock_nr_factory_name == \"\")\n        spec.rwlock_nr_factory_name = (\"dsn::tools::std_rwlock_nr_provider\");\n\n    if (spec.semaphore_factory_name == \"\")\n        spec.semaphore_factory_name = (\"dsn::tools::std_semaphore_provider\");\n\n    for (auto it = spec.threadpool_specs.begin(); it != spec.threadpool_specs.end(); ++it) {\n        threadpool_spec &tspec = *it;\n\n        if (tspec.worker_factory_name == \"\")\n            tspec.worker_factory_name = (\"dsn::task_worker\");\n\n        if (tspec.queue_factory_name == \"\")\n            tspec.queue_factory_name = (\"dsn::tools::simple_task_queue\");\n    }\n}\n\nvoid nativerun::run() { tool_app::run(); }\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/profiler.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\nHELP GRAPH\n                           CALL ===== net(call) ========> ENQUEUE ===== queue(server) ====> START\n                            ^                               ^                                ||\n                            |                               |                                ||\n                            |                               |                                ||\n                            |                               |                                ||\n                            |                               |                                ||\n                      Client Latency                Server Latency                     exec(server)\n                            |              
                 |                                ||\n                            |                               |                                ||\n                            |                               |                                ||\n                            |                               |                                ||\n                            V                               V                                ||\nSTART<== queue(server) == ENQUEUE <===== net(reply) ======= REPLY <=============================\n  ||\n  ||\n exec(client)\n  ||\n  ||\n  \\/\n END\n*/\n#include <dsn/toollet/profiler.h>\n#include <dsn/service_api_c.h>\n#include <dsn/tool-api/aio_task.h>\n#include \"utils/shared_io_service.h\"\n#include \"profiler_header.h\"\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n\nnamespace dsn {\nnamespace tools {\n\ntypedef uint64_extension_helper<task_spec_profiler, task> task_ext_for_profiler;\ntypedef uint64_extension_helper<task_spec_profiler, message_ex> message_ext_for_profiler;\n\nstd::unique_ptr<task_spec_profiler[]> s_spec_profilers;\n\nint s_task_code_max = 0;\n\ncounter_info *counter_info_ptr[] = {\n    new counter_info({\"queue.time\", \"qt\"},\n                     TASK_QUEUEING_TIME_NS,\n                     COUNTER_TYPE_NUMBER_PERCENTILES,\n                     \"QUEUE(ns)\",\n                     \"ns\"),\n    new counter_info(\n        {\"exec.time\", \"et\"}, TASK_EXEC_TIME_NS, COUNTER_TYPE_NUMBER_PERCENTILES, \"EXEC(ns)\", \"ns\"),\n    new counter_info({\"throughput\", \"tp\"}, TASK_THROUGHPUT, COUNTER_TYPE_RATE, \"THP(#/s)\", \"#/s\"),\n    new counter_info({\"cancelled\", \"cc\"}, TASK_CANCELLED, COUNTER_TYPE_NUMBER, \"CANCEL(#)\", \"#\"),\n    new counter_info({\"aio.latency\", \"al\"},\n                     AIO_LATENCY_NS,\n                     COUNTER_TYPE_NUMBER_PERCENTILES,\n                     \"AIO.LATENCY(ns)\",\n                     \"ns\"),\n    new 
counter_info({\"rpc.server.latency\", \"rpcsl\"},\n                     RPC_SERVER_LATENCY_NS,\n                     COUNTER_TYPE_NUMBER_PERCENTILES,\n                     \"RPC.SERVER(ns)\",\n                     \"ns\"),\n    new counter_info({\"rpc.server.size.request\", \"rpcssreq\"},\n                     RPC_SERVER_SIZE_PER_REQUEST_IN_BYTES,\n                     COUNTER_TYPE_NUMBER_PERCENTILES,\n                     \"RPC.SERVER.SIZE.REQUEST(bytes)\",\n                     \"bytes\"),\n    new counter_info({\"rpc.server.size.response\", \"rpcssresp\"},\n                     RPC_SERVER_SIZE_PER_RESPONSE_IN_BYTES,\n                     COUNTER_TYPE_NUMBER_PERCENTILES,\n                     \"RPC.SERVER.SIZE.RESPONSE(bytes)\",\n                     \"bytes\"),\n    new counter_info({\"rpc.client.latency\", \"rpccl\"},\n                     RPC_CLIENT_NON_TIMEOUT_LATENCY_NS,\n                     COUNTER_TYPE_NUMBER_PERCENTILES,\n                     \"RPC.CLIENT(ns)\",\n                     \"ns\"),\n    new counter_info({\"rpc.client.timeout\", \"rpcto\"},\n                     RPC_CLIENT_TIMEOUT_THROUGHPUT,\n                     COUNTER_TYPE_RATE,\n                     \"TIMEOUT(#/s)\",\n                     \"#/s\"),\n    new counter_info(\n        {\"task.inqueue\", \"tiq\"}, TASK_IN_QUEUE, COUNTER_TYPE_NUMBER, \"InQueue(#)\", \"#\"),\n    new counter_info({\"rpc.dropped\", \"rdit\"},\n                     RPC_DROPPED_IF_TIMEOUT,\n                     COUNTER_TYPE_VOLATILE_NUMBER,\n                     \"RPC.DROPPED(#)\",\n                     \"#\")};\n\n// call normal task\nstatic void profiler_on_task_create(task *caller, task *callee)\n{\n    task_ext_for_profiler::get(callee) = dsn_now_ns();\n}\n\nstatic void profiler_on_task_enqueue(task *caller, task *callee)\n{\n    auto callee_code = callee->spec().code;\n    dassert(callee_code >= 0 && callee_code <= s_task_code_max, \"code = %d\", callee_code.code());\n\n    if (caller != nullptr) {\n        auto 
caller_code = caller->spec().code;\n        dassert(\n            caller_code >= 0 && caller_code <= s_task_code_max, \"code = %d\", caller_code.code());\n\n        auto &prof = s_spec_profilers[caller_code];\n        if (prof.collect_call_count) {\n            prof.call_counts[callee_code]++;\n        }\n    }\n\n    task_ext_for_profiler::get(callee) = dsn_now_ns();\n    if (callee->delay_milliseconds() == 0) {\n        auto ptr = s_spec_profilers[callee_code].ptr[TASK_IN_QUEUE].get();\n        if (ptr != nullptr)\n            ptr->increment();\n    }\n}\n\nstatic void profiler_on_task_begin(task *this_)\n{\n    auto code = this_->spec().code;\n    dassert(code >= 0 && code <= s_task_code_max, \"code = %d\", code.code());\n\n    uint64_t &qts = task_ext_for_profiler::get(this_);\n    uint64_t now = dsn_now_ns();\n    auto ptr = s_spec_profilers[code].ptr[TASK_QUEUEING_TIME_NS].get();\n    if (ptr != nullptr)\n        ptr->set(now - qts);\n    qts = now;\n\n    ptr = s_spec_profilers[code].ptr[TASK_IN_QUEUE].get();\n    if (ptr != nullptr)\n        ptr->decrement();\n}\n\nstatic void profiler_on_task_end(task *this_)\n{\n    auto code = this_->spec().code;\n    dassert(code >= 0 && code <= s_task_code_max, \"code = %d\", code.code());\n\n    uint64_t qts = task_ext_for_profiler::get(this_);\n    uint64_t now = dsn_now_ns();\n    auto ptr = s_spec_profilers[code].ptr[TASK_EXEC_TIME_NS].get();\n    if (ptr != nullptr)\n        ptr->set(now - qts);\n\n    ptr = s_spec_profilers[code].ptr[TASK_THROUGHPUT].get();\n    if (ptr != nullptr)\n        ptr->increment();\n}\n\nstatic void profiler_on_task_cancelled(task *this_)\n{\n    auto code = this_->spec().code;\n    dassert(code >= 0 && code <= s_task_code_max, \"code = %d\", code.code());\n\n    auto ptr = s_spec_profilers[code].ptr[TASK_CANCELLED].get();\n    if (ptr != nullptr)\n        ptr->increment();\n}\n\nstatic void profiler_on_task_wait_pre(task *caller, task *callee, uint32_t timeout_ms) {}\n\nstatic void 
profiler_on_task_wait_post(task *caller, task *callee, bool succ) {}\n\nstatic void profiler_on_task_cancel_post(task *caller, task *callee, bool succ) {}\n\n// return true means continue, otherwise early terminate with task::set_error_code\nstatic void profiler_on_aio_call(task *caller, aio_task *callee)\n{\n    if (nullptr != caller) {\n        auto caller_code = caller->spec().code;\n        dassert(\n            caller_code >= 0 && caller_code <= s_task_code_max, \"code = %d\", caller_code.code());\n\n        auto &prof = s_spec_profilers[caller_code];\n        if (prof.collect_call_count) {\n            auto callee_code = callee->spec().code;\n            dassert(callee_code >= 0 && callee_code <= s_task_code_max,\n                    \"code = %d\",\n                    callee_code.code());\n            prof.call_counts[callee_code]++;\n        }\n    }\n\n    // time disk io starts\n    task_ext_for_profiler::get(callee) = dsn_now_ns();\n}\n\nstatic void profiler_on_aio_enqueue(aio_task *this_)\n{\n    auto code = this_->spec().code;\n    dassert(code >= 0 && code <= s_task_code_max, \"code = %d\", code.code());\n\n    uint64_t &ats = task_ext_for_profiler::get(this_);\n    uint64_t now = dsn_now_ns();\n\n    auto ptr = s_spec_profilers[code].ptr[AIO_LATENCY_NS].get();\n    if (ptr != nullptr)\n        ptr->set(now - ats);\n    ats = now;\n\n    ptr = s_spec_profilers[code].ptr[TASK_IN_QUEUE].get();\n    if (ptr != nullptr)\n        ptr->increment();\n}\n\n// return true means continue, otherwise early terminate with task::set_error_code\nstatic void profiler_on_rpc_call(task *caller, message_ex *req, rpc_response_task *callee)\n{\n    if (nullptr != caller) {\n        auto caller_code = caller->spec().code;\n        dassert(\n            caller_code >= 0 && caller_code <= s_task_code_max, \"code = %d\", caller_code.code());\n\n        auto &prof = s_spec_profilers[caller_code];\n        if (prof.collect_call_count) {\n            dassert(req->local_rpc_code 
>= 0 && req->local_rpc_code <= s_task_code_max,\n                    \"code = %d\",\n                    req->local_rpc_code.code());\n            prof.call_counts[req->local_rpc_code]++;\n        }\n    }\n\n    // time rpc starts\n    if (nullptr != callee) {\n        task_ext_for_profiler::get(callee) = dsn_now_ns();\n    }\n}\n\nstatic void profiler_on_rpc_request_enqueue(rpc_request_task *callee)\n{\n    auto callee_code = callee->spec().code;\n    dassert(callee_code >= 0 && callee_code <= s_task_code_max, \"code = %d\", callee_code.code());\n\n    uint64_t now = dsn_now_ns();\n    task_ext_for_profiler::get(callee) = now;\n    message_ext_for_profiler::get(callee->get_request()) = now;\n\n    auto ptr = s_spec_profilers[callee_code].ptr[TASK_IN_QUEUE].get();\n    if (ptr != nullptr) {\n        ptr->increment();\n    }\n    ptr = s_spec_profilers[callee_code].ptr[RPC_SERVER_SIZE_PER_REQUEST_IN_BYTES].get();\n    if (ptr != nullptr) {\n        ptr->set(callee->get_request()->header->body_length);\n    }\n}\n\nstatic void profile_on_rpc_task_dropped(rpc_request_task *callee)\n{\n    auto code = callee->spec().code;\n    auto ptr = s_spec_profilers[code].ptr[RPC_DROPPED_IF_TIMEOUT].get();\n    if (ptr != nullptr) {\n        ptr->increment();\n    }\n}\n\nstatic void profiler_on_rpc_create_response(message_ex *req, message_ex *resp)\n{\n    message_ext_for_profiler::get(resp) = message_ext_for_profiler::get(req);\n}\n\n// return true means continue, otherwise early terminate with task::set_error_code\nstatic void profiler_on_rpc_reply(task *caller, message_ex *msg)\n{\n    auto caller_code = caller->spec().code;\n    dassert(caller_code >= 0 && caller_code <= s_task_code_max, \"code = %d\", caller_code.code());\n\n    auto &prof = s_spec_profilers[caller_code];\n    if (prof.collect_call_count) {\n        dassert(msg->local_rpc_code >= 0 && msg->local_rpc_code <= s_task_code_max,\n                \"code = %d\",\n                msg->local_rpc_code.code());\n      
  prof.call_counts[msg->local_rpc_code]++;\n    }\n\n    uint64_t qts = message_ext_for_profiler::get(msg);\n    uint64_t now = dsn_now_ns();\n    task_spec *spec = task_spec::get(msg->local_rpc_code);\n    dassert(spec != nullptr, \"task_spec cannot be null, code = %d\", msg->local_rpc_code.code());\n    auto code = spec->rpc_paired_code;\n    dassert(code >= 0 && code <= s_task_code_max, \"code = %d\", code.code());\n    auto ptr = s_spec_profilers[code].ptr[RPC_SERVER_LATENCY_NS].get();\n    if (ptr != nullptr) {\n        ptr->set(now - qts);\n    }\n    ptr = s_spec_profilers[code].ptr[RPC_SERVER_SIZE_PER_RESPONSE_IN_BYTES].get();\n    if (ptr != nullptr) {\n        ptr->set(msg->header->body_length);\n    }\n}\n\nstatic void profiler_on_rpc_response_enqueue(rpc_response_task *resp)\n{\n    auto resp_code = resp->spec().code;\n    dassert(resp_code >= 0 && resp_code <= s_task_code_max, \"code = %d\", resp_code.code());\n\n    uint64_t &cts = task_ext_for_profiler::get(resp);\n    uint64_t now = dsn_now_ns();\n\n    if (resp->get_response() != nullptr) {\n        auto ptr = s_spec_profilers[resp_code].ptr[RPC_CLIENT_NON_TIMEOUT_LATENCY_NS].get();\n        if (ptr != nullptr)\n            ptr->set(now - cts);\n    } else {\n        auto ptr = s_spec_profilers[resp_code].ptr[RPC_CLIENT_TIMEOUT_THROUGHPUT].get();\n        if (ptr != nullptr)\n            ptr->increment();\n    }\n    cts = now;\n\n    auto ptr = s_spec_profilers[resp_code].ptr[TASK_IN_QUEUE].get();\n    if (ptr != nullptr)\n        ptr->increment();\n}\n\nvoid profiler::install(service_spec &)\n{\n    s_task_code_max = dsn::task_code::max();\n    s_spec_profilers.reset(new task_spec_profiler[s_task_code_max + 1]);\n    task_ext_for_profiler::register_ext();\n    message_ext_for_profiler::register_ext();\n    dassert(sizeof(counter_info_ptr) / sizeof(counter_info *) == PERF_COUNTER_COUNT,\n            \"PREF COUNTER ERROR\");\n\n    auto profile = dsn_config_get_value_bool(\n        
\"task..default\", \"is_profile\", false, \"whether to profile this kind of task\");\n    auto collect_call_count = dsn_config_get_value_bool(\n        \"task..default\",\n        \"collect_call_count\",\n        true,\n        \"whether to collect how many time this kind of tasks invoke each of other kinds tasks\");\n\n    for (int i = 0; i <= s_task_code_max; i++) {\n        if (i == TASK_CODE_INVALID)\n            continue;\n\n        std::string name(dsn::task_code(i).to_string());\n        std::string section_name = std::string(\"task.\") + name;\n        task_spec *spec = task_spec::get(i);\n        dassert(spec != nullptr, \"task_spec cannot be null\");\n\n        s_spec_profilers[i].collect_call_count = dsn_config_get_value_bool(\n            section_name.c_str(),\n            \"collect_call_count\",\n            collect_call_count,\n            \"whether to collect how many time this kind of tasks invoke each of other kinds tasks\");\n        s_spec_profilers[i].call_counts = new std::atomic<int64_t>[ s_task_code_max + 1 ];\n        std::fill(s_spec_profilers[i].call_counts,\n                  s_spec_profilers[i].call_counts + s_task_code_max + 1,\n                  0);\n\n        s_spec_profilers[i].is_profile = dsn_config_get_value_bool(\n            section_name.c_str(), \"is_profile\", profile, \"whether to profile this kind of task\");\n\n        if (!s_spec_profilers[i].is_profile)\n            continue;\n\n        if (dsn_config_get_value_bool(\n                section_name.c_str(),\n                \"profiler::inqueue\",\n                true,\n                \"whether to profile the number of this kind of tasks in all queues\"))\n            s_spec_profilers[i].ptr[TASK_IN_QUEUE].init_global_counter(\n                \"zion\",\n                \"profiler\",\n                (name + std::string(\".inqueue\")).c_str(),\n                COUNTER_TYPE_NUMBER,\n                \"task number in all queues\");\n\n        if 
(dsn_config_get_value_bool(section_name.c_str(),\n                                      \"profiler::queue\",\n                                      true,\n                                      \"whether to profile the queuing time of a task\"))\n            s_spec_profilers[i].ptr[TASK_QUEUEING_TIME_NS].init_global_counter(\n                \"zion\",\n                \"profiler\",\n                (name + std::string(\".queue(ns)\")).c_str(),\n                COUNTER_TYPE_NUMBER_PERCENTILES,\n                \"latency due to waiting in the queue\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"profiler::exec\",\n                                      true,\n                                      \"whether to profile the executing time of a task\"))\n            s_spec_profilers[i].ptr[TASK_EXEC_TIME_NS].init_global_counter(\n                \"zion\",\n                \"profiler\",\n                (name + std::string(\".exec(ns)\")).c_str(),\n                COUNTER_TYPE_NUMBER_PERCENTILES,\n                \"latency due to executing tasks\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"profiler::qps\",\n                                      true,\n                                      \"whether to profile the qps of a task\"))\n            s_spec_profilers[i].ptr[TASK_THROUGHPUT].init_global_counter(\n                \"zion\",\n                \"profiler\",\n                (name + std::string(\".qps\")).c_str(),\n                COUNTER_TYPE_RATE,\n                \"task numbers per second\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"profiler::cancelled\",\n                                      true,\n                                      \"whether to profile the cancelled times of a task\"))\n            s_spec_profilers[i].ptr[TASK_CANCELLED].init_global_counter(\n                
\"zion\",\n                \"profiler\",\n                (name + std::string(\".cancelled\")).c_str(),\n                COUNTER_TYPE_NUMBER,\n                \"cancelled times of a specific task type\");\n\n        if (spec->type == dsn_task_type_t::TASK_TYPE_RPC_REQUEST) {\n            if (dsn_config_get_value_bool(section_name.c_str(),\n                                          \"profiler::latency.server\",\n                                          true,\n                                          \"whether to profile the server latency of a task\")) {\n                s_spec_profilers[i].ptr[RPC_SERVER_LATENCY_NS].init_global_counter(\n                    \"zion\",\n                    \"profiler\",\n                    (name + std::string(\".latency.server\")).c_str(),\n                    COUNTER_TYPE_NUMBER_PERCENTILES,\n                    \"latency from enqueue point to reply point on the server side for RPC \"\n                    \"tasks\");\n            }\n            if (dsn_config_get_value_bool(section_name.c_str(),\n                                          \"profiler::size.request.server\",\n                                          false,\n                                          \"whether to profile the size per request\")) {\n                s_spec_profilers[i].ptr[RPC_SERVER_SIZE_PER_REQUEST_IN_BYTES].init_global_counter(\n                    \"zion\",\n                    \"profiler\",\n                    (name + std::string(\".size.request.server\")).c_str(),\n                    COUNTER_TYPE_NUMBER_PERCENTILES,\n                    \"\");\n            }\n            if (dsn_config_get_value_bool(section_name.c_str(),\n                                          \"profiler::size.response.server\",\n                                          false,\n                                          \"whether to profile the size per response\")) {\n                s_spec_profilers[i].ptr[RPC_SERVER_SIZE_PER_RESPONSE_IN_BYTES].init_global_counter(\n      
              \"zion\",\n                    \"profiler\",\n                    (name + std::string(\".size.response.server\")).c_str(),\n                    COUNTER_TYPE_NUMBER_PERCENTILES,\n                    \"\");\n            }\n            if (dsn_config_get_value_bool(\n                    section_name.c_str(),\n                    \"rpc_request_dropped_before_execution_when_timeout\",\n                    false,\n                    \"whether to profile the number of rpc dropped for timeout\"))\n                s_spec_profilers[i].ptr[RPC_DROPPED_IF_TIMEOUT].init_global_counter(\n                    \"zion\",\n                    \"profiler\",\n                    (name + std::string(\".rpc.dropped\")).c_str(),\n                    COUNTER_TYPE_VOLATILE_NUMBER,\n                    \"rpc dropped if queue time exceed client timeout\");\n        } else if (spec->type == dsn_task_type_t::TASK_TYPE_RPC_RESPONSE) {\n            if (dsn_config_get_value_bool(section_name.c_str(),\n                                          \"profiler::latency.client\",\n                                          true,\n                                          \"whether to profile the client latency of a task\"))\n                s_spec_profilers[i].ptr[RPC_CLIENT_NON_TIMEOUT_LATENCY_NS].init_global_counter(\n                    \"zion\",\n                    \"profiler\",\n                    (name + std::string(\".latency.client(ns)\")).c_str(),\n                    COUNTER_TYPE_NUMBER_PERCENTILES,\n                    \"latency from call point to enqueue point on the client side for RPC \"\n                    \"tasks\");\n            if (dsn_config_get_value_bool(section_name.c_str(),\n                                          \"profiler::timeout.qps\",\n                                          true,\n                                          \"whether to profile the timeout qps of a task\"))\n                
s_spec_profilers[i].ptr[RPC_CLIENT_TIMEOUT_THROUGHPUT].init_global_counter(\n                    \"zion\",\n                    \"profiler\",\n                    (name + std::string(\".timeout.qps\")).c_str(),\n                    COUNTER_TYPE_RATE,\n                    \"time-out task numbers per second for RPC tasks\");\n        } else if (spec->type == dsn_task_type_t::TASK_TYPE_AIO) {\n            if (dsn_config_get_value_bool(section_name.c_str(),\n                                          \"profiler::latency\",\n                                          true,\n                                          \"whether to profile the latency of an AIO task\"))\n                s_spec_profilers[i].ptr[AIO_LATENCY_NS].init_global_counter(\n                    \"zion\",\n                    \"profiler\",\n                    (name + std::string(\".latency(ns)\")).c_str(),\n                    COUNTER_TYPE_NUMBER_PERCENTILES,\n                    \"latency from call point to enqueue point for AIO tasks\");\n        }\n\n        // we don't use perf_counter_ptr but perf_counter* in ptr[xxx] to avoid unnecessary memory\n        // access cost\n        // we need to add reference so that the counters won't go\n        // release_ref should be done when the profiler exits (which never happens right now so we\n        // omit that for the time being)\n        for (size_t j = 0; j < sizeof(s_spec_profilers[i].ptr) / sizeof(perf_counter *); j++) {\n            if (s_spec_profilers[i].ptr[j].get() != nullptr) {\n                s_spec_profilers[i].ptr[j]->add_ref();\n            }\n        }\n\n        spec->on_task_create.put_back(profiler_on_task_create, \"profiler\");\n        spec->on_task_enqueue.put_back(profiler_on_task_enqueue, \"profiler\");\n        spec->on_task_begin.put_back(profiler_on_task_begin, \"profiler\");\n        spec->on_task_end.put_back(profiler_on_task_end, \"profiler\");\n        spec->on_task_cancelled.put_back(profiler_on_task_cancelled, 
\"profiler\");\n        spec->on_task_wait_pre.put_back(profiler_on_task_wait_pre, \"profiler\");\n        spec->on_task_wait_post.put_back(profiler_on_task_wait_post, \"profiler\");\n        spec->on_task_cancel_post.put_back(profiler_on_task_cancel_post, \"profiler\");\n        spec->on_aio_call.put_back(profiler_on_aio_call, \"profiler\");\n        spec->on_aio_enqueue.put_back(profiler_on_aio_enqueue, \"profiler\");\n        spec->on_rpc_call.put_back(profiler_on_rpc_call, \"profiler\");\n        spec->on_rpc_request_enqueue.put_back(profiler_on_rpc_request_enqueue, \"profiler\");\n        spec->on_rpc_task_dropped.put_back(profile_on_rpc_task_dropped, \"profiler\");\n        spec->on_rpc_create_response.put_back(profiler_on_rpc_create_response, \"profiler\");\n        spec->on_rpc_reply.put_back(profiler_on_rpc_reply, \"profiler\");\n        spec->on_rpc_response_enqueue.put_back(profiler_on_rpc_response_enqueue, \"profiler\");\n    }\n}\n\nprofiler::profiler(const char *name) : toollet(name) {}\n\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/profiler_header.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n#include <iomanip>\n#include <dsn/perf_counter/perf_counter_wrapper.h>\n\nnamespace dsn {\nnamespace tools {\n\nenum perf_counter_ptr_type\n{\n    TASK_QUEUEING_TIME_NS,\n    TASK_EXEC_TIME_NS,\n    TASK_THROUGHPUT,\n    TASK_CANCELLED,\n    AIO_LATENCY_NS,\n    RPC_SERVER_LATENCY_NS,\n    RPC_SERVER_SIZE_PER_REQUEST_IN_BYTES,\n    RPC_SERVER_SIZE_PER_RESPONSE_IN_BYTES,\n    RPC_CLIENT_NON_TIMEOUT_LATENCY_NS,\n    RPC_CLIENT_TIMEOUT_THROUGHPUT,\n    TASK_IN_QUEUE,\n    RPC_DROPPED_IF_TIMEOUT,\n\n    PERF_COUNTER_COUNT,\n    PERF_COUNTER_INVALID\n};\n\nclass counter_info\n{\npublic:\n    counter_info(const std::vector<std::string> &command_keys,\n                 perf_counter_ptr_type ptr_type,\n                 
dsn_perf_counter_type_t counter_type,\n                 const std::string &title,\n                 const std::string &unit)\n        : keys(command_keys),\n          counter_ptr_type(ptr_type),\n          type(counter_type),\n          title(title),\n          unit_name(unit)\n    {\n    }\n\n    std::vector<std::string> keys;\n    perf_counter_ptr_type counter_ptr_type;\n    dsn_perf_counter_type_t type;\n    std::string title;\n    std::string unit_name;\n};\n\nstruct task_spec_profiler\n{\n    perf_counter_wrapper ptr[PERF_COUNTER_COUNT];\n    bool collect_call_count;\n    bool is_profile;\n    std::atomic<int64_t> *call_counts;\n\n    task_spec_profiler()\n    {\n        collect_call_count = false;\n        is_profile = false;\n        call_counts = nullptr;\n        memset((void *)ptr, 0, sizeof(ptr));\n    }\n};\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/providers.common.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"runtime/rpc/asio_net_provider.h\"\n#include <dsn/tool/providers.common.h>\n#include \"utils/lockp.std.h\"\n#include \"runtime/task/simple_task_queue.h\"\n#include \"runtime/task/hpc_task_queue.h\"\n#include \"runtime/rpc/network.sim.h\"\n#include \"utils/simple_logger.h\"\n#include \"runtime/rpc/dsn_message_parser.h\"\n#include \"runtime/rpc/thrift_message_parser.h\"\n#include \"runtime/rpc/raw_message_parser.h\"\n\nnamespace dsn {\nnamespace tools {\n\nvoid register_std_lock_providers()\n{\n    
lock_provider::register_component<std_lock_provider>(\"dsn::tools::std_lock_provider\");\n    lock_nr_provider::register_component<std_lock_nr_provider>(\"dsn::tools::std_lock_nr_provider\");\n    rwlock_nr_provider::register_component<std_rwlock_nr_provider>(\n        \"dsn::tools::std_rwlock_nr_provider\");\n    semaphore_provider::register_component<std_semaphore_provider>(\n        \"dsn::tools::std_semaphore_provider\");\n}\n\nvoid register_common_providers()\n{\n    register_component_provider<env_provider>(\"dsn::env_provider\");\n    register_component_provider<task_worker>(\"dsn::task_worker\");\n\n    register_std_lock_providers();\n\n    register_component_provider<asio_network_provider>(\"dsn::tools::asio_network_provider\");\n    register_component_provider<asio_udp_provider>(\"dsn::tools::asio_udp_provider\");\n    register_component_provider<sim_network_provider>(\"dsn::tools::sim_network_provider\");\n    register_component_provider<simple_task_queue>(\"dsn::tools::simple_task_queue\");\n    register_component_provider<hpc_concurrent_task_queue>(\"dsn::tools::hpc_concurrent_task_queue\");\n    register_component_provider<simple_timer_service>(\"dsn::tools::simple_timer_service\");\n\n    register_message_header_parser<dsn_message_parser>(NET_HDR_DSN, {\"RDSN\"});\n    register_message_header_parser<thrift_message_parser>(NET_HDR_THRIFT, {\"THFT\"});\n    register_message_header_parser<raw_message_parser>(NET_HDR_RAW, {\"_RAW\"});\n}\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/rpc/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn.rpc)\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS \"\")\n\n# Extra files that will be installed\nset(MY_BINPLACES \"\")\n\nthrift_generate_cpp(\n    REQUEST_META_THRIFT_SRCS\n    REQUEST_META_THRIFT_HDRS\n    ${CMAKE_CURRENT_SOURCE_DIR}/request_meta.thrift\n)\n\nset(MY_PROJ_SRC ${REQUEST_META_THRIFT_SRCS})\n\ndsn_add_object()\n"
  },
  {
    "path": "src/runtime/rpc/asio_net_provider.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/utility/rand.h>\n#include <memory>\n\n#include \"asio_net_provider.h\"\n#include \"asio_rpc_session.h\"\n#include <dsn/utility/flags.h>\n\nnamespace dsn {\nnamespace tools {\n\nDSN_DEFINE_uint32(\"network\",\n                  io_service_worker_count,\n                  1,\n                  \"thread number for io service (timer and boost network)\");\n\nconst int threads_per_event_loop = 1;\n\nasio_network_provider::asio_network_provider(rpc_engine *srv, network *inner_provider)\n    : connection_oriented_network(srv, inner_provider), _acceptor(nullptr)\n{\n    for (auto i = 0; i < FLAGS_io_service_worker_count; i++) {\n        // Using thread-local operation queues in single-threaded use cases (i.e. 
when\n        // concurrency_hint is 1) to eliminate a lock/unlock pair.\n        _io_services.emplace_back(\n            std::make_unique<boost::asio::io_service>(threads_per_event_loop));\n    }\n}\n\nasio_network_provider::~asio_network_provider()\n{\n    if (_acceptor) {\n        _acceptor->close();\n    }\n    for (auto &io_service : _io_services) {\n        io_service->stop();\n    }\n\n    for (auto &w : _workers) {\n        w->join();\n    }\n}\n\nerror_code asio_network_provider::start(rpc_channel channel, int port, bool client_only)\n{\n    if (_acceptor != nullptr)\n        return ERR_SERVICE_ALREADY_RUNNING;\n\n    // get connection threshold from config, default value 0 means no threshold\n    _cfg_conn_threshold_per_ip = (uint32_t)dsn_config_get_value_uint64(\n        \"network\", \"conn_threshold_per_ip\", 0, \"max connection count to each server per ip\");\n\n    for (int i = 0; i < FLAGS_io_service_worker_count; i++) {\n        _workers.push_back(std::make_shared<std::thread>([this, i]() {\n            task::set_tls_dsn_context(node(), nullptr);\n\n            const char *name = ::dsn::tools::get_service_node_name(node());\n            char buffer[128];\n            sprintf(buffer, \"%s.asio.%d\", name, i);\n            task_worker::set_name(buffer);\n\n            boost::asio::io_service::work work(*_io_services[i]);\n            boost::system::error_code ec;\n            _io_services[i]->run(ec);\n            if (ec) {\n                dassert(false, \"boost::asio::io_service run failed: err(%s)\", ec.message().data());\n            }\n        }));\n    }\n\n    _acceptor = nullptr;\n\n    dassert(channel == RPC_CHANNEL_TCP || channel == RPC_CHANNEL_UDP,\n            \"invalid given channel %s\",\n            channel.to_string());\n\n    _address.assign_ipv4(get_local_ipv4(), port);\n\n    if (!client_only) {\n        auto v4_addr = boost::asio::ip::address_v4::any(); //(ntohl(_address.ip));\n        ::boost::asio::ip::tcp::endpoint 
endpoint(v4_addr, _address.port());\n        boost::system::error_code ec;\n        _acceptor.reset(new boost::asio::ip::tcp::acceptor(get_io_service()));\n        _acceptor->open(endpoint.protocol(), ec);\n        if (ec) {\n            derror(\"asio tcp acceptor open failed, error = %s\", ec.message().c_str());\n            _acceptor.reset();\n            return ERR_NETWORK_INIT_FAILED;\n        }\n        _acceptor->set_option(boost::asio::socket_base::reuse_address(true));\n        _acceptor->bind(endpoint, ec);\n        if (ec) {\n            derror(\"asio tcp acceptor bind failed, error = %s\", ec.message().c_str());\n            _acceptor.reset();\n            return ERR_NETWORK_INIT_FAILED;\n        }\n        int backlog = boost::asio::socket_base::max_connections;\n        _acceptor->listen(backlog, ec);\n        if (ec) {\n            derror(\"asio tcp acceptor listen failed, port = %u, error = %s\",\n                   _address.port(),\n                   ec.message().c_str());\n            _acceptor.reset();\n            return ERR_NETWORK_INIT_FAILED;\n        }\n        do_accept();\n    }\n\n    return ERR_OK;\n}\n\nrpc_session_ptr asio_network_provider::create_client_session(::dsn::rpc_address server_addr)\n{\n    auto sock = std::make_shared<boost::asio::ip::tcp::socket>(get_io_service());\n    message_parser_ptr parser(new_message_parser(_client_hdr_format));\n    return rpc_session_ptr(new asio_rpc_session(*this, server_addr, sock, parser, true));\n}\n\nvoid asio_network_provider::do_accept()\n{\n    auto socket = std::make_shared<boost::asio::ip::tcp::socket>(get_io_service());\n\n    _acceptor->async_accept(*socket, [this, socket](boost::system::error_code ec) {\n        if (!ec) {\n            auto remote = socket->remote_endpoint(ec);\n            if (ec) {\n                derror(\"failed to get the remote endpoint: %s\", ec.message().data());\n            } else {\n                auto ip = remote.address().to_v4().to_ulong();\n            
    auto port = remote.port();\n                ::dsn::rpc_address client_addr(ip, port);\n\n                message_parser_ptr null_parser;\n                rpc_session_ptr s =\n                    new asio_rpc_session(*this,\n                                         client_addr,\n                                         (std::shared_ptr<boost::asio::ip::tcp::socket> &)socket,\n                                         null_parser,\n                                         false);\n\n                // when server connection threshold is hit, close the session, otherwise accept it\n                if (check_if_conn_threshold_exceeded(s->remote_address())) {\n                    dwarn(\"close rpc connection from %s to %s due to hitting server \"\n                          \"connection threshold per ip\",\n                          s->remote_address().to_string(),\n                          address().to_string());\n                    s->close();\n                } else {\n                    on_server_session_accepted(s);\n\n                    // we should start read immediately after the rpc session is completely created.\n                    s->start_read_next();\n                }\n            }\n        }\n\n        do_accept();\n    });\n}\n\nvoid asio_udp_provider::send_message(message_ex *request)\n{\n    auto parser = get_message_parser(request->hdr_format);\n    parser->prepare_on_send(request);\n    auto lcount = parser->get_buffer_count_on_send(request);\n    std::unique_ptr<message_parser::send_buf[]> bufs(new message_parser::send_buf[lcount]);\n    auto rcount = parser->get_buffers_on_send(request, bufs.get());\n    dassert(lcount >= rcount, \"%d VS %d\", lcount, rcount);\n\n    size_t tlen = 0, offset = 0;\n    for (int i = 0; i < rcount; i++) {\n        tlen += bufs[i].sz;\n    }\n    dassert(tlen <= max_udp_packet_size, \"the message is too large to send via a udp channel\");\n\n    std::unique_ptr<char[]> packet_buffer(new char[tlen]);\n    for 
(int i = 0; i < rcount; i++) {\n        memcpy(&packet_buffer[offset], bufs[i].buf, bufs[i].sz);\n        offset += bufs[i].sz;\n    };\n\n    ::boost::asio::ip::udp::endpoint ep(::boost::asio::ip::address_v4(request->to_address.ip()),\n                                        request->to_address.port());\n    _socket->async_send_to(\n        ::boost::asio::buffer(packet_buffer.get(), tlen),\n        ep,\n        [=](const boost::system::error_code &error, std::size_t bytes_transferred) {\n            if (error) {\n                dwarn(\"send udp packet to ep %s:%d failed, message = %s\",\n                      ep.address().to_string().c_str(),\n                      ep.port(),\n                      error.message().c_str());\n                // we do not handle failure here, rpc matcher would handle timeouts\n            }\n        });\n    request->add_ref();\n    request->release_ref();\n}\n\nasio_udp_provider::asio_udp_provider(rpc_engine *srv, network *inner_provider)\n    : network(srv, inner_provider), _is_client(false), _recv_reader(_message_buffer_block_size)\n{\n    _parsers = new message_parser *[network_header_format::max_value() + 1];\n    memset(_parsers, 0, sizeof(message_parser *) * (network_header_format::max_value() + 1));\n}\n\nasio_udp_provider::~asio_udp_provider()\n{\n    for (int i = 0; i <= network_header_format::max_value(); i++) {\n        if (_parsers[i] != nullptr) {\n            delete _parsers[i];\n            _parsers[i] = nullptr;\n        }\n    }\n    delete[] _parsers;\n    _parsers = nullptr;\n\n    _io_service.stop();\n    for (auto &w : _workers) {\n        w->join();\n    }\n}\n\nmessage_parser *asio_udp_provider::get_message_parser(network_header_format hdr_format)\n{\n    if (_parsers[hdr_format] == nullptr) {\n        utils::auto_lock<utils::ex_lock_nr> l(_lock);\n        if (_parsers[hdr_format] == nullptr) // double check\n        {\n            _parsers[hdr_format] = new_message_parser(hdr_format);\n        }\n    }\n    
return _parsers[hdr_format];\n}\n\nvoid asio_udp_provider::do_receive()\n{\n    std::shared_ptr<::boost::asio::ip::udp::endpoint> send_endpoint(\n        new ::boost::asio::ip::udp::endpoint);\n\n    _recv_reader.truncate_read();\n    auto buffer_ptr = _recv_reader.read_buffer_ptr(max_udp_packet_size);\n    dassert(_recv_reader.read_buffer_capacity() >= max_udp_packet_size,\n            \"failed to load enough buffer in parser\");\n\n    _socket->async_receive_from(\n        ::boost::asio::buffer(buffer_ptr, max_udp_packet_size),\n        *send_endpoint,\n        [this, send_endpoint](const boost::system::error_code &error,\n                              std::size_t bytes_transferred) {\n            if (!!error) {\n                derror(\n                    \"%s: asio udp read failed: %s\", _address.to_string(), error.message().c_str());\n                do_receive();\n                return;\n            }\n\n            if (bytes_transferred < sizeof(uint32_t)) {\n                derror(\"%s: asio udp read failed: too short message\", _address.to_string());\n                do_receive();\n                return;\n            }\n\n            auto hdr_format = message_parser::get_header_type(_recv_reader._buffer.data());\n            if (NET_HDR_INVALID == hdr_format) {\n                derror(\"%s: asio udp read failed: invalid header type '%s'\",\n                       _address.to_string(),\n                       message_parser::get_debug_string(_recv_reader._buffer.data()).c_str());\n                do_receive();\n                return;\n            }\n\n            auto parser = get_message_parser(hdr_format);\n            parser->reset();\n\n            _recv_reader.mark_read(bytes_transferred);\n\n            int read_next = -1;\n\n            message_ex *msg = parser->get_message_on_receive(&_recv_reader, read_next);\n            if (msg == nullptr) {\n                derror(\"%s: asio udp read failed: invalid udp packet\", _address.to_string());\n     
           do_receive();\n                return;\n            }\n\n            msg->to_address = _address;\n            if (msg->header->context.u.is_request) {\n                on_recv_request(msg, 0);\n            } else {\n                on_recv_reply(msg->header->id, msg, 0);\n            }\n\n            do_receive();\n        });\n}\n\nerror_code asio_udp_provider::start(rpc_channel channel, int port, bool client_only)\n{\n    _is_client = client_only;\n    int io_service_worker_count =\n        (int)dsn_config_get_value_uint64(\"network\",\n                                         \"io_service_worker_count\",\n                                         1,\n                                         \"thread number for io service (timer and boost network)\");\n\n    dassert(channel == RPC_CHANNEL_UDP, \"invalid given channel %s\", channel.to_string());\n\n    if (client_only) {\n        do {\n            // FIXME: we actually do not need to set a random port for client if the rpc_engine is\n            // refactored\n            _address.assign_ipv4(get_local_ipv4(),\n                                 std::numeric_limits<uint16_t>::max() -\n                                     rand::next_u64(std::numeric_limits<uint64_t>::min(),\n                                                    std::numeric_limits<uint64_t>::max()) %\n                                         5000);\n            ::boost::asio::ip::udp::endpoint endpoint(boost::asio::ip::address_v4::any(),\n                                                      _address.port());\n            boost::system::error_code ec;\n            _socket.reset(new ::boost::asio::ip::udp::socket(_io_service));\n            _socket->open(endpoint.protocol(), ec);\n            if (ec) {\n                derror(\"asio udp socket open failed, error = %s\", ec.message().c_str());\n                _socket.reset();\n                continue;\n            }\n            _socket->bind(endpoint, ec);\n            if (ec) {\n            
    derror(\"asio udp socket bind failed, port = %u, error = %s\",\n                       _address.port(),\n                       ec.message().c_str());\n                _socket.reset();\n                continue;\n            }\n            break;\n        } while (true);\n    } else {\n        _address.assign_ipv4(get_local_ipv4(), port);\n        ::boost::asio::ip::udp::endpoint endpoint(boost::asio::ip::address_v4::any(),\n                                                  _address.port());\n        boost::system::error_code ec;\n        _socket.reset(new ::boost::asio::ip::udp::socket(_io_service));\n        _socket->open(endpoint.protocol(), ec);\n        if (ec) {\n            derror(\"asio udp socket open failed, error = %s\", ec.message().c_str());\n            _socket.reset();\n            return ERR_NETWORK_INIT_FAILED;\n        }\n        _socket->bind(endpoint, ec);\n        if (ec) {\n            derror(\"asio udp socket bind failed, port = %u, error = %s\",\n                   _address.port(),\n                   ec.message().c_str());\n            _socket.reset();\n            return ERR_NETWORK_INIT_FAILED;\n        }\n    }\n\n    for (int i = 0; i < io_service_worker_count; i++) {\n        _workers.push_back(std::make_shared<std::thread>([this, i]() {\n            task::set_tls_dsn_context(node(), nullptr);\n\n            const char *name = ::dsn::tools::get_service_node_name(node());\n            char buffer[128];\n            sprintf(buffer, \"%s.asio.udp.%d.%d\", name, (int)(this->address().port()), i);\n            task_worker::set_name(buffer);\n\n            boost::asio::io_service::work work(_io_service);\n            boost::system::error_code ec;\n            _io_service.run(ec);\n            if (ec) {\n                dassert(false, \"boost::asio::io_service run failed: err(%s)\", ec.message().data());\n            }\n        }));\n    }\n\n    do_receive();\n\n    return ERR_OK;\n}\n\n// use a round-robin scheme to choose the next 
io_service to use.\nboost::asio::io_service &asio_network_provider::get_io_service()\n{\n    return *_io_services[rand::next_u32(0, FLAGS_io_service_worker_count - 1)];\n}\n\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/rpc/asio_net_provider.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/tool_api.h>\n#include <boost/asio.hpp>\n\nnamespace dsn {\nnamespace tools {\n\n/// asio_network_provider is a wrapper of Asio library for rDSN to accept a connection and create\n/// sockets. Each io_service only allows one thread polling, so the operations of the single socket\n/// are always done in a single thread. 
we create many io_service instances to take advantage of the\n/// multi-core capabilities of the processor, and use the round-robin scheme to decide which\n/// io_service for socket to choose.\n///\n///    +-----------------------------------------------+\n///    |Linux kernel                                   |\n///    | +-----------+   +-----------+   +-----------+ |\n///    | |  Epoll1   |   |   Epoll2  |   |   Epoll3  | |\n///    | |           |   |           |   |           | |\n///    | | rfd 1,2,3 |   | rfd 4,5,6 |   | rfd 7,8,9 | |\n///    | |           |   |           |   |           | |\n///    | +-----^-----+   +-----^-----+   +-----^-----+ |\n///    +-------|---------------|---------------|-------+\n///       +-----------+   +-----------+   +-----------+\n///       |  polling  |   |  polling  |   |  polling  |\n///       | +-------+ |   | +-------+ |   | +-------+ |\n///       | |Thread1| |   | |Thread2| |   | |Thread3| |\n///       | +-------+ |   | +-------+ |   | +-------+ |\n///       |io_service1|   |io_service2|   |io_service3|\n///       +-----------+   +-----------+   +-----------+\n\nclass asio_network_provider : public connection_oriented_network\n{\npublic:\n    asio_network_provider(rpc_engine *srv, network *inner_provider);\n\n    ~asio_network_provider() override;\n\n    virtual error_code start(rpc_channel channel, int port, bool client_only) override;\n    virtual ::dsn::rpc_address address() override { return _address; }\n    virtual rpc_session_ptr create_client_session(::dsn::rpc_address server_addr) override;\n\nprivate:\n    void do_accept();\n    boost::asio::io_service &get_io_service();\n\nprivate:\n    friend class asio_rpc_session;\n    friend class asio_network_provider_test;\n\n    std::shared_ptr<boost::asio::ip::tcp::acceptor> _acceptor;\n    std::vector<std::unique_ptr<boost::asio::io_service>> _io_services;\n    std::vector<std::shared_ptr<std::thread>> _workers;\n    ::dsn::rpc_address _address;\n};\n\n// 
TODO(Tangyanzhao): change the network model like asio_network_provider\nclass asio_udp_provider : public network\n{\npublic:\n    asio_udp_provider(rpc_engine *srv, network *inner_provider);\n\n    ~asio_udp_provider() override;\n\n    void send_message(message_ex *request) override;\n\n    virtual error_code start(rpc_channel channel, int port, bool client_only) override;\n\n    virtual ::dsn::rpc_address address() override { return _address; }\n\n    virtual void inject_drop_message(message_ex *msg, bool is_send) override\n    {\n        // nothing to do for UDP\n    }\n\nprivate:\n    void do_receive();\n\n    // create parser on demand\n    message_parser *get_message_parser(network_header_format hdr_format);\n\n    bool _is_client;\n    boost::asio::io_service _io_service;\n    std::shared_ptr<boost::asio::ip::udp::socket> _socket;\n    std::vector<std::shared_ptr<std::thread>> _workers;\n    ::dsn::rpc_address _address;\n    message_reader _recv_reader;\n\n    ::dsn::utils::ex_lock_nr _lock; // [\n    message_parser **_parsers;\n    // ]\n\n    static const size_t max_udp_packet_size = 1000;\n};\n\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/rpc/asio_rpc_session.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"asio_rpc_session.h\"\n\nnamespace dsn {\nnamespace tools {\n\nvoid asio_rpc_session::set_options()\n{\n\n    if (_socket->is_open()) {\n        boost::system::error_code ec;\n        boost::asio::socket_base::send_buffer_size option, option2(16 * 1024 * 1024);\n        _socket->get_option(option, ec);\n        if (ec)\n            dwarn(\"asio socket get option failed, error = %s\", ec.message().c_str());\n        int old = option.value();\n        _socket->set_option(option2, ec);\n        if (ec)\n            dwarn(\"asio socket set option failed, error = %s\", ec.message().c_str());\n        _socket->get_option(option, ec);\n        if (ec)\n            dwarn(\"asio socket get option failed, error = %s\", 
ec.message().c_str());\n        dinfo(\"boost asio send buffer size is %u, set as 16MB, now is %u\", old, option.value());\n\n        boost::asio::socket_base::receive_buffer_size option3, option4(16 * 1024 * 1024);\n        _socket->get_option(option3, ec);\n        if (ec)\n            dwarn(\"asio socket get option failed, error = %s\", ec.message().c_str());\n        old = option3.value();\n        _socket->set_option(option4, ec);\n        if (ec)\n            dwarn(\"asio socket set option failed, error = %s\", ec.message().c_str());\n        _socket->get_option(option3, ec);\n        if (ec)\n            dwarn(\"asio socket get option failed, error = %s\", ec.message().c_str());\n        dinfo(\"boost asio recv buffer size is %u, set as 16MB, now is %u\", old, option.value());\n\n        // Nagle algorithm may cause an extra delay in some cases, because if\n        // the data in a single write spans 2n packets, the last packet will be\n        // withheld, waiting for the ACK for the previous packet. 
For more, please\n        // refer to <https://en.wikipedia.org/wiki/Nagle's_algorithm>.\n        //\n        // Disabling the Nagle algorithm would cause these effects:\n        //   * decrease delay time (positive)\n        //   * decrease the qps (negative)\n        _socket->set_option(boost::asio::ip::tcp::no_delay(true), ec);\n        if (ec)\n            dwarn(\"asio socket set option failed, error = %s\", ec.message().c_str());\n        dinfo(\"boost asio set no_delay = true\");\n    }\n}\n\nvoid asio_rpc_session::do_read(int read_next)\n{\n    add_ref();\n\n    void *ptr = _reader.read_buffer_ptr(read_next);\n    int remaining = _reader.read_buffer_capacity();\n\n    _socket->async_read_some(\n        boost::asio::buffer(ptr, remaining),\n        [this](boost::system::error_code ec, std::size_t length) {\n            if (!!ec) {\n                if (ec == boost::asio::error::make_error_code(boost::asio::error::eof)) {\n                    ddebug(\"asio read from %s failed: %s\",\n                           _remote_addr.to_string(),\n                           ec.message().c_str());\n                } else {\n                    derror(\"asio read from %s failed: %s\",\n                           _remote_addr.to_string(),\n                           ec.message().c_str());\n                }\n                on_failure();\n            } else {\n                _reader.mark_read(length);\n\n                int read_next = -1;\n\n                if (!_parser) {\n                    read_next = prepare_parser();\n                }\n\n                if (_parser) {\n                    message_ex *msg = _parser->get_message_on_receive(&_reader, read_next);\n\n                    while (msg != nullptr) {\n                        this->on_message_read(msg);\n                        msg = _parser->get_message_on_receive(&_reader, read_next);\n                    }\n                }\n\n                if (read_next == -1) {\n                    derror(\"asio read 
from %s failed\", _remote_addr.to_string());\n                    on_failure();\n                } else {\n                    start_read_next(read_next);\n                }\n            }\n\n            release_ref();\n        });\n}\n\nvoid asio_rpc_session::send(uint64_t signature)\n{\n    std::vector<boost::asio::const_buffer> asio_wbufs;\n    int bcount = (int)_sending_buffers.size();\n\n    // prepare buffers\n    asio_wbufs.resize(bcount);\n    for (int i = 0; i < bcount; i++) {\n        asio_wbufs[i] = boost::asio::const_buffer(_sending_buffers[i].buf, _sending_buffers[i].sz);\n    }\n\n    add_ref();\n\n    boost::asio::async_write(\n        *_socket, asio_wbufs, [this, signature](boost::system::error_code ec, std::size_t length) {\n            if (ec) {\n                derror(\n                    \"asio write to %s failed: %s\", _remote_addr.to_string(), ec.message().c_str());\n                on_failure(true);\n            } else {\n                on_send_completed(signature);\n            }\n\n            release_ref();\n        });\n}\n\nasio_rpc_session::asio_rpc_session(asio_network_provider &net,\n                                   ::dsn::rpc_address remote_addr,\n                                   std::shared_ptr<boost::asio::ip::tcp::socket> &socket,\n                                   message_parser_ptr &parser,\n                                   bool is_client)\n    : rpc_session(net, remote_addr, parser, is_client), _socket(socket)\n{\n    set_options();\n}\n\nvoid asio_rpc_session::close()\n{\n\n    boost::system::error_code ec;\n    _socket->shutdown(boost::asio::socket_base::shutdown_type::shutdown_both, ec);\n    if (ec)\n        dwarn(\"asio socket shutdown failed, error = %s\", ec.message().c_str());\n    _socket->close(ec);\n    if (ec)\n        dwarn(\"asio socket close failed, error = %s\", ec.message().c_str());\n}\n\nvoid asio_rpc_session::connect()\n{\n    if (set_connecting()) {\n        boost::asio::ip::tcp::endpoint 
ep(boost::asio::ip::address_v4(_remote_addr.ip()),\n                                          _remote_addr.port());\n\n        add_ref();\n        _socket->async_connect(ep, [this](boost::system::error_code ec) {\n            if (!ec) {\n                dinfo(\"client session %s connected\", _remote_addr.to_string());\n\n                set_options();\n                set_connected();\n                on_send_completed();\n                start_read_next();\n            } else {\n                derror(\"client session connect to %s failed, error = %s\",\n                       _remote_addr.to_string(),\n                       ec.message().c_str());\n                on_failure(true);\n            }\n            release_ref();\n        });\n    }\n}\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/rpc/asio_rpc_session.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/tool-api/rpc_message.h>\n#include <dsn/utility/priority_queue.h>\n#include <dsn/tool-api/message_parser.h>\n#include <boost/asio.hpp>\n#include \"asio_net_provider.h\"\n\nnamespace dsn {\nnamespace tools {\n\n// A TCP session implementation based on Boost.Asio.\n// Thread-safe\nclass asio_rpc_session : public rpc_session\n{\npublic:\n    asio_rpc_session(asio_network_provider &net,\n                     ::dsn::rpc_address remote_addr,\n                     std::shared_ptr<boost::asio::ip::tcp::socket> &socket,\n                     message_parser_ptr &parser,\n                     bool is_client);\n\n    ~asio_rpc_session() override = default;\n\n    void send(uint64_t signature) override;\n\n    void 
close() override;\n\n    void connect() override;\n\nprivate:\n    void do_read(int read_next) override;\n    void set_options();\n    void on_message_read(message_ex *msg)\n    {\n        if (!on_recv_message(msg, 0)) {\n            on_failure(false);\n        }\n    }\n\nprivate:\n    // boost::asio::socket is thread-unsafe, must use lock to prevent a\n    // reading/writing socket being modified or closed concurrently.\n    std::shared_ptr<boost::asio::ip::tcp::socket> _socket;\n};\n\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/rpc/dsn_message_parser.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     Jun. 
2016, Zuoyan Qin, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"dsn_message_parser.h\"\n#include <dsn/service_api_c.h>\n#include <dsn/utility/crc.h>\n\nnamespace dsn {\nvoid dsn_message_parser::reset() { _header_checked = false; }\n\nmessage_ex *dsn_message_parser::get_message_on_receive(message_reader *reader,\n                                                       /*out*/ int &read_next)\n{\n    read_next = 4096;\n\n    dsn::blob &buf = reader->_buffer;\n    char *buf_ptr = (char *)buf.data();\n    unsigned int buf_len = reader->_buffer_occupied;\n\n    if (buf_len >= sizeof(message_header)) {\n        if (!_header_checked) {\n            if (!is_right_header(buf_ptr)) {\n                derror(\"dsn message header check failed\");\n                read_next = -1;\n                return nullptr;\n            } else {\n                _header_checked = true;\n            }\n        }\n\n        unsigned int msg_sz = sizeof(message_header) + message_ex::get_body_length(buf_ptr);\n\n        // msg done\n        if (buf_len >= msg_sz) {\n            dsn::blob msg_bb = buf.range(0, msg_sz);\n            message_ex *msg = message_ex::create_receive_message(msg_bb);\n            if (!is_right_body(msg)) {\n                message_header *header = (message_header *)buf_ptr;\n                derror(\"dsn message body check failed, id = %\" PRIu64 \", trace_id = %016\" PRIx64\n                       \", rpc_name = %s, from_addr = %s\",\n                       header->id,\n                       header->trace_id,\n                       header->rpc_name,\n                       header->from_address.to_string());\n                read_next = -1;\n                delete msg;\n                return nullptr;\n            } else {\n                reader->_buffer = buf.range(msg_sz);\n                reader->_buffer_occupied -= msg_sz;\n                _header_checked = false;\n                read_next = (reader->_buffer_occupied >= 
sizeof(message_header)\n                                 ? 0\n                                 : sizeof(message_header) - reader->_buffer_occupied);\n                msg->hdr_format = NET_HDR_DSN;\n                return msg;\n            }\n        } else { // buf_len < msg_sz\n            read_next = msg_sz - buf_len;\n            return nullptr;\n        }\n    } else { // buf_len < sizeof(message_header)\n        read_next = sizeof(message_header) - buf_len;\n        return nullptr;\n    }\n}\n\nvoid dsn_message_parser::prepare_on_send(message_ex *msg)\n{\n    auto &header = msg->header;\n    auto &buffers = msg->buffers;\n\n#ifndef NDEBUG\n    int i_max = (int)buffers.size() - 1;\n    size_t len = 0;\n    for (int i = 0; i <= i_max; i++) {\n        len += (size_t)buffers[i].length();\n    }\n    dassert(len == (size_t)header->body_length + sizeof(message_header), \"data length is wrong\");\n#endif\n\n    if (task_spec::get(msg->local_rpc_code)->rpc_message_crc_required) {\n        // compute data crc if necessary (only once for the first time)\n        if (header->body_crc32 == CRC_INVALID) {\n            int i_max = (int)buffers.size() - 1;\n            uint32_t crc32 = 0;\n            size_t len = 0;\n            for (int i = 0; i <= i_max; i++) {\n                uint32_t lcrc;\n                const void *ptr;\n                size_t sz;\n\n                if (i == 0) {\n                    ptr = (const void *)(buffers[i].data() + sizeof(message_header));\n                    sz = (size_t)buffers[i].length() - sizeof(message_header);\n                } else {\n                    ptr = (const void *)buffers[i].data();\n                    sz = (size_t)buffers[i].length();\n                }\n\n                lcrc = dsn::utils::crc32_calc(ptr, sz, crc32);\n                crc32 = dsn::utils::crc32_concat(0, 0, crc32, len, crc32, lcrc, sz);\n\n                len += sz;\n            }\n\n            dassert(len == (size_t)header->body_length, \"data length 
is wrong\");\n            header->body_crc32 = crc32;\n        }\n\n        // always compute header crc\n        header->hdr_crc32 = CRC_INVALID;\n        header->hdr_crc32 = dsn::utils::crc32_calc(header, sizeof(message_header), 0);\n    }\n}\n\nint dsn_message_parser::get_buffers_on_send(message_ex *msg, /*out*/ send_buf *buffers)\n{\n    int i = 0;\n    for (auto &buf : msg->buffers) {\n        buffers[i].buf = (void *)buf.data();\n        buffers[i].sz = buf.length();\n        ++i;\n    }\n    return i;\n}\n\n/*static*/ bool dsn_message_parser::is_right_header(char *hdr)\n{\n    uint32_t *pcrc = reinterpret_cast<uint32_t *>(hdr + FIELD_OFFSET(message_header, hdr_crc32));\n    uint32_t crc32 = *pcrc;\n    if (crc32 != CRC_INVALID) {\n        *pcrc = CRC_INVALID;\n        bool r = (crc32 == dsn::utils::crc32_calc(hdr, sizeof(message_header), 0));\n        *pcrc = crc32;\n        if (!r) {\n            derror(\"dsn message header crc check failed\");\n        }\n        return r;\n    }\n\n    // crc is not enabled\n    else {\n        return true;\n    }\n}\n\n/*static*/ bool dsn_message_parser::is_right_body(message_ex *msg)\n{\n    auto &header = msg->header;\n    auto &buffers = msg->buffers;\n\n    if (header->body_crc32 != CRC_INVALID) {\n        int i_max = (int)buffers.size() - 1;\n        uint32_t crc32 = 0;\n        size_t len = 0;\n        for (int i = 0; i <= i_max; i++) {\n            const void *ptr = (const void *)buffers[i].data();\n            size_t sz = (size_t)buffers[i].length();\n\n            uint32_t lcrc = dsn::utils::crc32_calc(ptr, sz, crc32);\n            crc32 = dsn::utils::crc32_concat(0, 0, crc32, len, crc32, lcrc, sz);\n\n            len += sz;\n        }\n\n        dassert(len == (size_t)header->body_length, \"data length is wrong\");\n\n        bool r = (header->body_crc32 == crc32);\n        if (!r) {\n            derror(\"dsn message body crc check failed\");\n        }\n        return r;\n    }\n\n    // crc is not enabled\n   
 else {\n        return true;\n    }\n}\n}\n"
  },
  {
    "path": "src/runtime/rpc/dsn_message_parser.h",
    "content": "/*\n* The MIT License (MIT)\n*\n* Copyright (c) 2015 Microsoft Corporation\n*\n* -=- Robust Distributed System Nucleus (rDSN) -=-\n*\n* Permission is hereby granted, free of charge, to any person obtaining a copy\n* of this software and associated documentation files (the \"Software\"), to deal\n* in the Software without restriction, including without limitation the rights\n* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n* copies of the Software, and to permit persons to whom the Software is\n* furnished to do so, subject to the following conditions:\n*\n* The above copyright notice and this permission notice shall be included in\n* all copies or substantial portions of the Software.\n*\n* THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n* THE SOFTWARE.\n*/\n\n/*\n* Description:\n*     message parser for browser-generated http request\n*\n* Revision history:\n*     Jun. 
2016, Zuoyan Qin, first version\n*     xxxx-xx-xx, author, fix bug about xxx\n*/\n\n#pragma once\n\n#include <dsn/tool-api/message_parser.h>\n#include <dsn/tool-api/rpc_message.h>\n#include <dsn/utility/ports.h>\n\nnamespace dsn {\nclass dsn_message_parser : public message_parser\n{\npublic:\n    dsn_message_parser() : _header_checked(false) {}\n    virtual ~dsn_message_parser() {}\n\n    virtual void reset() override;\n\n    virtual message_ex *get_message_on_receive(message_reader *reader,\n                                               /*out*/ int &read_next) override;\n\n    virtual void prepare_on_send(message_ex *msg) override;\n\n    virtual int get_buffers_on_send(message_ex *msg, /*out*/ send_buf *buffers) override;\n\nprivate:\n    static bool is_right_header(char *hdr);\n\n    static bool is_right_body(message_ex *msg);\n\nprivate:\n    bool _header_checked;\n};\n}\n"
  },
  {
    "path": "src/runtime/rpc/message_parser.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"message_parser_manager.h\"\n#include <dsn/service_api_c.h>\n\nnamespace dsn {\n\n// ------------------- header type ------------------------------\nstruct header_type\n{\npublic:\n    union\n    {\n        char stype[4];\n        int32_t itype;\n    } type;\n\n    header_type() { type.itype = -1; }\n\n    header_type(int32_t itype) { type.itype = itype; }\n\n    header_type(const char *str) { memcpy(type.stype, str, sizeof(int32_t)); }\n\n    header_type(const header_type &another) { type.itype = another.type.itype; }\n\n    header_type 
&operator=(const header_type &another)\n    {\n        type.itype = another.type.itype;\n        return *this;\n    }\n\n    bool operator==(const header_type &other) const { return type.itype == other.type.itype; }\n\n    bool operator!=(const header_type &other) const { return type.itype != other.type.itype; }\n\n    std::string debug_string() const;\n\npublic:\n    static network_header_format header_type_to_c_type(const header_type &hdr_type);\n    static void register_header_signature(int32_t sig, network_header_format type);\n\nprivate:\n    static std::unordered_map<int32_t, network_header_format> s_fmt_map;\n};\n\nstd::unordered_map<int32_t, network_header_format> header_type::s_fmt_map;\n\nstd::string header_type::debug_string() const\n{\n    char buf[20];\n    char *ptr = buf;\n    for (int i = 0; i < 4; ++i) {\n        auto &c = type.stype[i];\n        if (isprint(c)) {\n            *ptr++ = c;\n        } else {\n            sprintf(ptr, \"\\\\%02X\", c);\n            ptr += 3;\n        }\n    }\n    *ptr = '\\0';\n    return std::string(buf);\n}\n\n/*static*/ network_header_format header_type::header_type_to_c_type(const header_type &hdr_type)\n{\n    auto it = s_fmt_map.find(hdr_type.type.itype);\n    if (it != s_fmt_map.end()) {\n        return it->second;\n    } else\n        return NET_HDR_INVALID;\n}\n\n/*static*/ void header_type::register_header_signature(int32_t sig, network_header_format type)\n{\n    auto it = s_fmt_map.find(sig);\n    if (it != s_fmt_map.end()) {\n        if (it->second != type) {\n            dassert(false,\n                    \"signature %08x is already registerd for header type %s\",\n                    sig,\n                    type.to_string());\n        }\n    } else {\n        s_fmt_map.emplace(sig, type);\n    }\n}\n\n/*static*/ network_header_format message_parser::get_header_type(const char *bytes)\n{\n    header_type ht(bytes);\n    return header_type::header_type_to_c_type(ht);\n}\n\n/*static*/ std::string 
message_parser::get_debug_string(const char *bytes)\n{\n    header_type ht(bytes);\n    return ht.debug_string();\n}\n\n//-------------------- msg reader --------------------\nchar *message_reader::read_buffer_ptr(unsigned int read_next)\n{\n    if (read_next + _buffer_occupied > _buffer.length()) {\n        // remember currently read content\n        blob rb;\n        if (_buffer_occupied > 0)\n            rb = _buffer.range(0, _buffer_occupied);\n\n        // switch to next\n        unsigned int sz =\n            (read_next + _buffer_occupied > _buffer_block_size ? read_next + _buffer_occupied\n                                                               : _buffer_block_size);\n        // TODO(wutao1): make it a buffer queue like what sofa-pbrpc does\n        //               (https://github.com/baidu/sofa-pbrpc/blob/master/src/sofa/pbrpc/buffer.h)\n        //               to reduce memory copy.\n        _buffer.assign(dsn::utils::make_shared_array<char>(sz), 0, sz);\n        _buffer_occupied = 0;\n\n        // copy\n        if (rb.length() > 0) {\n            // every read buffer_block_size data may cause one copy\n            memcpy((void *)_buffer.data(), (const void *)rb.data(), rb.length());\n            _buffer_occupied = rb.length();\n        }\n\n        dassert(read_next + _buffer_occupied <= _buffer.length(),\n                \"%u(%u + %u) VS %u\",\n                read_next + _buffer_occupied,\n                read_next,\n                _buffer_occupied,\n                _buffer.length());\n    }\n\n    return (char *)(_buffer.data() + _buffer_occupied);\n}\n\n//-------------------- msg parser manager --------------------\nvoid message_parser_manager::register_factory(network_header_format fmt,\n                                              const std::vector<const char *> &signatures,\n                                              message_parser::factory f,\n                                              size_t sz)\n{\n    if (static_cast<unsigned 
int>(fmt) >= _factory_vec.size()) {\n        _factory_vec.resize(fmt + 1);\n    }\n\n    parser_factory_info &info = _factory_vec[fmt];\n    info.fmt = fmt;\n    info.factory = f;\n    info.parser_size = sz;\n\n    for (auto &v : signatures) {\n        header_type type(v);\n        header_type::register_header_signature(type.type.itype, fmt);\n    }\n}\n\nmessage_parser *message_parser_manager::create_parser(network_header_format fmt)\n{\n    parser_factory_info &info = _factory_vec[fmt];\n    if (info.factory)\n        return info.factory();\n    else\n        return nullptr;\n}\n}\n"
  },
  {
    "path": "src/runtime/rpc/message_parser_manager.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     message parser manager\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool-api/message_parser.h>\n\nnamespace dsn {\nclass message_parser_manager : public utils::singleton<message_parser_manager>\n{\npublic:\n    struct parser_factory_info\n    {\n        parser_factory_info() : fmt(NET_HDR_INVALID), factory(nullptr), parser_size(0) {}\n\n        network_header_format fmt;\n        message_parser::factory factory;\n        size_t parser_size;\n    };\n\npublic:\n    // called only during system init, thread-unsafe\n    void register_factory(network_header_format fmt,\n                    
      const std::vector<const char *> &signatures,\n                          message_parser::factory f,\n                          size_t sz);\n\n    message_parser *create_parser(network_header_format fmt);\n    const parser_factory_info &get(network_header_format fmt) { return _factory_vec[fmt]; }\n\nprivate:\n    friend class utils::singleton<message_parser_manager>;\n    message_parser_manager() = default;\n    ~message_parser_manager() = default;\n\n    std::vector<parser_factory_info> _factory_vec;\n};\n}\n"
  },
  {
    "path": "src/runtime/rpc/network.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"message_parser_manager.h\"\n#include \"runtime/rpc/rpc_engine.h\"\n\n#include <dsn/tool-api/network.h>\n#include <dsn/utility/factory_store.h>\n#include <dsn/utility/flags.h>\n#include <dsn/dist/fmt_logging.h>\n\nnamespace dsn {\n/*static*/ join_point<void, rpc_session *>\n    rpc_session::on_rpc_session_connected(\"rpc.session.connected\");\n/*static*/ join_point<void, rpc_session *>\n    rpc_session::on_rpc_session_disconnected(\"rpc.session.disconnected\");\n/*static*/ join_point<bool, message_ex *>\n    rpc_session::on_rpc_recv_message(\"rpc.session.recv.message\");\n/*static*/ join_point<bool, message_ex *>\n    rpc_session::on_rpc_send_message(\"rpc.session.send.message\");\n\nrpc_session::~rpc_session()\n{\n    
clear_pending_messages();\n    clear_send_queue(false);\n\n    {\n        utils::auto_lock<utils::ex_lock_nr> l(_lock);\n        dassert(0 == _sending_msgs.size(), \"sending queue is not cleared yet\");\n        dassert(0 == _message_count, \"sending queue is not cleared yet\");\n    }\n}\n\nbool rpc_session::set_connecting()\n{\n    dassert(is_client(), \"must be client session\");\n\n    utils::auto_lock<utils::ex_lock_nr> l(_lock);\n    if (_connect_state == SS_DISCONNECTED) {\n        _connect_state = SS_CONNECTING;\n        return true;\n    } else {\n        return false;\n    }\n}\n\nvoid rpc_session::set_connected()\n{\n    dassert(is_client(), \"must be client session\");\n\n    {\n        utils::auto_lock<utils::ex_lock_nr> l(_lock);\n        dcheck_eq(_connect_state, SS_CONNECTING);\n        _connect_state = SS_CONNECTED;\n    }\n\n    rpc_session_ptr sp = this;\n    _net.on_client_session_connected(sp);\n\n    on_rpc_session_connected.execute(this);\n}\n\nbool rpc_session::set_disconnected()\n{\n    {\n        utils::auto_lock<utils::ex_lock_nr> l(_lock);\n        if (_connect_state != SS_DISCONNECTED) {\n            _connect_state = SS_DISCONNECTED;\n        } else {\n            return false;\n        }\n    }\n\n    on_rpc_session_disconnected.execute(this);\n    return true;\n}\n\nvoid rpc_session::clear_send_queue(bool resend_msgs)\n{\n    //\n    // - in concurrent case, resending _sending_msgs and _messages\n    //   may not maintain the original sending order\n    // - can optimize by batch sending instead of sending one by one\n    //\n    // however, our threading model cannot ensure in-order processing\n    // of incoming messages neither, so this guarantee is not necesssary\n    // and the upper applications should not always rely on this (but can\n    // rely on this with a high probability).\n    //\n\n    std::vector<message_ex *> swapped_sending_msgs;\n    {\n        // protect _sending_msgs and _sending_buffers in lock\n        
utils::auto_lock<utils::ex_lock_nr> l(_lock);\n        _sending_msgs.swap(swapped_sending_msgs);\n        _sending_buffers.clear();\n    }\n\n    // resend pending messages if need\n    for (auto &msg : swapped_sending_msgs) {\n        if (resend_msgs) {\n            _net.send_message(msg);\n        }\n\n        // if not resend, the message's callback will not be invoked until timeout,\n        // it's too slow - let's try to mimic the failure by recving an empty reply\n        else if (msg->header->context.u.is_request && !msg->header->context.u.is_forwarded) {\n            _net.on_recv_reply(msg->header->id, nullptr, 0);\n        }\n\n        // added in rpc_engine::reply (for server) or rpc_session::send_message (for client)\n        msg->release_ref();\n    }\n\n    while (true) {\n        dlink *msg;\n        {\n            utils::auto_lock<utils::ex_lock_nr> l(_lock);\n            msg = _messages.next();\n            if (msg == &_messages)\n                break;\n\n            msg->remove();\n            --_message_count;\n        }\n\n        auto rmsg = CONTAINING_RECORD(msg, message_ex, dl);\n        rmsg->io_session = nullptr;\n\n        if (resend_msgs) {\n            _net.send_message(rmsg);\n        }\n\n        // if not resend, the message's callback will not be invoked until timeout,\n        // it's too slow - let's try to mimic the failure by recving an empty reply\n        else if (rmsg->header->context.u.is_request && !rmsg->header->context.u.is_forwarded) {\n            _net.on_recv_reply(rmsg->header->id, nullptr, 0);\n        }\n\n        // added in rpc_engine::reply (for server) or rpc_session::send_message (for client)\n        rmsg->release_ref();\n    }\n}\n\ninline bool rpc_session::unlink_message_for_send()\n{\n    auto n = _messages.next();\n    int bcount = 0;\n\n    dbg_dassert(0 == _sending_buffers.size(),\n                \"sending_buffers should be empty, but size = %d\",\n                (int)_sending_buffers.size());\n    
dbg_dassert(0 == _sending_msgs.size(),\n                \"sending_msgs should be empty, but size = %d\",\n                (int)_sending_msgs.size());\n\n    while (n != &_messages) {\n        auto lmsg = CONTAINING_RECORD(n, message_ex, dl);\n        auto lcount = _parser->get_buffer_count_on_send(lmsg);\n        if (bcount > 0 && bcount + lcount > _max_buffer_block_count_per_send) {\n            break;\n        }\n\n        _sending_buffers.resize(bcount + lcount);\n        auto rcount = _parser->get_buffers_on_send(lmsg, &_sending_buffers[bcount]);\n        dassert(lcount >= rcount, \"%d VS %d\", lcount, rcount);\n        if (lcount != rcount)\n            _sending_buffers.resize(bcount + rcount);\n        bcount += rcount;\n        _sending_msgs.push_back(lmsg);\n\n        n = n->next();\n        lmsg->dl.remove();\n    }\n\n    // added in send_message\n    _message_count -= (int)_sending_msgs.size();\n    return _sending_msgs.size() > 0;\n}\n\nDEFINE_TASK_CODE(LPC_DELAY_RPC_REQUEST_RATE, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\n\nvoid rpc_session::start_read_next(int read_next)\n{\n    // server only\n    if (!is_client()) {\n        int delay_ms = _delay_server_receive_ms.exchange(0);\n\n        // delayed read\n        if (delay_ms > 0) {\n            this->add_ref();\n            dsn::task_ptr delay_task(new raw_task(LPC_DELAY_RPC_REQUEST_RATE, [this]() {\n                start_read_next();\n                this->release_ref();\n            }));\n            delay_task->enqueue(std::chrono::milliseconds(delay_ms));\n        } else {\n            do_read(read_next);\n        }\n    } else {\n        do_read(read_next);\n    }\n}\n\nint rpc_session::prepare_parser()\n{\n    if (_reader._buffer_occupied < sizeof(uint32_t))\n        return sizeof(uint32_t) - _reader._buffer_occupied;\n\n    auto hdr_format = message_parser::get_header_type(_reader._buffer.data());\n    if (hdr_format == NET_HDR_INVALID) {\n        hdr_format = 
_net.unknown_msg_hdr_format();\n\n        if (hdr_format == NET_HDR_INVALID) {\n            derror(\"invalid header type, remote_client = %s, header_type = '%s'\",\n                   _remote_addr.to_string(),\n                   message_parser::get_debug_string(_reader._buffer.data()).c_str());\n            return -1;\n        }\n    }\n    _parser = _net.new_message_parser(hdr_format);\n    dinfo(\"message parser created, remote_client = %s, header_format = %s\",\n          _remote_addr.to_string(),\n          hdr_format.to_string());\n\n    return 0;\n}\n\nvoid rpc_session::send_message(message_ex *msg)\n{\n    msg->add_ref(); // released in on_send_completed\n    msg->io_session = this;\n\n    // ignore msg if join point return false\n    if (dsn_unlikely(!on_rpc_send_message.execute(msg, true))) {\n        msg->release_ref();\n        return;\n    }\n\n    dassert(_parser, \"parser should not be null when send\");\n    _parser->prepare_on_send(msg);\n\n    uint64_t sig;\n    {\n        utils::auto_lock<utils::ex_lock_nr> l(_lock);\n        msg->dl.insert_before(&_messages);\n        ++_message_count;\n\n        if ((SS_CONNECTED == _connect_state) && !_is_sending_next) {\n            _is_sending_next = true;\n            sig = _message_sent + 1;\n            unlink_message_for_send();\n        } else {\n            return;\n        }\n    }\n\n    this->send(sig);\n}\n\nbool rpc_session::cancel(message_ex *request)\n{\n    if (request->io_session.get() != this)\n        return false;\n\n    {\n        utils::auto_lock<utils::ex_lock_nr> l(_lock);\n        if (request->dl.is_alone())\n            return false;\n\n        request->dl.remove();\n        --_message_count;\n    }\n\n    // added in rpc_engine::reply (for server) or rpc_session::send_message (for client)\n    request->release_ref();\n    request->io_session = nullptr;\n    return true;\n}\n\nvoid rpc_session::on_send_completed(uint64_t signature)\n{\n    uint64_t sig = 0;\n    {\n        
utils::auto_lock<utils::ex_lock_nr> l(_lock);\n        if (signature != 0) {\n            dassert(_is_sending_next && signature == _message_sent + 1, \"sent msg must be sending\");\n            _is_sending_next = false;\n\n            // the _sending_msgs may have been cleared when reading of the rpc_session is failed.\n            if (_sending_msgs.size() == 0) {\n                dassert(_connect_state == SS_DISCONNECTED,\n                        \"assume sending queue is cleared due to session closed\");\n                return;\n            }\n\n            for (auto &msg : _sending_msgs) {\n                // added in rpc_engine::reply (for server) or rpc_session::send_message (for client)\n                msg->release_ref();\n                _message_sent++;\n            }\n            _sending_msgs.clear();\n            _sending_buffers.clear();\n        }\n\n        if (!_is_sending_next) {\n            if (unlink_message_for_send()) {\n                sig = _message_sent + 1;\n                _is_sending_next = true;\n            }\n        }\n    }\n\n    // for next send messages\n    if (sig != 0)\n        this->send(sig);\n}\n\nrpc_session::rpc_session(connection_oriented_network &net,\n                         ::dsn::rpc_address remote_addr,\n                         message_parser_ptr &parser,\n                         bool is_client)\n    : _connect_state(is_client ? 
SS_DISCONNECTED : SS_CONNECTED),\n      _message_count(0),\n      _is_sending_next(false),\n      _message_sent(0),\n      _net(net),\n      _remote_addr(remote_addr),\n      _max_buffer_block_count_per_send(net.max_buffer_block_count_per_send()),\n      _reader(net.message_buffer_block_size()),\n      _parser(parser),\n\n      _is_client(is_client),\n      _matcher(_net.engine()->matcher()),\n      _delay_server_receive_ms(0)\n{\n    if (!is_client) {\n        on_rpc_session_connected.execute(this);\n    }\n}\n\nbool rpc_session::on_disconnected(bool is_write)\n{\n    bool ret;\n    if (set_disconnected()) {\n        rpc_session_ptr sp = this;\n        if (is_client()) {\n            _net.on_client_session_disconnected(sp);\n        } else {\n            _net.on_server_session_disconnected(sp);\n        }\n\n        ret = true;\n    } else {\n        ret = false;\n    }\n\n    if (is_write) {\n        clear_send_queue(false);\n    }\n\n    return ret;\n}\n\nvoid rpc_session::on_failure(bool is_write)\n{\n    if (on_disconnected(is_write)) {\n        close();\n    }\n}\n\nbool rpc_session::on_recv_message(message_ex *msg, int delay_ms)\n{\n    if (msg->header->from_address.is_invalid())\n        msg->header->from_address = _remote_addr;\n    msg->to_address = _net.address();\n    msg->io_session = this;\n\n    // ignore msg if join point return false\n    if (dsn_unlikely(!on_rpc_recv_message.execute(msg, true))) {\n        delete msg;\n        return false;\n    }\n\n    if (msg->header->context.u.is_request) {\n        // ATTENTION: need to check if self connection occurred.\n        //\n        // When we try to connect some socket in the same host, if we don't bind the client to a\n        // specific port,\n        // operating system will provide ephemeral port for us. 
If it's happened to be the one we\n        // want to connect to,\n        // it causes self connection.\n        //\n        // The case is:\n        // - this session is a client session\n        // - the remote address is in the same host\n        // - the remote address is not listened, which means the remote port is not occupied\n        // - operating system chooses the remote port as client's ephemeral port\n        if (is_client() && msg->header->from_address == _net.engine()->primary_address()) {\n            derror(\"self connection detected, address = %s\", msg->header->from_address.to_string());\n            dassert(msg->get_count() == 0, \"message should not be referenced by anybody so far\");\n            delete msg;\n            return false;\n        }\n\n        dbg_dassert(!is_client(), \"only rpc server session can recv rpc requests\");\n        _net.on_recv_request(msg, delay_ms);\n    }\n\n    // both rpc server session and rpc client session can receive rpc reply\n    // specially, rpc client session can receive general rpc reply,\n    // and rpc server session can receive forwarded rpc reply\n    else {\n        _matcher->on_recv_reply(&_net, msg->header->id, msg, delay_ms);\n    }\n\n    return true;\n}\n\nbool rpc_session::try_pend_message(message_ex *msg)\n{\n    // if negotiation is not succeed, we should pend msg,\n    // in order to resend it when the negotiation is succeed\n    if (dsn_unlikely(!negotiation_succeed)) {\n        utils::auto_lock<utils::ex_lock_nr> l(_lock);\n        if (!negotiation_succeed) {\n            msg->add_ref();\n            _pending_messages.push_back(msg);\n            return true;\n        }\n    }\n    return false;\n}\n\nvoid rpc_session::clear_pending_messages()\n{\n    utils::auto_lock<utils::ex_lock_nr> l(_lock);\n    for (auto msg : _pending_messages) {\n        msg->release_ref();\n    }\n    _pending_messages.clear();\n}\n\nvoid rpc_session::set_negotiation_succeed()\n{\n    std::vector<message_ex 
*> swapped_pending_msgs;\n    {\n        utils::auto_lock<utils::ex_lock_nr> l(_lock);\n        negotiation_succeed = true;\n\n        _pending_messages.swap(swapped_pending_msgs);\n    }\n\n    // resend the pending messages\n    for (auto msg : swapped_pending_msgs) {\n        send_message(msg);\n        msg->release_ref();\n    }\n}\n\nbool rpc_session::is_negotiation_succeed() const\n{\n    // Double check. The first check doesn't take _lock,\n    // because negotiation_succeed is only ever transferred from false to true.\n    // So if it is true now, it will not change later.\n    // But if it is false now, it may change soon, so we should use the lock to protect it.\n    if (dsn_likely(negotiation_succeed)) {\n        return negotiation_succeed;\n    } else {\n        utils::auto_lock<utils::ex_lock_nr> l(_lock);\n        return negotiation_succeed;\n    }\n}\n\nvoid rpc_session::set_client_username(const std::string &user_name)\n{\n    _client_username = user_name;\n}\n\nconst std::string &rpc_session::get_client_username() const { return _client_username; }\n\n////////////////////////////////////////////////////////////////////////////////////////////////\nnetwork::network(rpc_engine *srv, network *inner_provider)\n    : _engine(srv), _client_hdr_format(NET_HDR_DSN), _unknown_msg_header_format(NET_HDR_INVALID)\n{\n    _message_buffer_block_size = 1024 * 64;\n    _max_buffer_block_count_per_send = 64; // TODO: windows, how about the other platforms?\n    _send_queue_threshold =\n        (int)dsn_config_get_value_uint64(\"network\",\n                                         \"send_queue_threshold\",\n                                         4 * 1024,\n                                         \"send queue size above which throttling is applied\");\n\n    _unknown_msg_header_format = network_header_format::from_string(\n        dsn_config_get_value_string(\n            \"network\",\n            \"unknown_message_header_format\",\n            
NET_HDR_INVALID.to_string(),\n            \"format for unknown message headers, default is NET_HDR_INVALID\"),\n        NET_HDR_INVALID);\n}\n\nvoid network::reset_parser_attr(network_header_format client_hdr_format,\n                                int message_buffer_block_size)\n{\n    _client_hdr_format = client_hdr_format;\n    _message_buffer_block_size = message_buffer_block_size;\n}\n\nservice_node *network::node() const { return _engine->node(); }\n\nvoid network::on_recv_request(message_ex *msg, int delay_ms)\n{\n    return _engine->on_recv_request(this, msg, delay_ms);\n}\n\nvoid network::on_recv_reply(uint64_t id, message_ex *msg, int delay_ms)\n{\n    _engine->matcher()->on_recv_reply(this, id, msg, delay_ms);\n}\n\nmessage_parser *network::new_message_parser(network_header_format hdr_format)\n{\n    message_parser *parser = message_parser_manager::instance().create_parser(hdr_format);\n    dassert(parser, \"message parser '%s' not registerd or invalid!\", hdr_format.to_string());\n    return parser;\n}\n\nuint32_t network::get_local_ipv4()\n{\n    static const char *explicit_host =\n        dsn_config_get_value_string(\"network\",\n                                    \"explicit_host_address\",\n                                    \"\",\n                                    \"explicit host name or ip (v4) assigned to this \"\n                                    \"node (e.g., service ip for pods in kubernets)\");\n\n    static const char *inteface =\n        dsn_config_get_value_string(\"network\",\n                                    \"primary_interface\",\n                                    \"\",\n                                    \"network interface name used to init primary ipv4 \"\n                                    \"address, if empty, means using a site local address\");\n\n    uint32_t ip = 0;\n\n    if (strlen(explicit_host) > 0) {\n        ip = rpc_address::ipv4_from_host(explicit_host);\n    }\n\n    if (0 == ip) {\n        ip = 
rpc_address::ipv4_from_network_interface(inteface);\n    }\n\n    if (0 == ip) {\n        char name[128];\n        if (gethostname(name, sizeof(name)) != 0) {\n            dassert(false, \"gethostname failed, err = %s\", strerror(errno));\n        }\n        ip = rpc_address::ipv4_from_host(name);\n    }\n\n    return ip;\n}\n\nconnection_oriented_network::connection_oriented_network(rpc_engine *srv, network *inner_provider)\n    : network(srv, inner_provider)\n{\n    _cfg_conn_threshold_per_ip = 0;\n    _client_session_count.init_global_counter(\"server\",\n                                              \"network\",\n                                              \"client_session_count\",\n                                              COUNTER_TYPE_NUMBER,\n                                              \"current session count on server\");\n}\n\nvoid connection_oriented_network::inject_drop_message(message_ex *msg, bool is_send)\n{\n    rpc_session_ptr s = msg->io_session;\n    if (s == nullptr) {\n        // - if io_session == nullptr, there must be is_send == true;\n        // - but if is_send == true, there may be io_session != nullptr, when it is a\n        //   normal (not forwarding) reply message from server to client, in which case\n        //   the io_session has also been set.\n        dassert(is_send, \"received message should always has io_session set\");\n        utils::auto_read_lock l(_clients_lock);\n        auto it = _clients.find(msg->to_address);\n        if (it != _clients.end()) {\n            s = it->second;\n        }\n    }\n\n    if (s != nullptr) {\n        s->close();\n    }\n}\n\nvoid connection_oriented_network::send_message(message_ex *request)\n{\n    rpc_session_ptr client = nullptr;\n    auto &to = request->to_address;\n\n    // TODO: thread-local client ptr cache\n    {\n        utils::auto_read_lock l(_clients_lock);\n        auto it = _clients.find(to);\n        if (it != _clients.end()) {\n            client = it->second;\n        
}\n    }\n\n    int ip_count = 0;\n    bool new_client = false;\n    if (nullptr == client.get()) {\n        utils::auto_write_lock l(_clients_lock);\n        auto it = _clients.find(to);\n        if (it != _clients.end()) {\n            client = it->second;\n        } else {\n            client = create_client_session(to);\n            _clients.insert(client_sessions::value_type(to, client));\n            new_client = true;\n        }\n        ip_count = (int)_clients.size();\n    }\n\n    // init connection if necessary\n    if (new_client) {\n        ddebug(\"client session created, remote_server = %s, current_count = %d\",\n               client->remote_address().to_string(),\n               ip_count);\n        _client_session_count->set(ip_count);\n        client->connect();\n    }\n\n    // rpc call\n    client->send_message(request);\n}\n\nrpc_session_ptr connection_oriented_network::get_server_session(::dsn::rpc_address ep)\n{\n    utils::auto_read_lock l(_servers_lock);\n    auto it = _servers.find(ep);\n    return it != _servers.end() ? 
it->second : nullptr;\n}\n\nvoid connection_oriented_network::on_server_session_accepted(rpc_session_ptr &s)\n{\n    int ip_count = 0;\n    int ip_conn_count = 1;\n    {\n        utils::auto_write_lock l(_servers_lock);\n\n        auto pr = _servers.insert(server_sessions::value_type(s->remote_address(), s));\n        if (pr.second) {\n            // nothing to do\n        } else {\n            pr.first->second = s;\n            dwarn(\"server session already exists, remote_client = %s, preempted\",\n                  s->remote_address().to_string());\n        }\n        ip_count = (int)_servers.size();\n\n        auto pr2 =\n            _ip_conn_count.insert(ip_connection_count::value_type(s->remote_address().ip(), 1));\n        if (!pr2.second) {\n            ip_conn_count = ++pr2.first->second;\n        }\n    }\n\n    ddebug(\"server session accepted, remote_client = %s, current_count = %d\",\n           s->remote_address().to_string(),\n           ip_count);\n\n    ddebug(\"ip session %s, remote_client = %s, current_count = %d\",\n           ip_conn_count == 1 ? 
\"inserted\" : \"increased\",\n           s->remote_address().to_string(),\n           ip_conn_count);\n\n    _client_session_count->set(ip_count);\n}\n\nvoid connection_oriented_network::on_server_session_disconnected(rpc_session_ptr &s)\n{\n    // how many unique client(the same ip:port is considered to be a unique client)\n    int ip_count = 0;\n    // one unique client may remain more than one connection on the server, which\n    // is an unexpected behavior of client, we should record it in logs.\n    int ip_conn_count = 0;\n\n    bool session_removed = false;\n    {\n        utils::auto_write_lock l(_servers_lock);\n        auto it = _servers.find(s->remote_address());\n        if (it != _servers.end() && it->second.get() == s.get()) {\n            _servers.erase(it);\n            session_removed = true;\n        }\n        ip_count = (int)_servers.size();\n\n        auto it2 = _ip_conn_count.find(s->remote_address().ip());\n        if (it2 != _ip_conn_count.end()) {\n            if (it2->second > 1) {\n                it2->second -= 1;\n                ip_conn_count = it2->second;\n            } else {\n                _ip_conn_count.erase(it2);\n            }\n        }\n    }\n\n    if (session_removed) {\n        ddebug(\"session %s disconnected, the total client sessions count remains %d\",\n               s->remote_address().to_string(),\n               ip_count);\n        _client_session_count->set(ip_count);\n    }\n\n    if (ip_conn_count == 0) {\n        // TODO(wutao1): print ip only\n        ddebug(\"client ip %s has no more session to this server\", s->remote_address().to_string());\n    } else {\n        ddebug(\"client ip %s has still %d of sessions to this server\",\n               s->remote_address().to_string(),\n               ip_conn_count);\n    }\n}\n\nbool connection_oriented_network::check_if_conn_threshold_exceeded(::dsn::rpc_address ep)\n{\n    if (_cfg_conn_threshold_per_ip <= 0) {\n        dinfo(\"new client from %s is connecting 
to server %s, no connection threshold\",\n              ep.ipv4_str(),\n              address().to_string());\n        return false;\n    }\n\n    bool exceeded = false;\n    int ip_conn_count = 0; // the amount of connections from this ip address.\n    {\n        utils::auto_read_lock l(_servers_lock);\n        auto it = _ip_conn_count.find(ep.ip());\n        if (it != _ip_conn_count.end()) {\n            ip_conn_count = it->second;\n        }\n    }\n    if (ip_conn_count >= _cfg_conn_threshold_per_ip) {\n        exceeded = true;\n    }\n\n    dinfo(\"new client from %s is connecting to server %s, existing connection count \"\n          \"= %d, threshold = %u\",\n          ep.ipv4_str(),\n          address().to_string(),\n          ip_conn_count,\n          _cfg_conn_threshold_per_ip);\n\n    return exceeded;\n}\n\nvoid connection_oriented_network::on_client_session_connected(rpc_session_ptr &s)\n{\n    int ip_count = 0;\n    bool r = false;\n    {\n        utils::auto_read_lock l(_clients_lock);\n        auto it = _clients.find(s->remote_address());\n        if (it != _clients.end() && it->second.get() == s.get()) {\n            r = true;\n        }\n        ip_count = (int)_clients.size();\n    }\n\n    if (r) {\n        ddebug(\"client session connected, remote_server = %s, current_count = %d\",\n               s->remote_address().to_string(),\n               ip_count);\n        _client_session_count->set(ip_count);\n    }\n}\n\nvoid connection_oriented_network::on_client_session_disconnected(rpc_session_ptr &s)\n{\n    int ip_count = 0;\n    bool r = false;\n    {\n        utils::auto_write_lock l(_clients_lock);\n        auto it = _clients.find(s->remote_address());\n        if (it != _clients.end() && it->second.get() == s.get()) {\n            _clients.erase(it);\n            r = true;\n        }\n        ip_count = (int)_clients.size();\n    }\n\n    if (r) {\n        ddebug(\"client session disconnected, remote_server = %s, current_count = %d\",\n        
       s->remote_address().to_string(),\n               ip_count);\n        _client_session_count->set(ip_count);\n    }\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/rpc/network.sim.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <boost/asio.hpp>\n#include <dsn/service_api_c.h>\n#include <dsn/utility/singleton_store.h>\n#include <dsn/utility/rand.h>\n#include <dsn/tool/node_scoper.h>\n#include \"network.sim.h\"\n\nnamespace dsn {\nnamespace tools {\n\n// switch[channel][header_format]\n// multiple machines connect to the same switch\n// 10 should be >= than rpc_channel::max_value() + 1\n// 10 should be >= than network_header_format::max_value() + 1\nstatic utils::safe_singleton_store<::dsn::rpc_address, sim_network_provider *> 
s_switch[10][10];\n\nsim_client_session::sim_client_session(sim_network_provider &net,\n                                       ::dsn::rpc_address remote_addr,\n                                       message_parser_ptr &parser)\n    : rpc_session(net, remote_addr, parser, true)\n{\n}\n\nvoid sim_client_session::connect()\n{\n    if (set_connecting())\n        set_connected();\n}\n\nstatic message_ex *virtual_send_message(message_ex *msg)\n{\n    std::shared_ptr<char> buffer(\n        dsn::utils::make_shared_array<char>(msg->header->body_length + sizeof(message_header)));\n    char *tmp = buffer.get();\n\n    for (auto &buf : msg->buffers) {\n        memcpy((void *)tmp, (const void *)buf.data(), (size_t)buf.length());\n        tmp += buf.length();\n    }\n\n    blob bb(buffer, 0, msg->header->body_length + sizeof(message_header));\n    message_ex *recv_msg = message_ex::create_receive_message(bb);\n    recv_msg->to_address = msg->to_address;\n\n    msg->copy_to(*recv_msg); // extensible object state move\n\n    return recv_msg;\n}\n\nvoid sim_client_session::send(uint64_t sig)\n{\n    for (auto &msg : _sending_msgs) {\n        sim_network_provider *rnet = nullptr;\n        if (!s_switch[task_spec::get(msg->local_rpc_code)->rpc_call_channel][msg->hdr_format].get(\n                remote_address(), rnet)) {\n            derror(\"cannot find destination node %s in simulator\", remote_address().to_string());\n            // on_disconnected();  // disable this to avoid endless resending\n        } else {\n            auto server_session = rnet->get_server_session(_net.address());\n            if (nullptr == server_session) {\n                rpc_session_ptr cptr = this;\n                message_parser_ptr parser(_net.new_message_parser(msg->hdr_format));\n                server_session = new sim_server_session(*rnet, _net.address(), cptr, parser);\n                rnet->on_server_session_accepted(server_session);\n            }\n\n            message_ex *recv_msg = 
virtual_send_message(msg);\n\n            {\n                node_scoper ns(rnet->node());\n\n                bool ret = server_session->on_recv_message(recv_msg,\n                                                           recv_msg->to_address ==\n                                                                   recv_msg->header->from_address\n                                                               ? 0\n                                                               : rnet->net_delay_milliseconds());\n                dassert(ret, \"\");\n            }\n        }\n    }\n\n    on_send_completed(sig);\n}\n\nsim_server_session::sim_server_session(sim_network_provider &net,\n                                       ::dsn::rpc_address remote_addr,\n                                       rpc_session_ptr &client,\n                                       message_parser_ptr &parser)\n    : rpc_session(net, remote_addr, parser, false)\n{\n    _client = client;\n}\n\nvoid sim_server_session::send(uint64_t sig)\n{\n    for (auto &msg : _sending_msgs) {\n        message_ex *recv_msg = virtual_send_message(msg);\n\n        {\n            node_scoper ns(_client->net().node());\n\n            bool ret = _client->on_recv_message(\n                recv_msg,\n                recv_msg->to_address == recv_msg->header->from_address\n                    ? 
0\n                    : (static_cast<sim_network_provider *>(&_net))->net_delay_milliseconds());\n            dassert(ret, \"\");\n        }\n    }\n\n    on_send_completed(sig);\n}\n\nsim_network_provider::sim_network_provider(rpc_engine *rpc, network *inner_provider)\n    : connection_oriented_network(rpc, inner_provider)\n{\n    _address.assign_ipv4(\"localhost\", 1);\n\n    _min_message_delay_microseconds = 1;\n    _max_message_delay_microseconds = 100000;\n\n    _min_message_delay_microseconds =\n        (uint32_t)dsn_config_get_value_uint64(\"tools.simulator\",\n                                              \"min_message_delay_microseconds\",\n                                              _min_message_delay_microseconds,\n                                              \"min message delay (us)\");\n    _max_message_delay_microseconds =\n        (uint32_t)dsn_config_get_value_uint64(\"tools.simulator\",\n                                              \"max_message_delay_microseconds\",\n                                              _max_message_delay_microseconds,\n                                              \"max message delay (us)\");\n}\n\nerror_code sim_network_provider::start(rpc_channel channel, int port, bool client_only)\n{\n    dassert(channel == RPC_CHANNEL_TCP || channel == RPC_CHANNEL_UDP,\n            \"invalid given channel %s\",\n            channel.to_string());\n\n    _address = ::dsn::rpc_address(\"localhost\", port);\n    auto hostname = boost::asio::ip::host_name();\n    if (!client_only) {\n        for (int i = NET_HDR_INVALID + 1; i <= network_header_format::max_value(); i++) {\n            if (s_switch[channel][i].put(_address, this)) {\n                auto ep2 = ::dsn::rpc_address(hostname.c_str(), port);\n                s_switch[channel][i].put(ep2, this);\n            } else {\n                return ERR_ADDRESS_ALREADY_USED;\n            }\n        }\n        return ERR_OK;\n    } else {\n        return ERR_OK;\n    
}\n}\n\nuint32_t sim_network_provider::net_delay_milliseconds() const\n{\n    return static_cast<uint32_t>(\n               rand::next_u32(_min_message_delay_microseconds, _max_message_delay_microseconds)) /\n           1000;\n}\n}\n} // end namespace\n"
  },
  {
    "path": "src/runtime/rpc/network.sim.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool_api.h>\n\nnamespace dsn {\nnamespace tools {\n\nclass sim_network_provider;\nclass sim_client_session : public rpc_session\n{\npublic:\n    sim_client_session(sim_network_provider &net,\n                       ::dsn::rpc_address remote_addr,\n                       message_parser_ptr &parser);\n\n    virtual void connect();\n\n    virtual void send(uint64_t signature) override;\n\n    virtual void do_read(int sz) override {}\n\n    virtual void close() override {}\n\n    virtual void on_failure(bool is_write = false) override 
{}\n};\n\nclass sim_server_session : public rpc_session\n{\npublic:\n    sim_server_session(sim_network_provider &net,\n                       ::dsn::rpc_address remote_addr,\n                       rpc_session_ptr &client,\n                       message_parser_ptr &parser);\n\n    virtual void send(uint64_t signature) override;\n\n    virtual void connect() {}\n\n    virtual void do_read(int sz) override {}\n\n    virtual void close() override {}\n\n    virtual void on_failure(bool is_write = false) override {}\n\nprivate:\n    rpc_session_ptr _client;\n};\n\nclass sim_network_provider : public connection_oriented_network\n{\npublic:\n    sim_network_provider(rpc_engine *rpc, network *inner_provider);\n    ~sim_network_provider(void) {}\n\n    virtual error_code start(rpc_channel channel, int port, bool client_only);\n\n    virtual ::dsn::rpc_address address() { return _address; }\n\n    virtual rpc_session_ptr create_client_session(::dsn::rpc_address server_addr)\n    {\n        message_parser_ptr parser(new_message_parser(_client_hdr_format));\n        return rpc_session_ptr(new sim_client_session(*this, server_addr, parser));\n    }\n\n    virtual rpc_session_ptr create_server_session(::dsn::rpc_address client_addr,\n                                                  rpc_session_ptr client_session)\n    {\n        message_parser_ptr parser(new_message_parser(_client_hdr_format));\n        return rpc_session_ptr(new sim_server_session(*this, client_addr, client_session, parser));\n    }\n\n    uint32_t net_delay_milliseconds() const;\n\nprivate:\n    ::dsn::rpc_address _address;\n    uint32_t _min_message_delay_microseconds;\n    uint32_t _max_message_delay_microseconds;\n};\n\n//------------- inline implementations -------------\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/rpc/raw_message_parser.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n#include \"raw_message_parser.h\"\n#include <dsn/service_api_c.h>\n#include <dsn/tool-api/task_spec.h>\n#include <dsn/tool-api/network.h>\n\nnamespace dsn {\n\nDEFINE_TASK_CODE_RPC(RPC_CALL_RAW_SESSION_DISCONNECT, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\nDEFINE_TASK_CODE_RPC(RPC_CALL_RAW_MESSAGE, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\n\n// static\nvoid raw_message_parser::notify_rpc_session_disconnected(rpc_session *sp)\n{\n    if (!sp->is_client()) {\n        message_ex *special_msg = message_ex::create_receive_message_with_standalone_header(blob());\n        dsn::message_header *header = 
special_msg->header;\n        header->context.u.is_request = 1;\n        header->context.u.is_forwarded = 0;\n        header->from_address = sp->remote_address();\n        header->gpid.set_value(0);\n\n        strncpy(header->rpc_name, \"RPC_CALL_RAW_SESSION_DISCONNECT\", sizeof(header->rpc_name) - 1);\n        header->rpc_name[sizeof(header->rpc_name) - 1] = '\\0';\n        special_msg->local_rpc_code = RPC_CALL_RAW_SESSION_DISCONNECT;\n        special_msg->hdr_format = NET_HDR_RAW;\n        sp->on_recv_message(special_msg, 0);\n    }\n}\n\nraw_message_parser::raw_message_parser()\n{\n    bool hooked = false;\n    static std::atomic_bool s_handler_hooked(false);\n    if (s_handler_hooked.compare_exchange_strong(hooked, true)) {\n        ddebug(\"join point on_rpc_session_disconnected registered to notify disconnect with \"\n               \"RPC_CALL_RAW_SESSION_DISCONNECT\");\n        rpc_session::on_rpc_session_disconnected.put_back(\n            raw_message_parser::notify_rpc_session_disconnected,\n            \"notify disconnect with RPC_CALL_RAW_SESSION_DISCONNECT\");\n    }\n}\n\nmessage_ex *raw_message_parser::get_message_on_receive(message_reader *reader,\n                                                       /*out*/ int &read_next)\n{\n    if (reader->_buffer_occupied == 0) {\n        if (reader->_buffer.length() > 0)\n            read_next = reader->_buffer.length();\n        else\n            read_next = reader->_buffer_block_size;\n        return nullptr;\n    } else {\n        auto msg_length = reader->_buffer_occupied;\n        dsn::blob msg_blob = reader->_buffer.range(0, msg_length);\n        message_ex *new_message =\n            message_ex::create_receive_message_with_standalone_header(msg_blob);\n        message_header *header = new_message->header;\n\n        header->hdr_length = sizeof(*header);\n        header->body_length = msg_length;\n        strncpy(header->rpc_name, \"RPC_CALL_RAW_MESSAGE\", sizeof(header->rpc_name) - 1);\n        
header->rpc_name[sizeof(header->rpc_name) - 1] = '\\0';\n        header->gpid.set_value(0);\n        header->context.u.is_request = 1;\n        header->context.u.is_forwarded = 0;\n        header->context.u.is_forward_supported = 0;\n\n        reader->_buffer = reader->_buffer.range(msg_length);\n        reader->_buffer_occupied = 0;\n        read_next = 0;\n\n        new_message->local_rpc_code = RPC_CALL_RAW_MESSAGE;\n        new_message->hdr_format = NET_HDR_RAW;\n        return new_message;\n    }\n}\n\nint raw_message_parser::get_buffers_on_send(message_ex *msg, send_buf *buffers)\n{\n    // we must skip the message header\n    unsigned int offset = sizeof(message_header);\n    int i = 0;\n    for (blob &buf : msg->buffers) {\n        if (offset >= buf.length()) {\n            offset -= buf.length();\n            continue;\n        }\n        buffers[i].buf = (void *)(buf.data() + offset);\n        buffers[i].sz = buf.length() - offset;\n        offset = 0;\n        ++i;\n    }\n    return i;\n}\n}\n"
  },
  {
    "path": "src/runtime/rpc/raw_message_parser.h",
    "content": "/*\n* The MIT License (MIT)\n*\n* Copyright (c) 2015 Microsoft Corporation\n*\n* -=- Robust Distributed System Nucleus (rDSN) -=-\n*\n* Permission is hereby granted, free of charge, to any person obtaining a copy\n* of this software and associated documentation files (the \"Software\"), to deal\n* in the Software without restriction, including without limitation the rights\n* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n* copies of the Software, and to permit persons to whom the Software is\n* furnished to do so, subject to the following conditions:\n*\n* The above copyright notice and this permission notice shall be included in\n* all copies or substantial portions of the Software.\n*\n* THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n* THE SOFTWARE.\n*/\n\n/*\n* Description:\n*     message parser for user customed request\n*\n* Revision history:\n*     xxxx-xx-xx, author, fix bug about xxx\n*/\n\n#ifndef RAW_MESSAGE_PARSER_H\n#define RAW_MESSAGE_PARSER_H\n\n#include <dsn/tool-api/message_parser.h>\n#include <dsn/tool-api/rpc_message.h>\n#include <dsn/utility/ports.h>\n\nnamespace dsn {\n\nDEFINE_CUSTOMIZED_ID(network_header_format, NET_HDR_RAW)\n\nclass rpc_session;\nclass raw_message_parser : public message_parser\n{\nprivate:\n    static void notify_rpc_session_disconnected(rpc_session *sp);\n\npublic:\n    raw_message_parser();\n    virtual ~raw_message_parser() {}\n    virtual message_ex *get_message_on_receive(message_reader *reader,\n                                               /*out*/ int &read_next) 
override;\n    virtual int get_buffers_on_send(message_ex *msg, /*out*/ send_buf *buffers) override;\n};\n}\n#endif // RAW_MESSAGE_PARSER_H\n"
  },
  {
    "path": "src/runtime/rpc/request_meta.thrift",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\nnamespace cpp dsn\n\n// Metadata field of the request in rDSN's thrift protocol (version 1).\n// TODO(wutao1): add design doc of the thrift protocol.\nstruct thrift_request_meta_v1\n{\n    // The replica's gpid.\n    1:optional i32 app_id;\n    2:optional i32 partition_index;\n\n    // The timeout of this request that's set on client side.\n    3:optional i32 client_timeout;\n\n    // The hash value calculated from the hash key.\n    4:optional i64 client_partition_hash;\n\n    // Whether it is a backup request. If true, this request (only if it's a read) can be handled by\n    // a secondary replica, which does not guarantee strong consistency.\n    5:optional bool is_backup_request;\n}\n"
  },
  {
    "path": "src/runtime/rpc/rpc_engine.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <sys/socket.h>\n#include <netdb.h>\n#include <ifaddrs.h>\n#include <netinet/in.h>\n#include <arpa/inet.h>\n\n#include \"rpc_engine.h\"\n#include \"runtime/service_engine.h\"\n#include <dsn/utility/factory_store.h>\n#include <dsn/tool-api/group_address.h>\n#include <dsn/tool-api/task_queue.h>\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/cpp/serialization.h>\n#include <dsn/utility/rand.h>\n#include <set>\n\nnamespace dsn {\n\nDEFINE_TASK_CODE(LPC_RPC_TIMEOUT, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\n\nclass rpc_timeout_task : public task\n{\npublic:\n    rpc_timeout_task(rpc_client_matcher *matcher, uint64_t id, service_node *node)\n        : task(LPC_RPC_TIMEOUT, 0, node)\n    {\n        _matcher = matcher;\n      
  _id = id;\n    }\n\n    virtual void exec() { _matcher->on_rpc_timeout(_id); }\n\nprivate:\n    // use the following if the matcher is per rpc session\n    // rpc_client_matcher_ptr _matcher;\n\n    rpc_client_matcher *_matcher;\n    uint64_t _id;\n};\n\nrpc_client_matcher::~rpc_client_matcher()\n{\n    for (int i = 0; i < MATCHER_BUCKET_NR; i++) {\n        dassert(_requests[i].size() == 0,\n                \"all rpc entries must be removed before the matcher ends\");\n    }\n}\n\nbool rpc_client_matcher::on_recv_reply(network *net, uint64_t key, message_ex *reply, int delay_ms)\n{\n    rpc_response_task_ptr call;\n    task_ptr timeout_task;\n    int bucket_index = key % MATCHER_BUCKET_NR;\n\n    {\n        utils::auto_lock<::dsn::utils::ex_lock_nr_spin> l(_requests_lock[bucket_index]);\n        auto it = _requests[bucket_index].find(key);\n        if (it != _requests[bucket_index].end()) {\n            call = std::move(it->second.resp_task);\n            timeout_task = std::move(it->second.timeout_task);\n            _requests[bucket_index].erase(it);\n        } else {\n            if (reply) {\n                dassert(reply->get_count() == 0,\n                        \"reply should not be referenced by anybody so far\");\n                delete reply;\n            }\n            return false;\n        }\n    }\n\n    dbg_dassert(call != nullptr, \"rpc response task cannot be empty\");\n    dbg_dassert(timeout_task != nullptr, \"rpc timeout task cannot be empty\");\n\n    if (timeout_task != task::get_current_task()) {\n        timeout_task->cancel(false); // no need to wait\n    }\n\n    auto req = call->get_request();\n    auto spec = task_spec::get(req->local_rpc_code);\n\n    // if rpc is early terminated with empty reply\n    if (nullptr == reply) {\n        if (req->server_address.type() == HOST_TYPE_GROUP && spec->grpc_mode == GRPC_TO_LEADER &&\n            req->server_address.group_address()->is_update_leader_automatically()) {\n            
req->server_address.group_address()->leader_forward();\n        }\n\n        call->set_delay(delay_ms);\n        call->enqueue(ERR_NETWORK_FAILURE, reply);\n        return true;\n    }\n\n    // normal reply\n    auto err = reply->error();\n\n    // if this is pure client (no server port assigned), we can only do fake forwarding,\n    // in this case, the server will return ERR_FORWARD_TO_OTHERS\n    if (err == ERR_FORWARD_TO_OTHERS) {\n        rpc_address addr;\n        ::dsn::unmarshall((dsn::message_ex *)reply, addr);\n\n        // handle the case of forwarding to itself where addr == req->to_address.\n        dbg_dassert(addr != req->to_address,\n                    \"impossible forwarding to myself as this only happens when i'm pure client so \"\n                    \"i don't get a named to_address %s\",\n                    addr.to_string());\n\n        // server address side effect\n        switch (req->server_address.type()) {\n        case HOST_TYPE_GROUP:\n            switch (spec->grpc_mode) {\n            case GRPC_TO_LEADER:\n                if (req->server_address.group_address()->is_update_leader_automatically()) {\n                    req->server_address.group_address()->set_leader(addr);\n                }\n                break;\n            default:\n                break;\n            }\n            break;\n        default:\n            dassert(false, \"not implemented\");\n            break;\n        }\n\n        // do fake forwarding, reset request_id\n        // TODO(qinzuoyan): reset timeout to new value\n        _engine->call_ip(addr, req, call, true);\n\n        dassert(reply->get_count() == 0, \"reply should not be referenced by anybody so far\");\n        delete reply;\n    } else {\n        // server address side effect\n        if (reply->header->context.u.is_forwarded) {\n            switch (req->server_address.type()) {\n            case HOST_TYPE_GROUP:\n                switch (spec->grpc_mode) {\n                case 
GRPC_TO_LEADER:\n                    if (err == ERR_OK &&\n                        req->server_address.group_address()->is_update_leader_automatically()) {\n                        req->server_address.group_address()->set_leader(\n                            reply->header->from_address);\n                    }\n                    break;\n                default:\n                    break;\n                }\n                break;\n            default:\n                dassert(false, \"not implemented\");\n                break;\n            }\n        }\n\n        call->set_delay(delay_ms);\n\n        // failure injection applied\n        if (!call->enqueue(err, reply)) {\n            ddebug(\"rpc reply %s is dropped (fault inject), trace_id = %016\" PRIx64,\n                   reply->header->rpc_name,\n                   reply->header->trace_id);\n\n            // call network failure model\n            net->inject_drop_message(reply, false);\n        }\n    }\n\n    return true;\n}\n\nvoid rpc_client_matcher::on_rpc_timeout(uint64_t key)\n{\n    rpc_response_task_ptr call;\n    int bucket_index = key % MATCHER_BUCKET_NR;\n    uint64_t timeout_ts_ms;\n    bool resend = false;\n\n    {\n        utils::auto_lock<::dsn::utils::ex_lock_nr_spin> l(_requests_lock[bucket_index]);\n        auto it = _requests[bucket_index].find(key);\n        if (it != _requests[bucket_index].end()) {\n            timeout_ts_ms = it->second.timeout_ts_ms;\n            call = it->second.resp_task;\n            if (timeout_ts_ms == 0) {\n                _requests[bucket_index].erase(it);\n            }\n\n            // resend is enabled\n            else {\n                // do it in next check so we can do expensive things\n                // outside of the lock\n                resend = true;\n            }\n        } else {\n            return;\n        }\n    }\n\n    dbg_dassert(call != nullptr, \"rpc response task is missing for rpc request %\" PRIu64, key);\n\n    // if 
timeout\n    if (!resend) {\n        call->enqueue(ERR_TIMEOUT, nullptr);\n        return;\n    }\n\n    // prepare resend context and check again\n    uint64_t now_ts_ms = dsn_now_ms();\n\n    // resend when timeout is not yet, and the call is not cancelled\n    // TODO: time overflow\n    resend = (now_ts_ms < timeout_ts_ms && call->state() == TASK_STATE_READY);\n\n    // TODO: memory pool for this task\n    task_ptr new_timeout_task;\n    if (resend) {\n        new_timeout_task = new rpc_timeout_task(this, key, call->node());\n    }\n\n    {\n        utils::auto_lock<::dsn::utils::ex_lock_nr_spin> l(_requests_lock[bucket_index]);\n        auto it = _requests[bucket_index].find(key);\n        if (it != _requests[bucket_index].end()) {\n            // timeout\n            if (!resend) {\n                _requests[bucket_index].erase(it);\n            }\n\n            // resend\n            else {\n                // reset timeout task\n                it->second.timeout_task = new_timeout_task;\n            }\n        }\n\n        // response is received\n        else {\n            resend = false;\n        }\n    }\n\n    if (resend) {\n        auto req = call->get_request();\n        dinfo(\"resend request message for rpc trace_id = %016\" PRIx64 \", key = %\" PRIu64,\n              req->header->trace_id,\n              key);\n\n        // resend without handling rpc_matcher, use the same request_id\n        _engine->call_ip(req->to_address, req, nullptr);\n\n        // use rest of the timeout to resend once only\n        new_timeout_task->set_delay(static_cast<int>(timeout_ts_ms - now_ts_ms));\n        new_timeout_task->enqueue();\n    }\n}\n\nvoid rpc_client_matcher::on_call(message_ex *request, const rpc_response_task_ptr &call)\n{\n    message_header &hdr = *request->header;\n    int bucket_index = hdr.id % MATCHER_BUCKET_NR;\n    auto sp = task_spec::get(request->local_rpc_code);\n    int timeout_ms = hdr.client.timeout_ms;\n    uint64_t timeout_ts_ms = 
0;\n\n    // reset timeout when resend is enabled\n    if (sp->rpc_request_resend_timeout_milliseconds > 0 &&\n        timeout_ms > sp->rpc_request_resend_timeout_milliseconds) {\n        timeout_ts_ms = dsn_now_ms() + timeout_ms; // non-zero for resend\n        timeout_ms = sp->rpc_request_resend_timeout_milliseconds;\n    }\n\n    dbg_dassert(call != nullptr, \"rpc response task cannot be empty\");\n    task *timeout_task(new rpc_timeout_task(this, hdr.id, call->node()));\n\n    {\n        utils::auto_lock<::dsn::utils::ex_lock_nr_spin> l(_requests_lock[bucket_index]);\n        auto pr =\n            _requests[bucket_index].emplace(hdr.id, match_entry{call, timeout_task, timeout_ts_ms});\n        dassert(pr.second, \"the message is already on the fly!!!\");\n    }\n\n    timeout_task->set_delay(timeout_ms);\n    timeout_task->enqueue();\n}\n\n//----------------------------------------------------------------------------------------------\nrpc_server_dispatcher::rpc_server_dispatcher()\n{\n    _vhandlers.resize(dsn::task_code::max() + 1);\n    for (auto &h : _vhandlers) {\n        h = new std::pair<std::unique_ptr<handler_entry>, utils::rw_lock_nr>();\n    }\n    _handlers.clear();\n}\n\nrpc_server_dispatcher::~rpc_server_dispatcher()\n{\n    for (auto &h : _vhandlers) {\n        delete h;\n    }\n    _vhandlers.clear();\n    _handlers.clear();\n    dassert(_handlers.size() == 0,\n            \"please make sure all rpc handlers are unregistered at this point\");\n}\n\nbool rpc_server_dispatcher::register_rpc_handler(dsn::task_code code,\n                                                 const char *extra_name,\n                                                 const rpc_request_handler &h)\n{\n    std::unique_ptr<handler_entry> ctx(new handler_entry{code, extra_name, h});\n\n    utils::auto_write_lock l(_handlers_lock);\n    auto it = _handlers.find(code.to_string());\n    auto it2 = _handlers.find(extra_name);\n    if (it == _handlers.end() && it2 == 
_handlers.end()) {\n        _handlers[code.to_string()] = ctx.get();\n        _handlers[ctx->extra_name] = ctx.get();\n\n        {\n            utils::auto_write_lock l(_vhandlers[code.code()]->second);\n            _vhandlers[code.code()]->first = std::move(ctx);\n        }\n        return true;\n    } else {\n        dassert(false, \"rpc registration confliction for '%s' '%s'\", code.to_string(), extra_name);\n        return false;\n    }\n}\n\nbool rpc_server_dispatcher::unregister_rpc_handler(dsn::task_code rpc_code)\n{\n    {\n        utils::auto_write_lock l(_handlers_lock);\n        auto it = _handlers.find(rpc_code.to_string());\n        if (it == _handlers.end())\n            return false;\n\n        handler_entry *ctx = it->second;\n        _handlers.erase(it);\n        _handlers.erase(ctx->extra_name);\n\n        {\n            utils::auto_write_lock l(_vhandlers[rpc_code]->second);\n            _vhandlers[rpc_code]->first.reset();\n        }\n    }\n\n    return true;\n}\n\nrpc_request_task *rpc_server_dispatcher::on_request(message_ex *msg, service_node *node)\n{\n    rpc_request_handler handler;\n\n    if (TASK_CODE_INVALID != msg->local_rpc_code) {\n        utils::auto_read_lock l(_vhandlers[msg->local_rpc_code]->second);\n        handler_entry *ctx = _vhandlers[msg->local_rpc_code]->first.get();\n        if (ctx != nullptr) {\n            handler = ctx->h;\n        }\n    } else {\n        utils::auto_read_lock l(_handlers_lock);\n        auto it = _handlers.find(msg->header->rpc_name);\n        if (it != _handlers.end()) {\n            msg->local_rpc_code = it->second->code;\n            handler = it->second->h;\n        }\n    }\n\n    if (handler) {\n        auto r = new rpc_request_task(msg, std::move(handler), node);\n        r->spec().on_task_create.execute(task::get_current_task(), r);\n        return r;\n    } else\n        return 
nullptr;\n}\n\n//----------------------------------------------------------------------------------------------\nrpc_engine::rpc_engine(service_node *node) : _node(node), _rpc_matcher(this)\n{\n    dassert(_node != nullptr, \"\");\n    _is_running = false;\n    _is_serving = false;\n}\n\n//\n// management routines\n//\nnetwork *rpc_engine::create_network(const network_server_config &netcs,\n                                    bool client_only,\n                                    network_header_format client_hdr_format)\n{\n    network *net = utils::factory_store<network>::create(\n        netcs.factory_name.c_str(), ::dsn::PROVIDER_TYPE_MAIN, this, nullptr);\n    net->reset_parser_attr(client_hdr_format, netcs.message_buffer_block_size);\n\n    // start the net\n    error_code ret = net->start(netcs.channel, netcs.port, client_only);\n    if (ret == ERR_OK) {\n        return net;\n    } else {\n        // mem leak, don't care as it halts the program\n        dassert(false, \"create network failed, error_code: %s\", ret.to_string());\n        return nullptr;\n    }\n}\n\nerror_code rpc_engine::start(const service_app_spec &aspec)\n{\n    if (_is_running) {\n        return ERR_SERVICE_ALREADY_RUNNING;\n    }\n\n    // start client networks\n    _client_nets.resize(network_header_format::max_value() + 1);\n\n    // for each format\n    for (int i = NET_HDR_INVALID + 1; i <= network_header_format::max_value(); i++) {\n        std::vector<std::unique_ptr<network>> &pnet = _client_nets[i];\n        pnet.resize(rpc_channel::max_value() + 1);\n        auto client_hdr_format = network_header_format(network_header_format::to_string(i));\n\n        // for each channel\n        for (int j = 0; j <= rpc_channel::max_value(); j++) {\n            rpc_channel c = rpc_channel(rpc_channel::to_string(j));\n            std::string factory;\n            int blk_size;\n\n            auto it1 = aspec.network_client_confs.find(c);\n            if (it1 != aspec.network_client_confs.end()) 
{\n                factory = it1->second.factory_name;\n                blk_size = it1->second.message_buffer_block_size;\n            } else {\n                dwarn(\"network client for channel %s not registered, assuming not used further\",\n                      c.to_string());\n                continue;\n            }\n\n            network_server_config cs(aspec.id, c);\n            cs.factory_name = factory;\n            cs.message_buffer_block_size = blk_size;\n\n            auto net = create_network(cs, true, client_hdr_format);\n            if (!net)\n                return ERR_NETWORK_INIT_FAILED;\n            pnet[j].reset(net);\n\n            ddebug(\"[%s] network client started at port %u, channel = %s, fmt = %s ...\",\n                   node()->full_name(),\n                   (uint32_t)(cs.port),\n                   cs.channel.to_string(),\n                   client_hdr_format.to_string());\n        }\n    }\n\n    // start server networks\n    for (auto &sp : aspec.network_server_confs) {\n        int port = sp.second.port;\n\n        std::vector<std::unique_ptr<network>> *pnets;\n        auto it = _server_nets.find(port);\n\n        if (it == _server_nets.end()) {\n            auto pr = _server_nets.emplace(port, std::vector<std::unique_ptr<network>>{});\n            pnets = &pr.first->second;\n            pnets->resize(rpc_channel::max_value() + 1);\n        } else {\n            pnets = &it->second;\n        }\n\n        auto net = create_network(sp.second, false, NET_HDR_DSN);\n        if (net == nullptr) {\n            return ERR_NETWORK_INIT_FAILED;\n        }\n\n        (*pnets)[sp.second.channel].reset(net);\n\n        dwarn(\"[%s] network server started at port %u, channel = %s, ...\",\n              node()->full_name(),\n              (uint32_t)(port),\n              sp.second.channel.to_string());\n    }\n\n    _local_primary_address = _client_nets[NET_HDR_DSN][0]->address();\n    _local_primary_address.set_port(aspec.ports.size() > 0 ? 
*aspec.ports.begin() : aspec.id);\n\n    ddebug(\"=== service_node=[%s], primary_address=[%s] ===\",\n           _node->full_name(),\n           _local_primary_address.to_string());\n\n    _is_running = true;\n    return ERR_OK;\n}\n\nbool rpc_engine::register_rpc_handler(dsn::task_code code,\n                                      const char *extra_name,\n                                      const rpc_request_handler &h)\n{\n    return _rpc_dispatcher.register_rpc_handler(code, extra_name, h);\n}\n\nbool rpc_engine::unregister_rpc_handler(dsn::task_code rpc_code)\n{\n    return _rpc_dispatcher.unregister_rpc_handler(rpc_code);\n}\n\nvoid rpc_engine::on_recv_request(network *net, message_ex *msg, int delay_ms)\n{\n    if (!_is_serving) {\n        dwarn(\"recv message with rpc name %s from %s when rpc engine is not serving, trace_id = \"\n              \"%\" PRIu64,\n              msg->header->rpc_name,\n              msg->header->from_address.to_string(),\n              msg->header->trace_id);\n\n        dassert(msg->get_count() == 0, \"request should not be referenced by anybody so far\");\n        delete msg;\n        return;\n    }\n\n    auto code = msg->rpc_code();\n\n    if (code != ::dsn::TASK_CODE_INVALID) {\n        rpc_request_task *tsk = nullptr;\n\n        // handle replication\n        if (msg->header->gpid.get_app_id() > 0) {\n            tsk = _node->generate_intercepted_request_task(msg);\n        }\n\n        if (tsk == nullptr) {\n            tsk = _rpc_dispatcher.on_request(msg, _node);\n        }\n\n        if (tsk != nullptr) {\n            // injector\n            if (tsk->spec().on_rpc_request_enqueue.execute(tsk, true)) {\n                // we set a default delay if it isn't generated by fault-injector\n                if (tsk->delay_milliseconds() == 0)\n                    tsk->set_delay(delay_ms);\n                tsk->enqueue();\n            }\n\n            // release the task when necessary\n            else {\n                
ddebug(\"rpc request %s is dropped (fault inject), trace_id = %016\" PRIx64,\n                       msg->header->rpc_name,\n                       msg->header->trace_id);\n\n                // call network failure model when network is present\n                net->inject_drop_message(msg, false);\n\n                // because (1) initially, the ref count is zero\n                //         (2) upper apps may call add_ref already\n                tsk->add_ref();\n                tsk->release_ref();\n            }\n        } else {\n            dwarn(\"recv message with unhandled rpc name %s from %s, trace_id = %016\" PRIx64,\n                  msg->header->rpc_name,\n                  msg->header->from_address.to_string(),\n                  msg->header->trace_id);\n\n            dassert(msg->get_count() == 0, \"request should not be referenced by anybody so far\");\n            msg->add_ref();\n            dsn_rpc_reply(msg->create_response(), ::dsn::ERR_HANDLER_NOT_FOUND);\n            msg->release_ref();\n        }\n    } else {\n        dwarn(\"recv message with unknown rpc name %s from %s, trace_id = %016\" PRIx64,\n              msg->header->rpc_name,\n              msg->header->from_address.to_string(),\n              msg->header->trace_id);\n\n        dassert(msg->get_count() == 0, \"request should not be referenced by anybody so far\");\n        msg->add_ref();\n        dsn_rpc_reply(msg->create_response(), ::dsn::ERR_HANDLER_NOT_FOUND);\n        msg->release_ref();\n    }\n}\n\nvoid rpc_engine::call(message_ex *request, const rpc_response_task_ptr &call)\n{\n    auto &hdr = *request->header;\n    hdr.from_address = primary_address();\n    hdr.trace_id = rand::next_u64(std::numeric_limits<decltype(hdr.trace_id)>::min(),\n                                  std::numeric_limits<decltype(hdr.trace_id)>::max());\n\n    call_address(request->server_address, request, call);\n}\n\nvoid rpc_engine::call_group(rpc_address addr,\n                            
message_ex *request,\n                            const rpc_response_task_ptr &call)\n{\n    dbg_dassert(addr.type() == HOST_TYPE_GROUP, \"only group is now supported\");\n\n    auto sp = task_spec::get(request->local_rpc_code);\n    switch (sp->grpc_mode) {\n    case GRPC_TO_LEADER:\n        call_ip(request->server_address.group_address()->possible_leader(), request, call);\n        break;\n    case GRPC_TO_ANY:\n        // TODO: performance optimization\n        call_ip(request->server_address.group_address()->random_member(), request, call);\n        break;\n    case GRPC_TO_ALL:\n        dassert(false, \"to be implemented\");\n        break;\n    default:\n        dassert(false, \"invalid group rpc mode %d\", (int)(sp->grpc_mode));\n    }\n}\n\nvoid rpc_engine::call_ip(rpc_address addr,\n                         message_ex *request,\n                         const rpc_response_task_ptr &call,\n                         bool reset_request_id,\n                         bool set_forwarded)\n{\n    dbg_dassert(addr.type() == HOST_TYPE_IPV4, \"only IPV4 is now supported\");\n    dbg_dassert(addr.port() > MAX_CLIENT_PORT, \"only server address can be called\");\n    dassert(!request->header->from_address.is_invalid(),\n            \"from address must be set before call call_ip\");\n\n    while (!request->dl.is_alone()) {\n        dwarn(\"msg request %s (trace_id = %016\" PRIx64 \") is in sending queue, try to pick out ...\",\n              request->header->rpc_name,\n              request->header->trace_id);\n        auto s = request->io_session;\n        if (s.get() != nullptr) {\n            s->cancel(request);\n        }\n    }\n\n    request->to_address = addr;\n\n    auto sp = task_spec::get(request->local_rpc_code);\n    auto &hdr = *request->header;\n\n    network *net = _client_nets[request->hdr_format][sp->rpc_call_channel].get();\n    dassert(nullptr != net,\n            \"network not present for rpc channel '%s' with format '%s' used by rpc %s\",\n          
  sp->rpc_call_channel.to_string(),\n            sp->rpc_call_header_format.to_string(),\n            hdr.rpc_name);\n\n    dinfo(\"rpc_name = %s, remote_addr = %s, header_format = %s, channel = %s, seq_id = %\" PRIu64\n          \", trace_id = %016\" PRIx64,\n          hdr.rpc_name,\n          addr.to_string(),\n          request->hdr_format.to_string(),\n          sp->rpc_call_channel.to_string(),\n          hdr.id,\n          hdr.trace_id);\n\n    if (reset_request_id) {\n        hdr.id = message_ex::new_id();\n    }\n\n    if (set_forwarded && request->header->context.u.is_forwarded == false) {\n        request->header->context.u.is_forwarded = true;\n    }\n\n    // join point and possible fault injection\n    if (!sp->on_rpc_call.execute(task::get_current_task(), request, call, true)) {\n        ddebug(\"rpc request %s is dropped (fault inject), trace_id = %016\" PRIx64,\n               request->header->rpc_name,\n               request->header->trace_id);\n\n        // call network failure model\n        net->inject_drop_message(request, true);\n\n        if (call != nullptr) {\n            call->set_delay(hdr.client.timeout_ms);\n            call->enqueue(ERR_TIMEOUT, nullptr);\n        } else {\n            // as ref_count for request may be zero\n            request->add_ref();\n            request->release_ref();\n        }\n\n        return;\n    }\n\n    if (call != nullptr) {\n        _rpc_matcher.on_call(request, call);\n    }\n\n    net->send_message(request);\n}\n\nvoid rpc_engine::reply(message_ex *response, error_code err)\n{\n    // when a message doesn't need to reply, we don't do the on_rpc_reply hooks to avoid mistakes\n    // for example, the profiler may be mistakenly calculated\n    auto s = response->io_session.get();\n    if (s == nullptr && response->to_address.is_invalid()) {\n        dinfo(\"rpc reply %s is dropped (invalid to-address), trace_id = %016\" PRIx64,\n              response->header->rpc_name,\n              
response->header->trace_id);\n        response->add_ref();\n        response->release_ref();\n        return;\n    }\n\n    strncpy(response->header->server.error_name,\n            err.to_string(),\n            sizeof(response->header->server.error_name) - 1);\n    response->header->server.error_name[sizeof(response->header->server.error_name) - 1] = '\\0';\n    response->header->server.error_code.local_code = err;\n    response->header->server.error_code.local_hash = message_ex::s_local_hash;\n\n    // response rpc code may be TASK_CODE_INVALID when the request rpc code does not exist\n    auto sp = response->local_rpc_code == TASK_CODE_INVALID\n                  ? nullptr\n                  : task_spec::get(response->local_rpc_code);\n\n    bool no_fail = true;\n    if (sp) {\n        // current task may be nullptr when this method is directly invoked from rpc_engine.\n        task *cur_task = task::get_current_task();\n        if (cur_task) {\n            no_fail = sp->on_rpc_reply.execute(cur_task, response, true);\n        }\n    }\n\n    // connection oriented network, we have bound session\n    if (s != nullptr) {\n        // not forwarded, we can use the original rpc session\n        if (!response->header->context.u.is_forwarded) {\n            if (no_fail) {\n                s->send_message(response);\n            } else {\n                s->net().inject_drop_message(response, true);\n            }\n        }\n\n        // request is forwarded, we cannot use the original rpc session,\n        // so use client session to send response.\n        else {\n            dbg_dassert(response->to_address.port() > MAX_CLIENT_PORT,\n                        \"target address must have named port in this case\");\n\n            // use the header format recorded in the message\n            auto rpc_channel = sp ? 
sp->rpc_call_channel : RPC_CHANNEL_TCP;\n            network *net = _client_nets[response->hdr_format][rpc_channel].get();\n            dassert(\n                nullptr != net,\n                \"client network not present for rpc channel '%s' with format '%s' used by rpc %s\",\n                RPC_CHANNEL_TCP.to_string(),\n                response->hdr_format.to_string(),\n                response->header->rpc_name);\n\n            if (no_fail) {\n                net->send_message(response);\n            } else {\n                net->inject_drop_message(response, true);\n            }\n        }\n    }\n\n    // not connection oriented network, we always use the named network to send msgs\n    else {\n        dbg_dassert(response->to_address.port() > MAX_CLIENT_PORT,\n                    \"target address must have named port in this case\");\n\n        auto rpc_channel = sp ? sp->rpc_call_channel : RPC_CHANNEL_TCP;\n        network *net = _server_nets[response->header->from_address.port()][rpc_channel].get();\n\n        dassert(nullptr != net,\n                \"server network not present for rpc channel '%s' on port %u used by rpc %s\",\n                RPC_CHANNEL_UDP.to_string(),\n                response->header->from_address.port(),\n                response->header->rpc_name);\n\n        if (no_fail) {\n            net->send_message(response);\n        } else {\n            net->inject_drop_message(response, true);\n        }\n    }\n\n    if (!no_fail) {\n        // because (1) initially, the ref count is zero\n        //         (2) upper apps may call add_ref already\n        response->add_ref();\n        response->release_ref();\n    }\n}\n\nvoid rpc_engine::forward(message_ex *request, rpc_address address)\n{\n    dassert(request->header->context.u.is_request, \"only rpc request can be forwarded\");\n    dassert(request->header->context.u.is_forward_supported,\n            \"rpc msg %s (trace_id = %016\" PRIx64 \") does not support being forwared\",\n 
           task_spec::get(request->local_rpc_code)->name.c_str(),\n            request->header->trace_id);\n    dassert(address != primary_address(),\n            \"cannot forward msg %s (trace_id = %016\" PRIx64 \") to the local node\",\n            task_spec::get(request->local_rpc_code)->name.c_str(),\n            request->header->trace_id);\n\n    // msg is from pure client (no server port assigned)\n    // in this case, we have no way to directly post a message\n    // to it but reusing the current server connection\n    // we therefore cannot really do the forwarding but fake it\n    if (request->header->from_address.port() <= MAX_CLIENT_PORT) {\n        auto resp = request->create_response();\n        ::dsn::marshall(resp, address);\n        ::dsn::task::get_current_rpc()->reply(resp, ::dsn::ERR_FORWARD_TO_OTHERS);\n    }\n\n    // do real forwarding: do not reset request_id, but set the forwarded flag.\n    // if forwarding failed for a non-timeout reason (such as connection denied),\n    // we will consider this as msg lost from the client side's perspective.\n    else {\n        auto copied_request = request->copy_and_prepare_send(false);\n        call_ip(address, copied_request, nullptr, false, true);\n    }\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/rpc/rpc_engine.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/synchronize.h>\n#include <dsn/tool-api/task.h>\n#include <dsn/tool-api/network.h>\n#include <dsn/tool-api/global_config.h>\n\nnamespace dsn {\n\nclass service_node;\nclass rpc_engine;\n\n#define MAX_CLIENT_PORT 1023\n\n//\n// client matcher for matching RPC request and RPC response, and handling timeout\n// (1) the whole network may share a single client matcher,\n// (2) or we usually prefere each <src, dst> pair use a client matcher to have better inquery\n// performance\n// (3) or we have certain cases we want RPC responses from node which is not the initial target node\n//     the RPC request message is sent to. 
In this case, a shared rpc_engine level matcher is used.\n//\n// WE NOW USE option (3) so as to enable more features and the performance should not be degraded\n// (due to\n// less std::shared_ptr<rpc_client_matcher> operations in rpc_timeout_task\n//\n#define MATCHER_BUCKET_NR 13\nclass rpc_client_matcher : public ref_counter\n{\npublic:\n    rpc_client_matcher(rpc_engine *engine) : _engine(engine) {}\n\n    ~rpc_client_matcher();\n\n    //\n    // when a two-way RPC call is made, register the requst id and the callback\n    // which also registers a timer for timeout tracking\n    //\n    void on_call(message_ex *request, const rpc_response_task_ptr &call);\n\n    //\n    // when a RPC response is received, call this function to trigger calback\n    //  key - message.header.id\n    //  reply - rpc response message\n    //  delay_ms - sometimes we want to delay the delivery of the message for certain purposes\n    //\n    // we may receive an empty reply to early terminate the rpc\n    //\n    bool on_recv_reply(network *net, uint64_t key, message_ex *reply, int delay_ms);\n\nprivate:\n    friend class rpc_timeout_task;\n    void on_rpc_timeout(uint64_t key);\n\nprivate:\n    rpc_engine *_engine;\n    struct match_entry\n    {\n        rpc_response_task_ptr resp_task;\n        task_ptr timeout_task;\n        uint64_t timeout_ts_ms; // > 0 for auto-resent msgs\n    };\n    typedef std::unordered_map<uint64_t, match_entry> rpc_requests;\n    rpc_requests _requests[MATCHER_BUCKET_NR];\n    ::dsn::utils::ex_lock_nr_spin _requests_lock[MATCHER_BUCKET_NR];\n};\n\nclass rpc_server_dispatcher\n{\npublic:\n    rpc_server_dispatcher();\n    ~rpc_server_dispatcher();\n\n    bool register_rpc_handler(task_code code, const char *extra_name, const rpc_request_handler &h);\n    bool unregister_rpc_handler(task_code rpc_code);\n    rpc_request_task *on_request(message_ex *msg, service_node *node);\n    int handler_count() const\n    {\n        utils::auto_read_lock 
l(_handlers_lock);\n        return static_cast<int>(_handlers.size());\n    }\n\nprivate:\n    struct handler_entry\n    {\n        task_code code;\n        std::string extra_name;\n        rpc_request_handler h;\n    };\n\n    mutable utils::rw_lock_nr _handlers_lock;\n    // there are 2 pairs for each rpc handler: code_name->hander_entry*, extra_name->hander_entry*\n    // the hander_entry pointers are the same for these 2 pairs, and the pointer is owned by\n    // _vhandlers[code_index]->first\n    //\n    // we support an extra name for compatibility to\n    // rpc client of other framework like thrift or grpc\n    std::unordered_map<std::string, handler_entry *> _handlers;\n\n    // there is one entry for each rpc code\n    std::vector<std::pair<std::unique_ptr<handler_entry>, utils::rw_lock_nr> *> _vhandlers;\n};\n\nclass rpc_engine\n{\npublic:\n    explicit rpc_engine(service_node *node);\n\n    //\n    // management routines\n    //\n    ::dsn::error_code start(const service_app_spec &spec);\n    void start_serving() { _is_serving = true; }\n    void stop_serving() { _is_serving = false; }\n\n    //\n    // rpc registrations\n    //\n    bool\n    register_rpc_handler(dsn::task_code code, const char *extra_name, const rpc_request_handler &h);\n    bool unregister_rpc_handler(dsn::task_code rpc_code);\n\n    //\n    // rpc routines\n    //\n    void call(message_ex *request, const rpc_response_task_ptr &call);\n    void on_recv_request(network *net, message_ex *msg, int delay_ms);\n    void reply(message_ex *response, error_code err = ERR_OK);\n    void forward(message_ex *request, rpc_address address);\n\n    //\n    // information inquery\n    //\n    service_node *node() const { return _node; }\n    ::dsn::rpc_address primary_address() const { return _local_primary_address; }\n    rpc_client_matcher *matcher() { return &_rpc_matcher; }\n\n    // call with group address only\n    void call_group(rpc_address addr, message_ex *request, const 
rpc_response_task_ptr &call);\n\n    // call with ip address only\n    void call_ip(rpc_address addr,\n                 message_ex *request,\n                 const rpc_response_task_ptr &call,\n                 bool reset_request_id = false,\n                 bool set_forwarded = false);\n\n    // call with explicit address\n    void call_address(rpc_address addr, message_ex *request, const rpc_response_task_ptr &call);\n\nprivate:\n    network *create_network(const network_server_config &netcs,\n                            bool client_only,\n                            network_header_format client_hdr_format);\n\nprivate:\n    service_node *_node;\n    std::vector<std::vector<std::unique_ptr<network>>>\n        _client_nets; // <format, <CHANNEL, network*>>\n    std::unordered_map<int, std::vector<std::unique_ptr<network>>>\n        _server_nets; // <port, <CHANNEL, network*>>\n    ::dsn::rpc_address _local_primary_address;\n    rpc_client_matcher _rpc_matcher;\n    rpc_server_dispatcher _rpc_dispatcher;\n\n    volatile bool _is_running;\n    volatile bool _is_serving;\n};\n\n// ------------------------ inline implementations --------------------\n\ninline void\nrpc_engine::call_address(rpc_address addr, message_ex *request, const rpc_response_task_ptr &call)\n{\n    switch (addr.type()) {\n    case HOST_TYPE_IPV4:\n        call_ip(addr, request, call);\n        break;\n    case HOST_TYPE_GROUP:\n        call_group(addr, request, call);\n        break;\n    default:\n        dassert(false, \"invalid target address type %d\", (int)request->server_address.type());\n        break;\n    }\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/rpc/rpc_message.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/utility/ports.h>\n#include <dsn/utility/crc.h>\n#include <dsn/tool-api/rpc_message.h>\n#include <dsn/tool-api/network.h>\n#include <dsn/tool-api/message_parser.h>\n#include <cctype>\n\n#include \"runtime/task/task_engine.h\"\n\nusing namespace dsn::utils;\n\nnamespace dsn {\n\nstd::atomic<uint64_t> message_ex::_id(0);\nuint32_t message_ex::s_local_hash = 0;\n\nmessage_ex::message_ex()\n    : header(nullptr),\n      local_rpc_code(::dsn::TASK_CODE_INVALID),\n      hdr_format(NET_HDR_INVALID),\n      send_retry_count(0),\n      _rw_index(-1),\n      _rw_offset(0),\n      _rw_committed(true),\n      _is_read(false)\n{\n}\n\nmessage_ex::~message_ex()\n{\n    // coz message_header's memory is managed by vector \"buffers\", 
so its memory will be released\n    // after blobs in \"buffers\" are free.\n    //\n    // however, the message_header's object is constructed with placement new\n    // in prepare_buffer_header, so the destructor won't be called automatically with the\n    // \"free of blobs in buffers\".\n    //\n    // strictly speaking, we should call release_header_buffer to trigger message_header's\n    // destructor, but we can't do this as the message_header may be shared with other\n    // rpc_message objects if you call \"copy_and_prepare_send\".\n    //\n    // so here we simply skip the release_header_buffer. Notice this won't lead to any\n    // memory leak problem as the header's destructor is trival:\n    //      gpid -> we can treat it as POD type\n    //      rpc_address -> only ipv4, we can treat it as POD type\n    //\n    // Please refer to comments on message_header's definition for details\n\n    // release_header_buffer();\n    if (!_is_read) {\n        dassert(_rw_committed, \"message write is not committed\");\n    }\n}\n\nerror_code message_ex::error()\n{\n    dsn::error_code code;\n    auto binary_hash = header->server.error_code.local_hash;\n    if (binary_hash != 0 && binary_hash == ::dsn::message_ex::s_local_hash) {\n        code = dsn::error_code(header->server.error_code.local_code);\n    } else {\n        code = error_code::try_get(header->server.error_name, dsn::ERR_UNKNOWN);\n        header->server.error_code.local_hash = ::dsn::message_ex::s_local_hash;\n        header->server.error_code.local_code = code;\n    }\n    return code;\n}\n\ntask_code message_ex::rpc_code()\n{\n    if (local_rpc_code != ::dsn::TASK_CODE_INVALID) {\n        return local_rpc_code;\n    }\n\n    auto binary_hash = header->rpc_code.local_hash;\n    if (binary_hash != 0 && binary_hash == ::dsn::message_ex::s_local_hash) {\n        local_rpc_code = dsn::task_code(header->rpc_code.local_code);\n    } else {\n        local_rpc_code = dsn::task_code::try_get(header->rpc_name, 
::dsn::TASK_CODE_INVALID);\n        header->rpc_code.local_hash = ::dsn::message_ex::s_local_hash;\n        header->rpc_code.local_code = local_rpc_code.code();\n    }\n\n    return local_rpc_code;\n}\n\nmessage_ex *message_ex::create_receive_message(const blob &data)\n{\n    message_ex *msg = new message_ex();\n    msg->header = (message_header *)data.data();\n    msg->_is_read = true;\n    // the message_header is hidden ahead of the buffer\n    auto data2 = data.range((int)sizeof(message_header));\n    msg->buffers.push_back(data2);\n\n    // dbg_dassert(msg->header->body_length > 0, \"message %s is empty!\", msg->header->rpc_name);\n    return msg;\n}\n\nmessage_ex *message_ex::create_received_request(dsn::task_code code,\n                                                dsn_msg_serialize_format format,\n                                                void *buffer,\n                                                int size,\n                                                int thread_hash,\n                                                uint64_t partition_hash)\n{\n    ::dsn::blob bb((const char *)buffer, 0, size);\n    auto msg = ::dsn::message_ex::create_receive_message_with_standalone_header(bb);\n    msg->local_rpc_code = code;\n    const char *name = code.to_string();\n    strncpy(msg->header->rpc_name, name, sizeof(msg->header->rpc_name) - 1);\n    msg->header->rpc_name[sizeof(msg->header->rpc_name) - 1] = '\\0';\n\n    msg->header->client.thread_hash = thread_hash;\n    msg->header->client.partition_hash = partition_hash;\n    msg->header->context.u.serialize_format = format;\n    msg->add_ref(); // released by callers explicitly using release_ref\n    return msg;\n}\n\nmessage_ex *message_ex::create_receive_message_with_standalone_header(const blob &data)\n{\n    message_ex *msg = new message_ex();\n    size_t header_size = sizeof(message_header);\n    std::string str(header_size, '\\0');\n    msg->header = reinterpret_cast<message_header 
*>(const_cast<char *>(str.data()));\n\n    msg->buffers.emplace_back(blob::create_from_bytes(std::move(str)));\n    msg->buffers.push_back(data);\n\n    msg->header->body_length = data.length();\n    msg->_is_read = true;\n    // we skip the message header\n    msg->_rw_index = 1;\n\n    return msg;\n}\n\nmessage_ex *message_ex::copy_message_no_reply(const message_ex &old_msg)\n{\n    message_ex *msg = new message_ex();\n    size_t header_size = sizeof(message_header);\n    std::string str(header_size, '\\0');\n    msg->header = reinterpret_cast<message_header *>(const_cast<char *>(str.data()));\n\n    msg->buffers.emplace_back(blob::create_from_bytes(std::move(str)));\n    if (old_msg.buffers.size() == 1) {\n        // if old_msg only has header, consider its header as data\n        msg->buffers.emplace_back(old_msg.buffers[0]);\n    } else {\n        msg->buffers.emplace_back(old_msg.buffers[1]);\n    }\n\n    msg->header->body_length = msg->buffers[1].length();\n    msg->header->context.u.serialize_format = old_msg.header->context.u.serialize_format;\n    msg->_is_read = true;\n    msg->_rw_index = 1;\n    msg->_rw_offset = old_msg._rw_offset;\n    msg->local_rpc_code = old_msg.local_rpc_code;\n    msg->add_ref();\n\n    return msg;\n}\n\nmessage_ex *message_ex::copy(bool clone_content, bool copy_for_receive)\n{\n    dassert(this->_rw_committed, \"should not copy the message when read/write is not committed\");\n\n    // ATTENTION:\n    // - if this message is a written message, set copied message's write pointer to the end,\n    //   then you can continue to append data to the copied message.\n    //\n    // - if this message is a read message, set copied message's read pointer to the beginning,\n    //   then you can read data from the beginning.\n    //\n    // - if copy_for_receive is set, it means that we want to make a receiving message from a\n    //   sending message. 
which is usually useful when you want to\n    //   write mock for modules which use rpc.\n\n    message_ex *msg = new message_ex();\n    msg->to_address = to_address;\n    msg->local_rpc_code = local_rpc_code;\n    msg->hdr_format = hdr_format;\n\n    if (!copy_for_receive)\n        msg->_is_read = _is_read;\n    else\n        msg->_is_read = true;\n\n    // received message\n    if (msg->_is_read) {\n        // leave _rw_index and _rw_offset as initial state, pointing to the beginning of the buffer\n    }\n    // send message\n    else {\n        msg->server_address = server_address;\n        // copy the orignal value, pointing to the end of the buffer\n        msg->_rw_index = _rw_index;\n        msg->_rw_offset = _rw_offset;\n    }\n\n    if (!clone_content) {\n        msg->header = header; // header is within the buffer\n        msg->buffers = buffers;\n    } else {\n        int total_length = body_size() + sizeof(dsn::message_header);\n        std::shared_ptr<char> recv_buffer(dsn::utils::make_shared_array<char>(total_length));\n        char *ptr = recv_buffer.get();\n        int i = 0;\n\n        if ((const char *)header != buffers[0].data()) {\n            memcpy(ptr, (const void *)header, sizeof(message_header));\n            ptr += sizeof(message_header);\n        }\n\n        for (dsn::blob &bb : buffers) {\n            memcpy(ptr, bb.data(), bb.length());\n            i += bb.length();\n            ptr += bb.length();\n        }\n        dassert(\n            i == total_length, \"%d VS %d, rpc_name = %s\", i, total_length, msg->header->rpc_name);\n\n        auto data = dsn::blob(recv_buffer, total_length);\n\n        msg->header = (message_header *)data.data();\n        if (msg->_is_read)\n            msg->buffers.push_back(data.range((int)sizeof(message_header)));\n        else\n            msg->buffers.push_back(data);\n    }\n    return msg;\n}\n\nmessage_ex *message_ex::copy_and_prepare_send(bool clone_content)\n{\n    auto copy = 
this->copy(clone_content, false);\n\n    if (_is_read) {\n        // the message_header is hidden ahead of the buffer, expose it to buffer\n        dassert(buffers.size() == 1, \"there must be only one buffer for read msg\");\n        dassert((char *)header + sizeof(message_header) == (char *)buffers[0].data(),\n                \"header and content must be contigous\");\n\n        copy->buffers[0] = copy->buffers[0].range(-(int)sizeof(message_header));\n\n        // switch the flag\n        copy->_is_read = false;\n    }\n\n    return copy;\n}\n\nmessage_ex *message_ex::create_request(dsn::task_code rpc_code,\n                                       int timeout_milliseconds,\n                                       int thread_hash,\n                                       uint64_t partition_hash)\n{\n    message_ex *msg = new message_ex();\n    msg->_is_read = false;\n    msg->prepare_buffer_header();\n\n    // init header\n    auto &hdr = *msg->header;\n    memset(static_cast<void *>(&hdr), 0, sizeof(hdr));\n    hdr.hdr_type = *(uint32_t *)\"RDSN\";\n    hdr.hdr_length = sizeof(message_header);\n    hdr.hdr_crc32 = hdr.body_crc32 = CRC_INVALID;\n\n    // if thread_hash == 0 && partition_hash != 0,\n    // thread_hash is computed from partition_hash in rpc_engine\n    hdr.client.thread_hash = thread_hash;\n    hdr.client.partition_hash = partition_hash;\n\n    task_spec *sp = task_spec::get(rpc_code);\n    if (0 == timeout_milliseconds) {\n        hdr.client.timeout_ms = sp->rpc_timeout_milliseconds;\n    } else {\n        hdr.client.timeout_ms = timeout_milliseconds;\n    }\n\n    msg->local_rpc_code = rpc_code;\n    strncpy(hdr.rpc_name, sp->name.c_str(), sizeof(hdr.rpc_name) - 1);\n    hdr.rpc_name[sizeof(hdr.rpc_name) - 1] = '\\0';\n    hdr.rpc_code.local_code = (uint32_t)rpc_code;\n    hdr.rpc_code.local_hash = s_local_hash;\n\n    hdr.id = new_id();\n\n    hdr.context.u.is_request = true;\n    hdr.context.u.serialize_format = 
sp->rpc_msg_payload_serialize_default_format;\n    hdr.context.u.is_forward_supported = true;\n\n    msg->hdr_format = sp->rpc_call_header_format;\n\n    return msg;\n}\n\nmessage_ex *message_ex::create_response()\n{\n    message_ex *msg = new message_ex();\n    msg->_is_read = false;\n    msg->prepare_buffer_header();\n\n    // init header\n    auto &hdr = *msg->header;\n    hdr = *header; // copy request header\n    hdr.hdr_crc32 = hdr.body_crc32 = CRC_INVALID;\n    hdr.body_length = 0;\n    hdr.context.u.is_request = false;\n\n    // ATTENTION: the from_address may not be the primary address of this node\n    // if there are more than one ports listened and the to_address is not equal to\n    // the primary address.\n    msg->header->from_address = to_address;\n    msg->to_address = header->from_address;\n    msg->io_session = io_session;\n    msg->hdr_format = hdr_format;\n\n    if (local_rpc_code != TASK_CODE_INVALID) {\n        task_spec *request_sp = task_spec::get(local_rpc_code);\n        task_spec *response_sp = task_spec::get(request_sp->rpc_paired_code);\n        msg->local_rpc_code = response_sp->code;\n        strncpy(hdr.rpc_name, response_sp->name.c_str(), sizeof(hdr.rpc_name) - 1);\n        hdr.rpc_name[sizeof(hdr.rpc_name) - 1] = '\\0';\n        hdr.rpc_code.local_code = msg->local_rpc_code;\n        hdr.rpc_code.local_hash = s_local_hash;\n\n        // join point\n        request_sp->on_rpc_create_response.execute(this, msg);\n    } else {\n        msg->local_rpc_code = TASK_CODE_INVALID;\n        std::string ack_rpc_name(header->rpc_name);\n        ack_rpc_name += \"_ACK\";\n        strncpy(hdr.rpc_name, ack_rpc_name.c_str(), sizeof(hdr.rpc_name) - 1);\n        hdr.rpc_name[sizeof(hdr.rpc_name) - 1] = '\\0';\n        hdr.rpc_code.local_code = TASK_CODE_INVALID;\n        hdr.rpc_code.local_hash = s_local_hash;\n    }\n\n    return msg;\n}\n\nvoid message_ex::prepare_buffer_header()\n{\n    size_t header_size = sizeof(message_header);\n    auto 
ptr(dsn::utils::make_shared_array<char>(header_size));\n\n    // here we should call placement new,\n    // so the gpid & rpc_address can be initialized\n    new (ptr.get())(message_header);\n    this->header = (message_header *)ptr.get();\n\n    ::dsn::blob buffer(std::move(ptr), header_size);\n    this->buffers.push_back(buffer);\n    this->_rw_index = 0;\n    this->_rw_offset = header_size;\n}\n\nvoid message_ex::release_buffer_header()\n{\n    // we should call destructor explicitly\n    // as the header is constructed with placement new, see@prepare_buffer_header\n    header->~message_header();\n}\n\nvoid message_ex::write_next(void **ptr, size_t *size, size_t min_size)\n{\n    // printf(\"%p %s\\n\", this, __FUNCTION__);\n    dassert(!this->_is_read && this->_rw_committed,\n            \"there are pending msg write not committed\"\n            \", please invoke dsn_msg_write_next and dsn_msg_write_commit in pairs\");\n    auto ptr_data(utils::make_shared_array<char>(min_size));\n    *size = min_size;\n    *ptr = ptr_data.get();\n    this->_rw_committed = false;\n\n    ::dsn::blob buffer(ptr_data, min_size);\n    this->_rw_index++;\n    this->_rw_offset = 0;\n    this->buffers.push_back(buffer);\n\n    dassert(this->_rw_index + 1 == (int)this->buffers.size(),\n            \"message write buffer count is not right\");\n}\n\nvoid message_ex::write_commit(size_t size)\n{\n    // printf(\"%p %s\\n\", this, __FUNCTION__);\n    dassert(!this->_rw_committed,\n            \"there are no pending msg write to be committed\"\n            \", please invoke dsn_msg_write_next and dsn_msg_write_commit in pairs\");\n\n    this->_rw_offset += (int)size;\n    *this->buffers.rbegin() = this->buffers.rbegin()->range(0, (int)this->_rw_offset);\n    this->_rw_committed = true;\n    this->header->body_length += (int)size;\n}\n\nbool message_ex::read_next(void **ptr, size_t *size)\n{\n    // printf(\"%p %s %d\\n\", this, __FUNCTION__, utils::get_current_tid());\n    
dassert(this->_is_read && this->_rw_committed,\n            \"there are pending msg read not committed\"\n            \", please invoke dsn_msg_read_next and dsn_msg_read_commit in pairs\");\n\n    int idx = this->_rw_index;\n    if (-1 == idx || this->_rw_offset == static_cast<int>(this->buffers[idx].length())) {\n        idx = ++this->_rw_index;\n        this->_rw_offset = 0;\n    }\n\n    if (idx < (int)this->buffers.size()) {\n        this->_rw_committed = false;\n        *ptr = (void *)(this->buffers[idx].data() + this->_rw_offset);\n        *size = (size_t)this->buffers[idx].length() - this->_rw_offset;\n        return true;\n    } else {\n        *ptr = nullptr;\n        *size = 0;\n        return false;\n    }\n}\n\nbool message_ex::read_next(blob &data)\n{\n    // printf(\"%p %s %d\\n\", this, __FUNCTION__, utils::get_current_tid());\n    dassert(this->_is_read && this->_rw_committed,\n            \"there are pending msg read not committed\"\n            \", please invoke dsn_msg_read_next and dsn_msg_read_commit in pairs\");\n\n    int idx = this->_rw_index;\n    if (-1 == idx || this->_rw_offset == static_cast<int>(this->buffers[idx].length())) {\n        idx = ++this->_rw_index;\n        this->_rw_offset = 0;\n    }\n\n    if (idx < (int)this->buffers.size()) {\n        this->_rw_committed = false;\n        data = this->buffers[idx].range(this->_rw_offset);\n        return true;\n    } else {\n        data = blob();\n        return false;\n    }\n}\n\nvoid message_ex::read_commit(size_t size)\n{\n    // printf(\"%p %s\\n\", this, __FUNCTION__);\n    dassert(!this->_rw_committed,\n            \"there are no pending msg read to be committed\"\n            \", please invoke dsn_msg_read_next and dsn_msg_read_commit in pairs\");\n\n    dassert(-1 != this->_rw_index, \"no buffer in curent msg is under read\");\n    this->_rw_offset += (int)size;\n    this->_rw_committed = true;\n}\n\nvoid message_ex::restore_read()\n{\n    _rw_index = -1;\n    _rw_committed 
= true;\n    _rw_offset = 0;\n}\n\nvoid *message_ex::rw_ptr(size_t offset_begin)\n{\n    // printf(\"%p %s\\n\", this, __FUNCTION__);\n    int i_max = (int)this->buffers.size();\n\n    if (!_is_read)\n        offset_begin += sizeof(message_header);\n\n    for (int i = 0; i < i_max; i++) {\n        size_t c_length = (size_t)(this->buffers[i].length());\n        if (offset_begin < c_length) {\n            return (void *)(this->buffers[i].data() + offset_begin);\n        } else {\n            offset_begin -= c_length;\n        }\n    }\n    return nullptr;\n}\n\n} // end namespace dsn\n"
  },
  {
    "path": "src/runtime/rpc/rpc_task.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"runtime/task/task_engine.h\"\n#include <dsn/tool-api/task.h>\n\nnamespace dsn {\n\nrpc_request_task::rpc_request_task(message_ex *request, rpc_request_handler &&h, service_node *node)\n    : task(request->rpc_code(), request->header->client.thread_hash, node),\n      _request(request),\n      _handler(std::move(h)),\n      _enqueue_ts_ns(0)\n{\n    dbg_dassert(\n        TASK_TYPE_RPC_REQUEST == spec().type,\n        \"%s is not a RPC_REQUEST task, please use DEFINE_TASK_CODE_RPC to define the task code\",\n        spec().name.c_str());\n    _request->add_ref(); // released in dctor\n}\n\nrpc_request_task::~rpc_request_task()\n{\n    _request->release_ref(); // added in ctor\n}\n\nvoid rpc_request_task::enqueue()\n{\n    if 
(spec().rpc_request_dropped_before_execution_when_timeout) {\n        _enqueue_ts_ns = dsn_now_ns();\n    }\n    task::enqueue(node()->computation()->get_pool(spec().pool_code));\n}\n\nrpc_response_task::rpc_response_task(message_ex *request,\n                                     const rpc_response_handler &cb,\n                                     int hash,\n                                     service_node *node)\n    : rpc_response_task(request, rpc_response_handler(cb), hash, node)\n{\n}\n\nrpc_response_task::rpc_response_task(message_ex *request,\n                                     rpc_response_handler &&cb,\n                                     int hash,\n                                     service_node *node)\n    : task(task_spec::get(request->local_rpc_code)->rpc_paired_code,\n           hash == 0 ? request->header->client.thread_hash : hash,\n           node),\n      _cb(std::move(cb))\n{\n    _is_null = (_cb == nullptr);\n\n    set_error_code(ERR_IO_PENDING);\n\n    dbg_dassert(TASK_TYPE_RPC_RESPONSE == spec().type,\n                \"%s is not of RPC_RESPONSE type, please use DEFINE_TASK_CODE_RPC to define the \"\n                \"request task code\",\n                spec().name.c_str());\n\n    _request = request;\n    _response = nullptr;\n\n    _caller_pool = get_current_worker() ? 
get_current_worker()->pool() : nullptr;\n\n    _request->add_ref(); // released in dctor\n}\n\nrpc_response_task::~rpc_response_task()\n{\n    _request->release_ref(); // added in ctor\n\n    if (_response != nullptr)\n        _response->release_ref(); // added in enqueue\n}\n\nbool rpc_response_task::enqueue(error_code err, message_ex *reply)\n{\n    set_error_code(err);\n\n    if (_response != nullptr)\n        _response->release_ref(); // added in previous enqueue\n\n    _response = reply;\n\n    if (nullptr != reply) {\n        reply->add_ref(); // released in dctor\n    }\n\n    bool ret = true;\n    if (!spec().on_rpc_response_enqueue.execute(this, true)) {\n        set_error_code(ERR_NETWORK_FAILURE);\n        ret = false;\n    }\n\n    rpc_response_task::enqueue();\n    return ret;\n}\n\nvoid rpc_response_task::enqueue()\n{\n    if (_caller_pool)\n        task::enqueue(_caller_pool);\n\n    // possible when it is called in non-rDSN threads\n    else {\n        auto pool = node()->computation()->get_pool(spec().pool_code);\n        task::enqueue(pool);\n    }\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/rpc/thrift_message_parser.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"thrift_message_parser.h\"\n\n#include <dsn/service_api_c.h>\n#include <dsn/cpp/serialization_helper/thrift_helper.h>\n#include <dsn/cpp/serialization_helper/dsn.layer2_types.h>\n#include <dsn/cpp/message_utils.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/ports.h>\n#include <dsn/utility/crc.h>\n#include <dsn/utility/endians.h>\n#include <dsn/tool-api/rpc_message.h>\n\nnamespace dsn {\n\n//                 //\n// Request Parsing //\n//                 //\n\n///\n/// For version 0:\n/// |<--              fixed-size request header              -->|<--request body-->|\n/// |-\"THFT\"-|- hdr_version + hdr_length -|-  request_meta_v0  -|-      blob      -|\n/// |-\"THFT\"-|-  uint32(0)  + uint32(48) -|-      
36bytes      -|-                -|\n/// |-               12bytes             -|-      36bytes      -|-                -|\n///\n/// For version 1:\n/// |<--          fixed-size request header           -->| <--        request body        -->|\n/// |-\"THFT\"-|- hdr_version + meta_length + body_length -|- thrift_request_meta_v1 -|- blob -|\n/// |-\"THFT\"-|-  uint32(1)  +   uint32    +    uint32   -|-      thrift struct     -|-      -|\n/// |-                      16bytes                     -|-      thrift struct     -|-      -|\n///\n/// TODO(wutao1): remove v0 once it has no user\n\n// \"THFT\" + uint32(hdr_version) + uint32(body_length) + uint32(meta_length)\nstatic constexpr size_t HEADER_LENGTH_V1 = 16;\n\n// \"THFT\" + uint32(hdr_version)\nstatic constexpr size_t THFT_HDR_VERSION_LENGTH = 8;\n\n// \"THFT\" + uint32(hdr_version) + uint32(hdr_length) + 36bytes(request_meta_v0)\nstatic constexpr size_t HEADER_LENGTH_V0 = 48;\n\nstatic void parse_request_meta_v0(data_input &input, /*out*/ request_meta_v0 &meta)\n{\n    meta.hdr_crc32 = input.read_u32();\n    meta.body_length = input.read_u32();\n    meta.body_crc32 = input.read_u32();\n    meta.app_id = input.read_u32();\n    meta.partition_index = input.read_u32();\n    meta.client_timeout = input.read_u32();\n    meta.client_thread_hash = input.read_u32();\n    meta.client_partition_hash = input.read_u64();\n}\n\nstatic int32_t gpid_to_thread_hash(gpid id)\n{\n    static const int magic_number = 7919;\n    return id.get_app_id() * magic_number + id.get_partition_index();\n}\n\n// Reads the requests's name, seqid, and TMessageType from the binary data,\n// and constructs a `message_ex` object.\nstatic message_ex *create_message_from_request_blob(const blob &body_data)\n{\n    dsn::message_ex *msg = message_ex::create_receive_message_with_standalone_header(body_data);\n    dsn::message_header *dsn_hdr = msg->header;\n\n    dsn::rpc_read_stream stream(msg);\n    ::dsn::binary_reader_transport 
binary_transport(stream);\n    boost::shared_ptr<::dsn::binary_reader_transport> trans_ptr(\n        &binary_transport, [](::dsn::binary_reader_transport *) {});\n    ::apache::thrift::protocol::TBinaryProtocol iprot(trans_ptr);\n\n    std::string fname;\n    ::apache::thrift::protocol::TMessageType mtype;\n    int32_t seqid;\n    iprot.readMessageBegin(fname, mtype, seqid);\n    dsn_hdr->id = seqid;\n    strncpy(dsn_hdr->rpc_name, fname.c_str(), sizeof(dsn_hdr->rpc_name) - 1);\n    dsn_hdr->rpc_name[sizeof(dsn_hdr->rpc_name) - 1] = '\\0';\n\n    if (mtype == ::apache::thrift::protocol::T_CALL ||\n        mtype == ::apache::thrift::protocol::T_ONEWAY) {\n        dsn_hdr->context.u.is_request = 1;\n    }\n    if (dsn_hdr->context.u.is_request != 1) {\n        derror(\"invalid message type: %d\", mtype);\n        delete msg;\n        /// set set rpc_read_stream::_msg to nullptr,\n        /// to avoid the dstor to call read_commit of _msg, which is deleted here.\n        stream.set_read_msg(nullptr);\n        return nullptr;\n    }\n    dsn_hdr->context.u.serialize_format = DSF_THRIFT_BINARY; // always serialize in thrift binary\n\n    // common fields\n    msg->hdr_format = NET_HDR_THRIFT;\n    dsn_hdr->hdr_type = THRIFT_HDR_SIG;\n    dsn_hdr->hdr_length = sizeof(message_header);\n    dsn_hdr->hdr_crc32 = msg->header->body_crc32 = CRC_INVALID;\n    return msg;\n}\n\n// Parses the request's fixed-size header.\n//\n// For version 0:\n// |-\"THFT\"-|- hdr_version + hdr_length -|-  request_meta_v0  -|\n//\n// For version 1:\n// |-\"THFT\"-|- hdr_version + meta_length + body_length -|\n//\nbool thrift_message_parser::parse_request_header(message_reader *reader, int &read_next)\n{\n    blob buf = reader->buffer();\n    // make sure there is enough space for 'THFT' and header_version\n    if (buf.size() < THFT_HDR_VERSION_LENGTH) {\n        read_next = THFT_HDR_VERSION_LENGTH - buf.size();\n        return false;\n    }\n\n    // The first 4 bytes is \"THFT\"\n    data_input 
input(buf);\n    if (memcmp(buf.data(), \"THFT\", 4) != 0) {\n        derror(\"hdr_type mismatch %s\", message_parser::get_debug_string(buf.data()).c_str());\n        read_next = -1;\n        return false;\n    }\n    input.skip(4);\n\n    // deal with different versions\n    int header_version = input.read_u32();\n    if (0 == header_version) {\n        if (buf.size() < HEADER_LENGTH_V0) {\n            read_next = HEADER_LENGTH_V0 - buf.size();\n            return false;\n        }\n\n        uint32_t hdr_length = input.read_u32();\n        if (hdr_length != HEADER_LENGTH_V0) {\n            derror(\"hdr_length should be %u, but %u\", HEADER_LENGTH_V0, hdr_length);\n            read_next = -1;\n            return false;\n        }\n\n        parse_request_meta_v0(input, *_meta_v0);\n        reader->consume_buffer(HEADER_LENGTH_V0);\n    } else if (1 == header_version) {\n        if (buf.size() < HEADER_LENGTH_V1) {\n            read_next = HEADER_LENGTH_V1 - buf.size();\n            return false;\n        }\n\n        _v1_specific_vars->_meta_length = input.read_u32();\n        _v1_specific_vars->_body_length = input.read_u32();\n        reader->consume_buffer(HEADER_LENGTH_V1);\n    } else {\n        derror(\"invalid hdr_version %d\", _header_version);\n        read_next = -1;\n        return false;\n    }\n    _header_version = header_version;\n\n    return true;\n}\n\nmessage_ex *thrift_message_parser::parse_request_body_v0(message_reader *reader, int &read_next)\n{\n    blob buf = reader->buffer();\n\n    // Parses request data\n    // TODO(wutao1): handle the case where body_length is too short to parse.\n    if (buf.size() < _meta_v0->body_length) {\n        read_next = _meta_v0->body_length - buf.size();\n        return nullptr;\n    }\n\n    buf = buf.range(0, _meta_v0->body_length);\n    reader->consume_buffer(_meta_v0->body_length);\n    message_ex *msg = create_message_from_request_blob(buf);\n    if (msg == nullptr) {\n        read_next = -1;\n        
reset();\n        return nullptr;\n    }\n\n    read_next = (reader->_buffer_occupied >= HEADER_LENGTH_V0\n                     ? 0\n                     : HEADER_LENGTH_V0 - reader->_buffer_occupied);\n\n    msg->header->body_length = _meta_v0->body_length;\n    dcheck_eq(msg->header->body_length, msg->buffers[1].size());\n    msg->header->gpid.set_app_id(_meta_v0->app_id);\n    msg->header->gpid.set_partition_index(_meta_v0->partition_index);\n    msg->header->client.timeout_ms = _meta_v0->client_timeout;\n    msg->header->client.thread_hash = _meta_v0->client_thread_hash;\n    msg->header->client.partition_hash = _meta_v0->client_partition_hash;\n    reset();\n    return msg;\n}\n\nmessage_ex *thrift_message_parser::parse_request_body_v1(message_reader *reader, int &read_next)\n{\n    // Parses request meta\n    blob buf = reader->buffer();\n    if (!_v1_specific_vars->_meta_parsed) {\n        if (buf.size() < _v1_specific_vars->_meta_length) {\n            read_next = _v1_specific_vars->_meta_length - buf.size();\n            return nullptr;\n        }\n\n        binary_reader meta_reader(buf);\n        ::dsn::binary_reader_transport trans(meta_reader);\n        boost::shared_ptr<::dsn::binary_reader_transport> transport(\n            &trans, [](::dsn::binary_reader_transport *) {});\n        ::apache::thrift::protocol::TBinaryProtocol proto(transport);\n        _v1_specific_vars->_meta_v1->read(&proto);\n        _v1_specific_vars->_meta_parsed = true;\n    }\n    buf = buf.range(_v1_specific_vars->_meta_length);\n\n    // Parses request body\n    if (buf.size() < _v1_specific_vars->_body_length) {\n        read_next = _v1_specific_vars->_body_length - buf.size();\n        return nullptr;\n    }\n    buf = buf.range(0, _v1_specific_vars->_body_length);\n    reader->consume_buffer(_v1_specific_vars->_meta_length + _v1_specific_vars->_body_length);\n    message_ex *msg = create_message_from_request_blob(buf);\n    if (msg == nullptr) {\n        read_next = -1;\n  
      reset();\n        return nullptr;\n    }\n\n    read_next = (reader->_buffer_occupied >= HEADER_LENGTH_V1\n                     ? 0\n                     : HEADER_LENGTH_V1 - reader->_buffer_occupied);\n\n    msg->header->body_length = _v1_specific_vars->_body_length;\n    dcheck_eq(msg->header->body_length, msg->buffers[1].size());\n    msg->header->gpid.set_app_id(_v1_specific_vars->_meta_v1->app_id);\n    msg->header->gpid.set_partition_index(_v1_specific_vars->_meta_v1->partition_index);\n    msg->header->client.timeout_ms = _v1_specific_vars->_meta_v1->client_timeout;\n    msg->header->client.thread_hash = gpid_to_thread_hash(msg->header->gpid);\n    msg->header->client.partition_hash = _v1_specific_vars->_meta_v1->client_partition_hash;\n    msg->header->context.u.is_backup_request = _v1_specific_vars->_meta_v1->is_backup_request;\n    reset();\n    return msg;\n}\n\nmessage_ex *thrift_message_parser::get_message_on_receive(message_reader *reader,\n                                                          /*out*/ int &read_next)\n{\n    read_next = 4096;\n    // Parses request header, -1 means header has not been parsed\n    if (-1 == _header_version) {\n        if (!parse_request_header(reader, read_next)) {\n            return nullptr;\n        }\n    }\n\n    // Parses request body\n    switch (_header_version) {\n    case 0:\n        return parse_request_body_v0(reader, read_next);\n    case 1:\n        return parse_request_body_v1(reader, read_next);\n    default:\n        assert(\"invalid header version\");\n    }\n\n    return nullptr;\n}\n\nvoid thrift_message_parser::reset()\n{\n    _header_version = -1;\n    _meta_v0->clear();\n    _v1_specific_vars->clear();\n}\n\n//                   //\n// Response Encoding //\n//                   //\n\nvoid thrift_message_parser::prepare_on_send(message_ex *msg)\n{\n    auto &header = msg->header;\n    auto &buffers = msg->buffers;\n\n    dassert(!header->context.u.is_request, \"only support send 
response\");\n    dassert(header->server.error_name[0], \"error name should be set\");\n    dassert(!buffers.empty(), \"buffers can not be empty\");\n\n    // write thrift response header and thrift message begin\n    binary_writer header_writer;\n    binary_writer_transport header_trans(header_writer);\n    boost::shared_ptr<binary_writer_transport> header_trans_ptr(&header_trans,\n                                                                [](binary_writer_transport *) {});\n    ::apache::thrift::protocol::TBinaryProtocol header_proto(header_trans_ptr);\n    // first total length, but we don't know the length, so firstly we put a placeholder\n    header_proto.writeI32(0);\n    // then the error_message\n    header_proto.writeString(string_view(header->server.error_name));\n    // then the thrift message begin\n    header_proto.writeMessageBegin(\n        header->rpc_name, ::apache::thrift::protocol::T_REPLY, header->id);\n\n    // write thrift message end\n    binary_writer end_writer;\n    binary_writer_transport end_trans(header_writer);\n    boost::shared_ptr<binary_writer_transport> end_trans_ptr(&end_trans,\n                                                             [](binary_writer_transport *) {});\n    ::apache::thrift::protocol::TBinaryProtocol end_proto(end_trans_ptr);\n    end_proto.writeMessageEnd();\n\n    // now let's set the total length\n    blob header_bb = header_writer.get_buffer();\n    blob end_bb = end_writer.get_buffer();\n    int32_t *total_length = reinterpret_cast<int32_t *>(const_cast<char *>(header_bb.data()));\n    *total_length = endian::hton(header_bb.length() + header->body_length + end_bb.length());\n\n    unsigned int dsn_size = sizeof(message_header) + header->body_length;\n    int dsn_buf_count = 0;\n    while (dsn_size > 0 && dsn_buf_count < buffers.size()) {\n        blob &buf = buffers[dsn_buf_count];\n        dassert(dsn_size >= buf.length(), \"%u VS %u\", dsn_size, buf.length());\n        dsn_size -= buf.length();\n  
      ++dsn_buf_count;\n    }\n    dassert(dsn_size == 0, \"dsn_size = %u\", dsn_size);\n\n    // put header_bb and end_bb at the end\n    buffers.resize(dsn_buf_count);\n    buffers.emplace_back(std::move(header_bb));\n    buffers.emplace_back(std::move(end_bb));\n}\n\nint thrift_message_parser::get_buffers_on_send(message_ex *msg, /*out*/ send_buf *buffers)\n{\n    auto &msg_header = msg->header;\n    auto &msg_buffers = msg->buffers;\n\n    // leave buffers[0] to header\n    int i = 1;\n    // we must skip the dsn message header\n    unsigned int offset = sizeof(message_header);\n    unsigned int dsn_size = sizeof(message_header) + msg_header->body_length;\n    int dsn_buf_count = 0;\n    while (dsn_size > 0 && dsn_buf_count < msg_buffers.size()) {\n        blob &buf = msg_buffers[dsn_buf_count];\n        dassert(dsn_size >= buf.length(), \"%u VS %u\", dsn_size, buf.length());\n        dsn_size -= buf.length();\n        ++dsn_buf_count;\n\n        if (offset >= buf.length()) {\n            offset -= buf.length();\n            continue;\n        }\n        buffers[i].buf = (void *)(buf.data() + offset);\n        buffers[i].sz = buf.length() - offset;\n        offset = 0;\n        ++i;\n    }\n    dassert(dsn_size == 0, \"dsn_size = %u\", dsn_size);\n    dassert(dsn_buf_count + 2 == msg_buffers.size(), \"must have 2 more blob at the end\");\n\n    // set header\n    blob &header_bb = msg_buffers[dsn_buf_count];\n    buffers[0].buf = (void *)header_bb.data();\n    buffers[0].sz = header_bb.length();\n\n    // set end if need\n    blob &end_bb = msg_buffers[dsn_buf_count + 1];\n    if (end_bb.length() > 0) {\n        buffers[i].buf = (void *)end_bb.data();\n        buffers[i].sz = end_bb.length();\n        ++i;\n    }\n\n    return i;\n}\n\nthrift_message_parser::thrift_message_parser()\n    : _v1_specific_vars(new v1_specific_vars), _meta_v0(new request_meta_v0)\n{\n}\n\nthrift_message_parser::~thrift_message_parser() = default;\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/rpc/thrift_message_parser.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/tool-api/message_parser.h>\n#include <dsn/tool-api/rpc_message.h>\n#include <dsn/utility/ports.h>\n#include <dsn/utility/endians.h>\n#include <gtest/gtest_prod.h>\n#include <dsn/cpp/serialization_helper/dsn.layer2_types.h>\n\n#include \"request_meta_types.h\"\n\nnamespace dsn {\n\nstruct request_meta_v0\n{\n    void clear()\n    {\n        hdr_crc32 = 0;\n        body_length = 0;\n        body_crc32 = 0;\n        app_id = 0;\n        partition_index = 0;\n        client_timeout = 0;\n        client_thread_hash = 0;\n        client_partition_hash = 0;\n    }\n\n    uint32_t hdr_crc32 = 0;\n    uint32_t body_length = 0;\n    uint32_t body_crc32 = 0;\n    int32_t app_id = 0;\n    int32_t partition_index = 
0;\n    int32_t client_timeout = 0;\n    int32_t client_thread_hash = 0;\n    uint64_t client_partition_hash = 0;\n};\n\nstruct v1_specific_vars\n{\n    v1_specific_vars() : _meta_v1(new thrift_request_meta_v1) {}\n\n    void clear()\n    {\n        _meta_v1.reset(new thrift_request_meta_v1);\n        _meta_parsed = false;\n        _meta_length = 0;\n        _body_length = 0;\n    }\n\n    bool _meta_parsed{false};\n    uint32_t _meta_length{0};\n    uint32_t _body_length{0};\n    std::unique_ptr<thrift_request_meta_v1> _meta_v1;\n};\n\n#define THRIFT_HDR_SIG (*(uint32_t *)\"THFT\")\n\nDEFINE_CUSTOMIZED_ID(network_header_format, NET_HDR_THRIFT)\n\n// Parses request sent in rDSN thrift protocol, which is\n// mainly used by our Java/GoLang/NodeJs/Python clients,\n// and encodes response to them.\nclass thrift_message_parser final : public message_parser\n{\npublic:\n    thrift_message_parser();\n\n    ~thrift_message_parser() override;\n\n    void reset() override;\n\n    message_ex *get_message_on_receive(message_reader *reader,\n                                       /*out*/ int &read_next) override;\n\n    // thrift response format:\n    //     <total_len(int32)> <thrift_string> <thrift_message_begin> <body_data(bytes)>\n    //     <thrift_message_end>\n    void prepare_on_send(message_ex *msg) override;\n\n    int get_buffers_on_send(message_ex *msg, /*out*/ send_buf *buffers) override;\n\nprivate:\n    message_ex *parse_request_body_v0(message_reader *reader,\n                                      /*out*/ int &read_next);\n\n    message_ex *parse_request_body_v1(message_reader *reader,\n                                      /*out*/ int &read_next);\n\n    bool parse_request_header(message_reader *reader, int &read_next);\n\nprivate:\n    friend class thrift_message_parser_test;\n    FRIEND_TEST(thrift_message_parser_test, get_message_on_receive_incomplete_second_field);\n    FRIEND_TEST(thrift_message_parser_test, get_message_on_receive_incomplete_v0_hdr_len);\n 
   FRIEND_TEST(thrift_message_parser_test, get_message_on_receive_invalid_v0_hdr_length);\n    FRIEND_TEST(thrift_message_parser_test, get_message_on_receive_valid_v0_hdr);\n    FRIEND_TEST(thrift_message_parser_test, get_message_on_receive_incomplete_v1_hdr);\n    FRIEND_TEST(thrift_message_parser_test, get_message_on_receive_valid_v1_hdr);\n\n    int _header_version{-1};\n\n    // meta version 1 specific variables\n    std::unique_ptr<v1_specific_vars> _v1_specific_vars;\n\n    // meta version 0 specific variables\n    std::unique_ptr<request_meta_v0> _meta_v0;\n};\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/scheduler.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/utility/rand.h>\n#include <dsn/tool/simulator.h>\n#include <dsn/service_api_c.h>\n#include <dsn/tool/node_scoper.h>\n#include \"scheduler.h\"\n#include \"env.sim.h\"\n#include <set>\n\nnamespace dsn {\nnamespace tools {\n\nvoid event_wheel::add_event(uint64_t ts, task *t)\n{\n    utils::auto_lock<::dsn::utils::ex_lock> l(_lock);\n\n    std::vector<event_entry> *evts;\n    auto itr = _events.find(ts);\n    if (itr != _events.end())\n        evts = itr->second;\n    else {\n        evts = new std::vector<event_entry>();\n        
_events.insert(std::make_pair(ts, evts));\n    }\n\n    event_entry entry;\n    entry.app_task = t;\n    evts->push_back(entry);\n}\n\nvoid event_wheel::add_system_event(uint64_t ts, std::function<void()> t)\n{\n    utils::auto_lock<::dsn::utils::ex_lock> l(_lock);\n\n    std::vector<event_entry> *evts;\n    auto itr = _events.find(ts);\n    if (itr != _events.end())\n        evts = itr->second;\n    else {\n        evts = new std::vector<event_entry>();\n        _events.insert(std::make_pair(ts, evts));\n    }\n\n    event_entry entry;\n    entry.system_task = std::move(t);\n    entry.app_task = nullptr;\n    evts->push_back(entry);\n}\n\nstd::vector<event_entry> *event_wheel::pop_next_events(/*out*/ uint64_t &ts)\n{\n    utils::auto_lock<::dsn::utils::ex_lock> l(_lock);\n\n    std::vector<event_entry> *evts = NULL;\n    auto itr = _events.begin();\n    if (itr != _events.end()) {\n        evts = itr->second;\n        ts = itr->first;\n        _events.erase(itr);\n    }\n    return evts;\n}\n\nvoid event_wheel::clear()\n{\n    utils::auto_lock<::dsn::utils::ex_lock> l(_lock);\n    _events.clear();\n}\n\n//////////////////////////////////////////////////////////////////////////////////////////////\n\n__thread bool scheduler::_is_scheduling = false;\n\nscheduler::scheduler(void)\n{\n    _time_ns = 0;\n    _running = false;\n    _running_thread = nullptr;\n    task_worker::on_create.put_back(on_task_worker_create, \"simulation.on_task_worker_create\");\n    task_worker::on_start.put_back(on_task_worker_start, \"simulation.on_task_worker_start\");\n\n    for (int i = 0; i <= dsn::task_code::max(); i++) {\n        task_spec::get(i)->on_task_wait_pre.put_back(scheduler::on_task_wait,\n                                                     \"simulation.on_task_wait\");\n        task_spec::get(i)->on_task_wait_notified.put_back(scheduler::on_task_wait_notified,\n                                                          \"simulation.on_task_wait_notified\");\n    }\n\n    
task_ext::register_ext(task_state_ext::deletor);\n    task_worker_ext::register_ext(sim_worker_state::deletor);\n}\n\nscheduler::~scheduler(void) {}\n\n/*static*/ void scheduler::on_task_worker_start(task_worker *worker)\n{\n    while (!scheduler::instance()._running) {\n        std::this_thread::sleep_for(std::chrono::milliseconds(1000));\n    }\n}\n\n/*static*/ void scheduler::on_task_worker_create(task_worker *worker)\n{\n    auto s = task_worker_ext::get_inited(worker);\n    s->worker = worker;\n    s->first_time_schedule = true;\n    s->in_continuation = false;\n    s->index = static_cast<int>(scheduler::instance()._threads.size());\n    scheduler::instance()._threads.push_back(s);\n}\n\n/*static*/ void scheduler::on_task_wait(task *waitor, task *waitee, uint32_t timeout_milliseconds)\n{\n    if (waitor == nullptr)\n        return;\n\n    if (waitee->state() < task_state::TASK_STATE_FINISHED) {\n        auto ts = task_ext::get_inited(waitee);\n        auto wks = task_worker_ext::get(task::get_current_worker());\n        ts->wait_threads.push_back(wks);\n\n        scheduler::instance().wait_schedule(true, false);\n    } else {\n        scheduler::instance().wait_schedule(true, true);\n    }\n}\n\n/*static*/ void scheduler::on_task_wait_notified(task *task)\n{\n    auto ts = task_ext::get(task);\n    if (ts != nullptr) {\n        for (auto &w : ts->wait_threads) {\n            w->is_continuation_ready = true;\n        }\n    }\n}\n\nvoid scheduler::add_task(task *tsk, task_queue *q)\n{\n    auto ts = task_ext::get_inited(tsk);\n    ts->queue = q;\n\n    auto delay = (uint64_t)tsk->delay_milliseconds() * 1000000;\n    tsk->set_delay(0);\n    _wheel.add_event(now_ns() + delay, tsk);\n}\n\nvoid scheduler::add_system_event(uint64_t ts_ns, std::function<void()> t)\n{\n    _wheel.add_system_event(ts_ns, t);\n}\n\nvoid scheduler::start()\n{\n    // init all checkers\n    std::vector<service_app *> apps;\n    service_app::get_all_service_apps(&apps);\n    for 
(checker_info &c : _checkers) {\n        checker *a_checker = c.creator();\n        a_checker->initialize(c.name, apps);\n        c.instance.reset(a_checker);\n    }\n\n    // set flag\n    _running = true;\n}\n\nvoid scheduler::add_checker(const std::string &name, checker::factory f)\n{\n    checker_info info;\n    info.name = name;\n    info.creator = f;\n\n    _checkers.emplace_back(std::move(info));\n}\n\nvoid scheduler::check()\n{\n    for (checker_info &c : _checkers) {\n        if (c.instance != nullptr)\n            c.instance->check();\n    }\n}\n\nvoid scheduler::wait_schedule(bool in_continue, bool is_continue_ready /*= false*/)\n{\n    auto s = task_worker_ext::get(task::get_current_worker());\n    s->in_continuation = in_continue;\n    s->is_continuation_ready = is_continue_ready;\n\n    if (s->first_time_schedule) {\n        s->first_time_schedule = false;\n        if (s->index == 0)\n            schedule();\n    } else {\n        schedule();\n    }\n    s->runnable.wait();\n}\n\nvoid scheduler::schedule()\n{\n    _is_scheduling = true;\n\n    check(); // check before schedule\n\n    while (true) {\n        // run ready workers whenever possible\n        std::vector<int> ready_workers;\n        for (auto &s : _threads) {\n            if ((s->in_continuation && s->is_continuation_ready) ||\n                (!s->in_continuation && s->worker->queue()->count() > 0)) {\n                ready_workers.push_back(s->index);\n            }\n        }\n\n        if (ready_workers.size() > 0) {\n            int i = rand::next_u32(0, (uint32_t)ready_workers.size() - 1);\n            _running_thread = _threads[ready_workers[i]];\n            _running_thread->runnable.release();\n\n            _is_scheduling = false;\n            return;\n        }\n\n        // otherwise, run the timed tasks\n        uint64_t ts = 0;\n        auto events = _wheel.pop_next_events(ts);\n        if (events) {\n            {\n                utils::auto_lock<::dsn::utils::ex_lock> 
l(_lock);\n                _time_ns = ts;\n            }\n\n            // randomize the events, and see\n            std::random_shuffle(\n                events->begin(), events->end(), [](int n) { return rand::next_u32(0, n - 1); });\n\n            for (auto e : *events) {\n                if (e.app_task != nullptr) {\n                    task *t = e.app_task;\n\n                    {\n                        node_scoper ns(t->node());\n                        t->enqueue();\n                    }\n\n                    t->release_ref(); // added by previous t->enqueue from app\n                } else {\n                    dassert(e.system_task != nullptr, \"app and system tasks cannot be both empty\");\n                    e.system_task();\n                }\n            }\n\n            delete events;\n            continue;\n        }\n\n        // wait a moment\n        std::this_thread::sleep_for(std::chrono::milliseconds(100));\n    }\n\n    _is_scheduling = false;\n}\n}\n} // end namespace\n"
  },
  {
    "path": "src/runtime/scheduler.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/tool_api.h>\n#include <dsn/tool/simulator.h>\n#include <dsn/utility/synchronize.h>\n\nnamespace dsn {\nnamespace tools {\n\nstruct event_entry\n{\n    task *app_task;\n    std::function<void()> system_task;\n};\n\nclass event_wheel\n{\npublic:\n    ~event_wheel() { clear(); }\n\n    void add_event(uint64_t ts, task *t);\n    void add_system_event(uint64_t ts, std::function<void()> t);\n    std::vector<event_entry> *pop_next_events(/*out*/ uint64_t &ts);\n    void clear();\n    bool has_more_events() const\n    {\n        
utils::auto_lock<::dsn::utils::ex_lock> l(_lock);\n        return _events.size() > 0;\n    }\n\nprivate:\n    typedef std::map<uint64_t, std::vector<event_entry> *> Events;\n    Events _events;\n    mutable ::dsn::utils::ex_lock _lock;\n};\n\nstruct sim_worker_state\n{\n    utils::semaphore runnable;\n    int index;\n    task_worker *worker;\n    bool first_time_schedule;\n    bool in_continuation;\n    bool is_continuation_ready;\n\n    static void deletor(void *p) { delete (sim_worker_state *)p; }\n};\n\nclass checker;\nclass scheduler : public utils::singleton<scheduler>\n{\npublic:\n    void start();\n    uint64_t now_ns() const\n    {\n        utils::auto_lock<::dsn::utils::ex_lock> l(_lock);\n        return _time_ns;\n    }\n\n    void reset();\n    void add_task(task *task, task_queue *q);\n    void add_system_event(uint64_t ts_ns, std::function<void()> t);\n\n    // TODO: time delay for true, true\n    void wait_schedule(bool in_continue, bool is_continue_ready = false);\n    void add_checker(const std::string &name, checker::factory f);\n    static bool is_scheduling() { return _is_scheduling; }\n\npublic:\n    struct task_state_ext\n    {\n        task_queue *queue;\n        std::list<sim_worker_state *> wait_threads;\n\n        static void deletor(void *p) { delete (task_state_ext *)p; }\n\n        task_state_ext() { queue = nullptr; };\n    };\n    typedef object_extension_helper<sim_worker_state, task_worker> task_worker_ext;\n    typedef object_extension_helper<task_state_ext, task> task_ext;\n\nprivate:\n    event_wheel _wheel;\n    mutable ::dsn::utils::ex_lock _lock;\n    uint64_t _time_ns;\n    bool _running;\n    std::vector<sim_worker_state *> _threads;\n    sim_worker_state *_running_thread;\n    static __thread bool _is_scheduling;\n\n    struct checker_info\n    {\n        std::string name;\n        checker::factory creator;\n        std::unique_ptr<checker> instance;\n    };\n    std::vector<checker_info> _checkers;\n\nprivate:\n    
scheduler(void);\n    ~scheduler(void);\n\n    void schedule();\n    void check();\n\n    static void on_task_worker_create(task_worker *worker);\n    static void on_task_worker_start(task_worker *worker);\n    static void on_task_wait(task *waitor, task *waitee, uint32_t timeout_milliseconds);\n    static void on_task_wait_notified(task *task);\n\n    friend class utils::singleton<scheduler>;\n};\n\n// ------------------  inline implementation ----------------------------\n\ninline void scheduler::reset() { _wheel.clear(); }\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn.security)\n\nthrift_generate_cpp(\n    SECURITY_THRIFT_SRCS\n    SECURITY_THRIFT_HDRS\n    ${CMAKE_CURRENT_SOURCE_DIR}/security.thrift\n)\n\nset(MY_PROJ_SRC ${SECURITY_THRIFT_SRCS})\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_INC_PATH \"\")\n\nset(MY_PROJ_LIBS \"\")\n\nset(MY_PROJ_LIB_PATH \"\")\n\n# Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_object()\n"
  },
  {
    "path": "src/runtime/security/access_controller.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"access_controller.h\"\n\n#include <dsn/utility/flags.h>\n#include <dsn/utility/strings.h>\n#include <dsn/utility/smart_pointers.h>\n#include \"meta_access_controller.h\"\n#include \"replica_access_controller.h\"\n\nnamespace dsn {\nnamespace security {\nDSN_DEFINE_bool(\"security\", enable_acl, false, \"whether enable access controller or not\");\nDSN_TAG_VARIABLE(enable_acl, FT_MUTABLE);\n\nDSN_DEFINE_string(\"security\", super_users, \"\", \"super user for access controller\");\n\naccess_controller::access_controller() { utils::split_args(FLAGS_super_users, _super_users, ','); }\n\naccess_controller::~access_controller() {}\n\nbool access_controller::pre_check(const std::string &user_name)\n{\n    if (!FLAGS_enable_acl || _super_users.find(user_name) != _super_users.end()) {\n        return true;\n    }\n    return false;\n}\n\nstd::unique_ptr<access_controller> create_meta_access_controller()\n{\n    return make_unique<meta_access_controller>();\n}\n\nstd::unique_ptr<access_controller> create_replica_access_controller(const std::string &name)\n{\n    return make_unique<replica_access_controller>(name);\n}\n} // namespace 
security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/access_controller.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <unordered_set>\n\nnamespace dsn {\nclass message_ex;\nnamespace security {\n\nclass access_controller\n{\npublic:\n    access_controller();\n    virtual ~access_controller() = 0;\n\n    /**\n     * update the access controller\n     *    acls - the new acls to update\n     **/\n    virtual void update(const std::string &acls){};\n\n    /**\n     * check if the message received is allowed to do something.\n     *   msg - the message received\n     **/\n    virtual bool allowed(message_ex *msg) = 0;\n\nprotected:\n    bool pre_check(const std::string &user_name);\n    friend class meta_access_controller_test;\n\n    std::unordered_set<std::string> _super_users;\n};\n\nstd::unique_ptr<access_controller> create_meta_access_controller();\n\nstd::unique_ptr<access_controller> create_replica_access_controller(const std::string &name);\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/client_negotiation.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"client_negotiation.h\"\n#include \"negotiation_utils.h\"\n#include \"negotiation_manager.h\"\n\n#include <boost/algorithm/string/join.hpp>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/utility/smart_pointers.h>\n#include <dsn/utility/flags.h>\n\nnamespace dsn {\nnamespace security {\nextern const std::set<std::string> supported_mechanisms;\n\nclient_negotiation::client_negotiation(rpc_session_ptr session) : negotiation(session)\n{\n    _name = fmt::format(\"CLIENT_NEGOTIATION(SERVER={})\", _session->remote_address().to_string());\n}\n\nvoid client_negotiation::start()\n{\n    ddebug_f(\"{}: start negotiation\", _name);\n    list_mechanisms();\n}\n\nvoid client_negotiation::list_mechanisms()\n{\n    _status = negotiation_status::type::SASL_LIST_MECHANISMS;\n    send(_status);\n}\n\nvoid client_negotiation::handle_response(error_code err, const negotiation_response &&response)\n{\n    if (err != ERR_OK) {\n        // ERR_HANDLER_NOT_FOUND means server is old version, which doesn't support authentication\n        if (ERR_HANDLER_NOT_FOUND == err) {\n            ddebug_f(\"{}: treat 
negotiation succeed because server is old version, which doesn't \"\n                     \"support authentication\",\n                     _name);\n            succ_negotiation();\n        } else {\n            fail_negotiation();\n        }\n        return;\n    }\n\n    // make the negotiation succeed if server doesn't enable auth\n    if (negotiation_status::type::SASL_AUTH_DISABLE == response.status) {\n        ddebug_f(\"{}: treat negotiation succeed as server doesn't enable it\", _name);\n        succ_negotiation();\n        return;\n    }\n\n    switch (_status) {\n    case negotiation_status::type::SASL_LIST_MECHANISMS:\n        on_recv_mechanisms(response);\n        break;\n    case negotiation_status::type::SASL_SELECT_MECHANISMS:\n        on_mechanism_selected(response);\n        break;\n    case negotiation_status::type::SASL_INITIATE:\n    case negotiation_status::type::SASL_CHALLENGE_RESP:\n        on_challenge(response);\n        break;\n    default:\n        fail_negotiation();\n    }\n}\n\nvoid client_negotiation::on_recv_mechanisms(const negotiation_response &resp)\n{\n    if (!check_status(resp.status, negotiation_status::type::SASL_LIST_MECHANISMS_RESP)) {\n        fail_negotiation();\n        return;\n    }\n\n    std::string match_mechanism;\n    std::vector<std::string> server_support_mechanisms;\n    std::string resp_string = resp.msg.to_string();\n    utils::split_args(resp_string.c_str(), server_support_mechanisms, ',');\n\n    for (const std::string &server_support_mechanism : server_support_mechanisms) {\n        if (supported_mechanisms.find(server_support_mechanism) != supported_mechanisms.end()) {\n            match_mechanism = server_support_mechanism;\n            break;\n        }\n    }\n\n    if (match_mechanism.empty()) {\n        dwarn_f(\"server only support mechanisms of ({}), can't find expected ({})\",\n                boost::join(supported_mechanisms, \",\"),\n                resp_string);\n        fail_negotiation();\n   
     return;\n    }\n\n    select_mechanism(match_mechanism);\n}\n\nvoid client_negotiation::on_mechanism_selected(const negotiation_response &resp)\n{\n    if (!check_status(resp.status, negotiation_status::type::SASL_SELECT_MECHANISMS_RESP)) {\n        fail_negotiation();\n        return;\n    }\n\n    // init client sasl\n    auto err_s = _sasl->init();\n    if (!err_s.is_ok()) {\n        dwarn_f(\"{}: initialize sasl client failed, error = {}, reason = {}\",\n                _name,\n                err_s.code().to_string(),\n                err_s.description());\n        fail_negotiation();\n        return;\n    }\n\n    // start client sasl, and send `SASL_INITIATE` to `server_negotiation` if everything is ok\n    blob start_output;\n    err_s = _sasl->start(_selected_mechanism, blob(), start_output);\n    if (err_s.is_ok() || ERR_SASL_INCOMPLETE == err_s.code()) {\n        _status = negotiation_status::type::SASL_INITIATE;\n        send(_status, std::move(start_output));\n    } else {\n        dwarn_f(\"{}: start sasl client failed, error = {}, reason = {}\",\n                _name,\n                err_s.code().to_string(),\n                err_s.description());\n        fail_negotiation();\n    }\n}\n\nvoid client_negotiation::on_challenge(const negotiation_response &challenge)\n{\n    if (challenge.status == negotiation_status::type::SASL_CHALLENGE) {\n        blob response_msg;\n        auto err = _sasl->step(challenge.msg, response_msg);\n        if (!err.is_ok() && err.code() != ERR_SASL_INCOMPLETE) {\n            dwarn_f(\"{}: negotiation failed, reason = {}\", _name, err.description());\n            fail_negotiation();\n            return;\n        }\n\n        _status = negotiation_status::type::SASL_CHALLENGE_RESP;\n        send(_status, std::move(response_msg));\n        return;\n    }\n\n    if (challenge.status == negotiation_status::type::SASL_SUCC) {\n        succ_negotiation();\n        return;\n    }\n\n    dwarn_f(\"{}: recv wrong 
negotiation msg type: {}\", _name, enum_to_string(challenge.status));\n    fail_negotiation();\n}\n\nvoid client_negotiation::select_mechanism(const std::string &mechanism)\n{\n    _selected_mechanism = mechanism;\n    _status = negotiation_status::type::SASL_SELECT_MECHANISMS;\n\n    send(_status, blob::create_from_bytes(mechanism.data(), mechanism.length()));\n}\n\nvoid client_negotiation::send(negotiation_status::type status, const blob &msg)\n{\n    auto req = dsn::make_unique<negotiation_request>();\n    req->status = status;\n    req->msg = msg;\n\n    negotiation_rpc rpc(std::move(req), RPC_NEGOTIATION);\n    rpc.call(_session->remote_address(), nullptr, [rpc](error_code err) mutable {\n        negotiation_manager::on_negotiation_response(err, rpc);\n    });\n}\n\nvoid client_negotiation::succ_negotiation()\n{\n    _status = negotiation_status::type::SASL_SUCC;\n    _session->set_negotiation_succeed();\n    ddebug_f(\"{}: negotiation succeed\", _name);\n}\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/client_negotiation.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"negotiation.h\"\n\nnamespace dsn {\nnamespace security {\n\n// client_negotiation negotiates a session on client side.\nclass client_negotiation : public negotiation\n{\npublic:\n    explicit client_negotiation(rpc_session_ptr session);\n\n    void start() override;\n    void handle_response(error_code err, const negotiation_response &&response);\n\nprivate:\n    void on_recv_mechanisms(const negotiation_response &resp);\n    void on_mechanism_selected(const negotiation_response &resp);\n    void on_challenge(const negotiation_response &resp);\n\n    void list_mechanisms();\n    void select_mechanism(const std::string &mechanism);\n    void send(negotiation_status::type status, const blob &msg = blob());\n    void succ_negotiation();\n\n    friend class client_negotiation_test;\n};\n\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/init.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"kinit_context.h\"\n#include \"sasl_init.h\"\n#include \"negotiation_manager.h\"\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/flags.h>\n\nnamespace dsn {\nnamespace security {\nDSN_DECLARE_string(krb5_config);\nDSN_DECLARE_string(krb5_keytab);\n\n/***\n * set kerberos envs(for more details:\n * https://web.mit.edu/kerberos/krb5-1.12/doc/admin/env_variables.html)\n */\nvoid set_krb5_env(bool is_server)\n{\n    setenv(\"KRB5CCNAME\", is_server ? 
\"MEMORY:pegasus-server\" : \"MEMORY:pegasus-client\", 1);\n    setenv(\"KRB5_CONFIG\", FLAGS_krb5_config, 1);\n    setenv(\"KRB5_KTNAME\", FLAGS_krb5_keytab, 1);\n    setenv(\"KRB5RCACHETYPE\", \"none\", 1);\n}\n\nerror_s init_kerberos(bool is_server)\n{\n    // set kerberos env\n    set_krb5_env(is_server);\n\n    // kinit -k -t <keytab_file> <principal>\n    return run_kinit();\n}\n\nbool init(bool is_server)\n{\n    error_s err = init_kerberos(is_server);\n    if (!err.is_ok()) {\n        derror_f(\"initialize kerberos failed, with err = {}\", err.description());\n        return false;\n    }\n    ddebug(\"initialize kerberos succeed\");\n\n    err = init_sasl(is_server);\n    if (!err.is_ok()) {\n        derror_f(\"initialize sasl failed, with err = {}\", err.description());\n        return false;\n    }\n    ddebug(\"initialize sasl succeed\");\n\n    init_join_point();\n    return true;\n}\n\nbool init_for_zookeeper_client()\n{\n    error_s err = run_kinit();\n    if (!err.is_ok()) {\n        derror_f(\"initialize kerberos failed, with err = {}\", err.description());\n        return false;\n    }\n    ddebug(\"initialize kerberos for zookeeper client succeed\");\n\n    err = init_sasl(false);\n    if (!err.is_ok()) {\n        derror_f(\"initialize sasl failed, with err = {}\", err.description());\n        return false;\n    }\n    ddebug(\"initialize sasl for zookeeper client succeed\");\n    return true;\n}\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/init.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/utility/errors.h>\n\nnamespace dsn {\nnamespace security {\n// init security(kerberos and sasl)\nbool init(bool is_server);\n\n// init security only for zookeeper client(kerberos and sasl)\nbool init_for_zookeeper_client();\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/kinit_context.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"kinit_context.h\"\n#include \"utils/shared_io_service.h\"\n\n#include <boost/asio/deadline_timer.hpp>\n#include <fmt/format.h>\n#include <krb5/krb5.h>\n\n#include <dsn/utility/defer.h>\n#include <dsn/utils/time_utils.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/flags.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/smart_pointers.h>\n#include <dsn/utility/rand.h>\n\nnamespace dsn {\nnamespace security {\nDSN_DECLARE_bool(enable_auth);\nDSN_DECLARE_bool(enable_zookeeper_kerberos);\n\n#define KRB5_RETURN_NOT_OK(err, msg)                                                               \\\n    do {                                                                                           \\\n        krb5_error_code __err_code__ = (err);                                                      \\\n        if (__err_code__ != 0) {                                                                   \\\n            return krb5_call_to_errors(__err_code__, (msg));                                       \\\n        }                                                                                          \\\n    } while 
(0);\n\nDSN_DEFINE_string(\"security\", krb5_keytab, \"\", \"absolute path of keytab file\");\nDSN_DEFINE_string(\"security\", krb5_config, \"\", \"absolute path of krb5_config file\");\nDSN_DEFINE_string(\"security\", krb5_principal, \"\", \"kerberos principal\");\nDSN_DEFINE_string(\"security\", service_fqdn, \"\", \"the fully qualified domain name of the server\");\nDSN_DEFINE_string(\"security\", service_name, \"\", \"service name\");\n\n// Attention: we can't do these checks with `DSN_DEFINE_validator`, because some users may not\n// want to use security, so these configurations may not be set. In that situation, the checks\n// would not pass.\nerror_s check_configuration()\n{\n    dassert(FLAGS_enable_auth || FLAGS_enable_zookeeper_kerberos,\n            \"There is no need to check configuration if FLAGS_enable_auth\"\n            \" and FLAGS_enable_zookeeper_kerberos both are not true\");\n\n    if (0 == strlen(FLAGS_krb5_keytab) || !utils::filesystem::file_exists(FLAGS_krb5_keytab)) {\n        return error_s::make(ERR_INVALID_PARAMETERS,\n                             fmt::format(\"invalid keytab file \\\"{}\\\"\", FLAGS_krb5_keytab));\n    }\n\n    if (0 == strlen(FLAGS_krb5_config) || !utils::filesystem::file_exists(FLAGS_krb5_config)) {\n        return error_s::make(ERR_INVALID_PARAMETERS,\n                             fmt::format(\"invalid krb5 config file \\\"{}\\\"\", FLAGS_krb5_config));\n    }\n\n    if (0 == strlen(FLAGS_krb5_principal)) {\n        return error_s::make(ERR_INVALID_PARAMETERS, \"empty principal\");\n    }\n\n    return error_s::ok();\n}\n\nclass kinit_context : public utils::singleton<kinit_context>\n{\npublic:\n    // implementation of 'kinit -k -t <keytab_file> <principal>'\n    error_s kinit();\n    const std::string &username() const { return _user_name; }\n\nprivate:\n    kinit_context() = default;\n    ~kinit_context();\n\n    // init kerberos context\n    void init_krb5_ctx();\n\n    // get _user_name from _principal\n    
error_s parse_username_from_principal();\n\n    // get or renew credentials from KDC and store it to _ccache\n    error_s get_credentials();\n    void schedule_renew_credentials();\n    int32_t get_next_renew_interval();\n\n    error_s wrap_krb5_err(krb5_error_code krb5_err, const std::string &msg);\n    error_s krb5_call_to_errors(krb5_error_code krb5_code, const std::string &prefix_msg);\n\nprivate:\n    krb5_context _krb5_context;\n    // krb5 principal\n    krb5_principal _principal;\n    krb5_keytab _keytab;\n    // credential cache\n    // TODO(zlw): reuse ticket from ccache\n    krb5_ccache _ccache;\n    krb5_get_init_creds_opt *_opt = nullptr;\n\n    // principal and username that logged in as, this determines \"who I am\"\n    std::string _user_name;\n\n    uint64_t _cred_expire_timestamp;\n    std::shared_ptr<boost::asio::deadline_timer> _timer;\n\n    friend class utils::singleton<kinit_context>;\n};\n\nkinit_context::~kinit_context() { krb5_get_init_creds_opt_free(_krb5_context, _opt); }\n\nerror_s kinit_context::kinit()\n{\n    error_s err = check_configuration();\n    if (!err.is_ok()) {\n        return err;\n    }\n\n    // create a krb5 library context.\n    init_krb5_ctx();\n\n    // convert a string principal name to a krb5_principal structure.\n    KRB5_RETURN_NOT_OK(krb5_parse_name(_krb5_context, FLAGS_krb5_principal, &_principal),\n                       \"couldn't parse principal\");\n\n    // get _user_name from _principal\n    RETURN_NOT_OK(parse_username_from_principal());\n\n    // get a handle for a key table.\n    KRB5_RETURN_NOT_OK(krb5_kt_resolve(_krb5_context, FLAGS_krb5_keytab, &_keytab),\n                       \"couldn't resolve keytab file\");\n\n    // acquire credential cache handle\n    KRB5_RETURN_NOT_OK(krb5_cc_default(_krb5_context, &_ccache),\n                       \"couldn't acquire credential cache handle\");\n\n    // initialize credential cache\n    KRB5_RETURN_NOT_OK(krb5_cc_initialize(_krb5_context, _ccache, 
_principal),\n                       \"initialize credential cache failed\");\n\n    // allocate a new initial credential options structure\n    KRB5_RETURN_NOT_OK(krb5_get_init_creds_opt_alloc(_krb5_context, &_opt),\n                       \"alloc get_init_creds_opt structure failed\");\n\n    // get and schedule to renew credentials from KDC and store it into _ccache\n    RETURN_NOT_OK(get_credentials());\n    schedule_renew_credentials();\n\n    return error_s::ok();\n}\n\nvoid kinit_context::init_krb5_ctx()\n{\n    static std::once_flag once;\n    std::call_once(once, [&]() {\n        int64_t err = krb5_init_context(&_krb5_context);\n        dcheck_eq(err, 0);\n    });\n}\n\nerror_s kinit_context::parse_username_from_principal()\n{\n    // Attention: here we just assume the length of username must be little than 1024\n    const uint16_t BUF_LEN = 1024;\n    char buf[BUF_LEN];\n    krb5_error_code err = krb5_aname_to_localname(_krb5_context, _principal, sizeof(buf), buf);\n\n    // KRB5_LNAME_NOTRANS means no translation available for requested principal\n    if (err == KRB5_LNAME_NOTRANS) {\n        if (_principal->length > 0) {\n            int cnt = 0;\n            while (cnt < _principal->length) {\n                std::string tname;\n                tname.assign((const char *)_principal->data[cnt].data,\n                             _principal->data[cnt].length);\n                if (!_user_name.empty()) {\n                    _user_name += '/';\n                }\n                _user_name += tname;\n                cnt++;\n            }\n            return error_s::ok();\n        }\n        return error_s::make(ERR_KRB5_INTERNAL, \"parse username from principal failed\");\n    }\n\n    // KRB5_CONFIG_NOTENUFSPACE means BUF_LEN is not enough\n    if (err == KRB5_CONFIG_NOTENUFSPACE) {\n        return error_s::make(ERR_KRB5_INTERNAL, fmt::format(\"username is larger than {}\", BUF_LEN));\n    }\n    KRB5_RETURN_NOT_OK(err, \"krb5 parse aname to localname 
failed\");\n\n    if (strlen(buf) <= 0) {\n        return error_s::make(ERR_KRB5_INTERNAL, \"empty username\");\n    }\n\n    _user_name.assign((const char *)buf);\n    return error_s::ok();\n}\n\nerror_s kinit_context::get_credentials()\n{\n    krb5_creds creds;\n    error_s err = error_s::ok();\n\n    // get initial credentials using a key table\n    // Notice: the contents of a krb5_creds structure need to be freed by ourselves\n    err = wrap_krb5_err(krb5_get_init_creds_keytab(_krb5_context,\n                                                   &creds,\n                                                   _principal,\n                                                   _keytab,\n                                                   0 /*valid from now*/,\n                                                   nullptr /*empty TKT service name*/,\n                                                   _opt),\n                        \"get_init_cred\");\n    if (!err.is_ok()) {\n        dwarn_f(\"get credentials of {} from KDC failed, reason({})\",\n                FLAGS_krb5_principal,\n                err.description());\n        return err;\n    }\n    auto cleanup = dsn::defer([&]() { krb5_free_cred_contents(_krb5_context, &creds); });\n\n    // store credentials into _ccache.\n    err = wrap_krb5_err(krb5_cc_store_cred(_krb5_context, _ccache, &creds), \"store_cred\");\n    if (!err.is_ok()) {\n        dwarn_f(\"store credentials of {} to cache failed, err({})\",\n                FLAGS_krb5_principal,\n                err.description());\n        return err;\n    }\n\n    _cred_expire_timestamp = creds.times.endtime;\n    ddebug_f(\"get credentials of {} from KDC ok, expires at {}\",\n             FLAGS_krb5_principal,\n             utils::time_s_to_date_time(_cred_expire_timestamp));\n    return err;\n}\n\nvoid kinit_context::schedule_renew_credentials()\n{\n    int64_t renew_gap = get_next_renew_interval();\n    ddebug_f(\"schedule to renew credentials in {} seconds 
later\", renew_gap);\n\n    // why don't we use timers in rDSN framework?\n    //  1. currently the rdsn framework may not have started yet.\n    //  2. the rdsn framework is used for codes of a service_app,\n    //     not for codes under service_app\n    if (nullptr == _timer) {\n        _timer.reset(new boost::asio::deadline_timer(tools::shared_io_service::instance().ios));\n    }\n    _timer->expires_from_now(boost::posix_time::seconds(renew_gap));\n    _timer->async_wait([this](const boost::system::error_code &err) {\n        if (!err.failed()) {\n            get_credentials();\n            schedule_renew_credentials();\n        } else if (err == boost::system::errc::operation_canceled) {\n            dwarn(\"the renew credentials timer is cancelled\");\n        } else {\n            dassert_f(false, \"unhandled error({})\", err.message());\n        }\n    });\n}\n\nint32_t kinit_context::get_next_renew_interval()\n{\n    int32_t time_remaining = _cred_expire_timestamp - utils::get_current_physical_time_s();\n\n    // If the time remaining between now and ticket expiry is:\n    // * > 10 minutes:   We attempt to reacquire the ticket between 5 seconds and 5 minutes before\n    //                   the ticket expires.\n    // * 5 - 10 minutes: We attempt to reacquire the ticket between 5 seconds and 1 minute before\n    //                   the ticket expires.\n    // * < 5 minutes:    Attempt to reacquire the ticket every 'time_remaining'.\n    // The jitter is added to make sure that every server doesn't flood the KDC at the same time.\n    if (time_remaining > 600) {\n        return time_remaining - rand::next_u32(5, 300);\n    } else if (time_remaining > 300) {\n        return time_remaining - rand::next_u32(5, 60);\n    }\n    return time_remaining;\n}\n\n// switch krb5_error_code to error_s\nerror_s kinit_context::krb5_call_to_errors(krb5_error_code krb5_code, const std::string &prefix_msg)\n{\n    std::string msg = prefix_msg;\n\n    const char 
*error_msg = krb5_get_error_message(_krb5_context, krb5_code);\n    msg += error_msg;\n    krb5_free_error_message(_krb5_context, error_msg);\n\n    return error_s::make(ERR_KRB5_INTERNAL, msg);\n}\n\nerror_s kinit_context::wrap_krb5_err(krb5_error_code krb5_err, const std::string &msg)\n{\n    error_s result_err;\n    if (krb5_err != 0) {\n        result_err = krb5_call_to_errors(krb5_err, msg);\n    } else {\n        result_err = error_s::ok();\n    }\n\n    return result_err;\n}\n\nerror_s run_kinit() { return kinit_context::instance().kinit(); }\n\nconst std::string &get_username() { return kinit_context::instance().username(); }\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/kinit_context.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/utility/errors.h>\n\nnamespace dsn {\nnamespace security {\nextern error_s run_kinit();\nextern const std::string &get_username();\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/meta_access_controller.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"meta_access_controller.h\"\n\n#include <dsn/tool-api/rpc_message.h>\n#include <dsn/utility/flags.h>\n#include <dsn/tool-api/network.h>\n#include <dsn/dist/fmt_logging.h>\n\nnamespace dsn {\nnamespace security {\nDSN_DEFINE_string(\"security\",\n                  meta_acl_rpc_allow_list,\n                  \"\",\n                  \"allowed list of rpc codes for meta_access_controller\");\n\nmeta_access_controller::meta_access_controller()\n{\n    // MetaServer serves the allow-list RPC from all users. 
RPCs not listed are accessible only to\n    // superusers.\n    if (strlen(FLAGS_meta_acl_rpc_allow_list) == 0) {\n        register_allowed_list(\"RPC_CM_LIST_APPS\");\n        register_allowed_list(\"RPC_CM_LIST_NODES\");\n        register_allowed_list(\"RPC_CM_CLUSTER_INFO\");\n        register_allowed_list(\"RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX\");\n    } else {\n        std::vector<std::string> rpc_code_white_list;\n        utils::split_args(FLAGS_meta_acl_rpc_allow_list, rpc_code_white_list, ',');\n        for (const auto &rpc_code : rpc_code_white_list) {\n            register_allowed_list(rpc_code);\n        }\n    }\n}\n\nbool meta_access_controller::allowed(message_ex *msg)\n{\n    if (pre_check(msg->io_session->get_client_username()) ||\n        _allowed_rpc_code_list.find(msg->rpc_code().code()) != _allowed_rpc_code_list.end()) {\n        return true;\n    }\n    return false;\n}\n\nvoid meta_access_controller::register_allowed_list(const std::string &rpc_code)\n{\n    auto code = task_code::try_get(rpc_code, TASK_CODE_INVALID);\n    dassert_f(code != TASK_CODE_INVALID,\n              \"invalid task code({}) in rpc_code_white_list of security section\",\n              rpc_code);\n\n    _allowed_rpc_code_list.insert(code);\n}\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/meta_access_controller.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"access_controller.h\"\n\n#include <unordered_set>\n\nnamespace dsn {\nclass message_ex;\nnamespace security {\n\nclass meta_access_controller : public access_controller\n{\npublic:\n    meta_access_controller();\n    bool allowed(message_ex *msg) override;\n\nprivate:\n    void register_allowed_list(const std::string &rpc_code);\n\n    std::unordered_set<int> _allowed_rpc_code_list;\n};\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/negotiation.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"negotiation.h\"\n#include \"client_negotiation.h\"\n#include \"server_negotiation.h\"\n#include \"negotiation_utils.h\"\n\n#include <dsn/utility/flags.h>\n#include <dsn/utility/smart_pointers.h>\n#include <dsn/dist/fmt_logging.h>\n\nnamespace dsn {\nnamespace security {\n/// TODO(zlw):we can't get string list from cflags now,\n/// so we should get supported mechanisms from config in the later\nconst std::set<std::string> supported_mechanisms{\"GSSAPI\"};\n\nDSN_DEFINE_bool(\"security\", enable_auth, false, \"whether open auth or not\");\nDSN_DEFINE_bool(\"security\",\n                enable_zookeeper_kerberos,\n                false,\n                \"whether to enable kerberos for zookeeper client\");\nDSN_DEFINE_bool(\"security\", mandatory_auth, false, \"wheter to do authertication mandatorily\");\nDSN_TAG_VARIABLE(mandatory_auth, FT_MUTABLE);\n\nnegotiation::~negotiation() {}\n\nstd::unique_ptr<negotiation> create_negotiation(bool is_client, rpc_session *session)\n{\n    if (is_client) {\n        return make_unique<client_negotiation>(session);\n    } else {\n        return make_unique<server_negotiation>(session);\n    
}\n}\n\nvoid negotiation::fail_negotiation()\n{\n    _status = negotiation_status::type::SASL_AUTH_FAIL;\n    _session->on_failure(true);\n}\n\nbool negotiation::check_status(negotiation_status::type status,\n                               negotiation_status::type expected_status)\n{\n    if (status != expected_status) {\n        dwarn_f(\"{}: get message({}) while expect({})\",\n                _name,\n                enum_to_string(status),\n                enum_to_string(expected_status));\n        return false;\n    }\n\n    return true;\n}\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/negotiation.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"security_types.h\"\n#include \"sasl_wrapper.h\"\n\n#include <memory>\n#include <dsn/cpp/rpc_holder.h>\n\nnamespace dsn {\nclass rpc_session;\n\nnamespace security {\ntypedef rpc_holder<negotiation_request, negotiation_response> negotiation_rpc;\n\nclass negotiation\n{\npublic:\n    explicit negotiation(rpc_session_ptr session)\n        : _session(std::move(session)), _status(negotiation_status::type::INVALID)\n    {\n        _sasl = create_sasl_wrapper(_session->is_client());\n    }\n\n    virtual ~negotiation() = 0;\n\n    virtual void start() = 0;\n    bool negotiation_succeed() const { return _status == negotiation_status::type::SASL_SUCC; }\n    void fail_negotiation();\n    // check whether the status is equal to expected_status\n    // ret value:\n    //   true:  status == expected_status\n    //   false: status != expected_status\n    bool check_status(negotiation_status::type status, negotiation_status::type expected_status);\n\nprotected:\n    rpc_session_ptr _session;\n    std::string _name;\n    negotiation_status::type _status;\n    std::string _selected_mechanism;\n    std::unique_ptr<sasl_wrapper> 
_sasl;\n};\n\nstd::unique_ptr<negotiation> create_negotiation(bool is_client, rpc_session *session);\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/negotiation_manager.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"negotiation_manager.h\"\n#include \"negotiation_utils.h\"\n#include \"server_negotiation.h\"\n#include \"client_negotiation.h\"\n\n#include <dsn/utility/flags.h>\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/dist/failure_detector/fd.code.definition.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/http/http_server.h>\n\nnamespace dsn {\nnamespace security {\nDSN_DECLARE_bool(enable_auth);\nDSN_DECLARE_bool(mandatory_auth);\n\ninline bool is_negotiation_message(dsn::task_code code)\n{\n    return code == RPC_NEGOTIATION || code == RPC_NEGOTIATION_ACK;\n}\n\n// in_white_list returns if the rpc code can be allowed to bypass negotiation.\ninline bool in_white_list(task_code code)\n{\n    return is_negotiation_message(code) || fd::is_failure_detector_message(code) ||\n           is_http_message(code);\n}\n\n/*static*/ negotiation_map negotiation_manager::_negotiations;\n/*static*/ utils::rw_lock_nr negotiation_manager::_lock;\n\nnegotiation_manager::negotiation_manager() : serverlet(\"negotiation_manager\") {}\n\nvoid negotiation_manager::open_service()\n{\n    register_rpc_handler_with_rpc_holder(\n        RPC_NEGOTIATION, 
\"Negotiation\", &negotiation_manager::on_negotiation_request);\n}\n\nvoid negotiation_manager::on_negotiation_request(negotiation_rpc rpc)\n{\n    dassert(!rpc.dsn_request()->io_session->is_client(),\n            \"only server session receives negotiation request\");\n\n    // reply SASL_AUTH_DISABLE if auth is not enable\n    if (!security::FLAGS_enable_auth) {\n        rpc.response().status = negotiation_status::type::SASL_AUTH_DISABLE;\n        return;\n    }\n\n    std::shared_ptr<negotiation> nego = get_negotiation(rpc);\n    if (nullptr != nego) {\n        auto srv_negotiation = static_cast<server_negotiation *>(nego.get());\n        srv_negotiation->handle_request(rpc);\n    }\n}\n\nvoid negotiation_manager::on_negotiation_response(error_code err, negotiation_rpc rpc)\n{\n    dassert(rpc.dsn_request()->io_session->is_client(),\n            \"only client session receives negotiation response\");\n\n    std::shared_ptr<negotiation> nego = get_negotiation(rpc);\n    if (nullptr != nego) {\n        auto cli_negotiation = static_cast<client_negotiation *>(nego.get());\n        cli_negotiation->handle_response(err, std::move(rpc.response()));\n    }\n}\n\nvoid negotiation_manager::on_rpc_connected(rpc_session *session)\n{\n    std::shared_ptr<negotiation> nego = security::create_negotiation(session->is_client(), session);\n    nego->start();\n    {\n        utils::auto_write_lock l(_lock);\n        _negotiations[session] = std::move(nego);\n    }\n}\n\nvoid negotiation_manager::on_rpc_disconnected(rpc_session *session)\n{\n    {\n        utils::auto_write_lock l(_lock);\n        _negotiations.erase(session);\n    }\n}\n\n// `on_rpc_send_msg` and `on_rpc_recv_msg` will be called by both server and client session.\n// For server session, it can bypass negotiation if mandatory_auth is false.\n// mandatory_auth is a server-side config only, it doesn't have the same effect for\n// client session.\nbool negotiation_manager::on_rpc_recv_msg(message_ex *msg)\n{\n    if 
(!msg->io_session->is_client() && !FLAGS_mandatory_auth) {\n        // if this is server_session and mandatory_auth is turned off.\n        return true;\n    }\n\n    return dsn_likely(msg->io_session->is_negotiation_succeed()) || in_white_list(msg->rpc_code());\n}\n\nbool negotiation_manager::on_rpc_send_msg(message_ex *msg)\n{\n    if (!msg->io_session->is_client() && !FLAGS_mandatory_auth) {\n        // if this is server_session and mandatory_auth is turned off.\n        return true;\n    }\n\n    // if try_pend_message returns true, the msg has been pended to the resend message queue\n    return in_white_list(msg->rpc_code()) || !msg->io_session->try_pend_message(msg);\n}\n\nstd::shared_ptr<negotiation> negotiation_manager::get_negotiation(negotiation_rpc rpc)\n{\n    utils::auto_read_lock l(_lock);\n    auto it = _negotiations.find(rpc.dsn_request()->io_session);\n    if (it == _negotiations.end()) {\n        ddebug_f(\"negotiation was removed for msg: {}, {}\",\n                 rpc.dsn_request()->rpc_code().to_string(),\n                 rpc.remote_address().to_string());\n        return nullptr;\n    }\n\n    return it->second;\n}\n\nvoid init_join_point()\n{\n    rpc_session::on_rpc_session_connected.put_back(negotiation_manager::on_rpc_connected,\n                                                   \"security\");\n    rpc_session::on_rpc_session_disconnected.put_back(negotiation_manager::on_rpc_disconnected,\n                                                      \"security\");\n    rpc_session::on_rpc_recv_message.put_native(negotiation_manager::on_rpc_recv_msg);\n    rpc_session::on_rpc_send_message.put_native(negotiation_manager::on_rpc_send_msg);\n}\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/negotiation_manager.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"server_negotiation.h\"\n\n#include <dsn/cpp/serverlet.h>\n\nnamespace dsn {\nnamespace security {\ntypedef std::unordered_map<rpc_session *, std::shared_ptr<negotiation>> negotiation_map;\n\nclass negotiation_manager : public serverlet<negotiation_manager>,\n                            public utils::singleton<negotiation_manager>\n{\npublic:\n    static void on_rpc_connected(rpc_session *session);\n    static void on_rpc_disconnected(rpc_session *session);\n    static bool on_rpc_recv_msg(message_ex *msg);\n    static bool on_rpc_send_msg(message_ex *msg);\n    static void on_negotiation_response(error_code err, negotiation_rpc rpc);\n\n    void open_service();\n\nprivate:\n    negotiation_manager();\n    ~negotiation_manager() = default;\n\n    void on_negotiation_request(negotiation_rpc rpc);\n    static std::shared_ptr<negotiation> get_negotiation(negotiation_rpc rpc);\n\n    friend class utils::singleton<negotiation_manager>;\n    friend class negotiation_manager_test;\n\n    static utils::rw_lock_nr _lock; // [\n    static negotiation_map _negotiations;\n    //]\n};\n\nvoid init_join_point();\n} // namespace 
security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/negotiation_utils.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"security_types.h\"\n\nnamespace dsn {\nnamespace security {\n\n// TODO(wutao): rename to negotiation_status_to_string\ninline const char *enum_to_string(negotiation_status::type s)\n{\n    switch (s) {\n    case negotiation_status::type::SASL_LIST_MECHANISMS:\n        return \"negotiation_list_mechanisms\";\n    case negotiation_status::type::SASL_LIST_MECHANISMS_RESP:\n        return \"negotiation_list_mechanisms_resp\";\n    case negotiation_status::type::SASL_SELECT_MECHANISMS:\n        return \"negotiation_select_mechanisms\";\n    case negotiation_status::type::SASL_SELECT_MECHANISMS_RESP:\n        return \"negotiation_select_mechanisms_resp\";\n    case negotiation_status::type::SASL_SUCC:\n        return \"negotiation_succ\";\n    case negotiation_status::type::SASL_AUTH_FAIL:\n        return \"negotiation_auth_fail\";\n    case negotiation_status::type::SASL_INITIATE:\n        return \"negotiation_initiate\";\n    case negotiation_status::type::SASL_CHALLENGE:\n        return \"negotiation_challenge\";\n    case negotiation_status::type::SASL_CHALLENGE_RESP:\n        return 
\"negotiation_challenge_response\";\n    case negotiation_status::type::SASL_AUTH_DISABLE:\n        return \"negotiation_auth_disable\";\n    case negotiation_status::type::INVALID:\n        return \"negotiation_invalid\";\n    default:\n        return \"negotiation-unknown\";\n    }\n}\n\nDEFINE_TASK_CODE_RPC(RPC_NEGOTIATION, TASK_PRIORITY_COMMON, dsn::THREAD_POOL_DEFAULT)\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/replica_access_controller.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"replica_access_controller.h\"\n\n#include <dsn/tool-api/rpc_message.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/tool-api/network.h>\n\nnamespace dsn {\nnamespace security {\nreplica_access_controller::replica_access_controller(const std::string &name) { _name = name; }\n\nbool replica_access_controller::allowed(message_ex *msg)\n{\n    const std::string &user_name = msg->io_session->get_client_username();\n    if (pre_check(user_name)) {\n        return true;\n    }\n\n    {\n        utils::auto_read_lock l(_lock);\n        // If the user didn't specify any ACL, it means this table is publicly accessible to\n        // everyone. This is a backdoor to allow old-version clients to gracefully upgrade. 
After\n        // all clients are confirmed to be fully upgraded, usernames can be added to the ACL and\n        // the table will be truly protected.\n        if (!_users.empty() && _users.find(user_name) == _users.end()) {\n            ddebug_f(\"{}: user_name {} doesn't exist in acls map\", _name, user_name);\n            return false;\n        }\n        return true;\n    }\n}\n\nvoid replica_access_controller::update(const std::string &users)\n{\n    {\n        // check to see whether we should update it or not.\n        utils::auto_read_lock l(_lock);\n        if (_env_users == users) {\n            return;\n        }\n    }\n\n    std::unordered_set<std::string> users_set;\n    utils::split_args(users.c_str(), users_set, ',');\n    {\n        utils::auto_write_lock l(_lock);\n        // This swap operation is in constant time\n        _users.swap(users_set);\n        _env_users = users;\n    }\n}\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/replica_access_controller.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/utility/synchronize.h>\n#include \"access_controller.h\"\n\nnamespace dsn {\nnamespace security {\nclass replica_access_controller : public access_controller\n{\npublic:\n    explicit replica_access_controller(const std::string &name);\n    bool allowed(message_ex *msg) override;\n    void update(const std::string &users) override;\n\nprivate:\n    utils::rw_lock_nr _lock; // [\n    std::unordered_set<std::string> _users;\n    std::string _env_users;\n    // ]\n    std::string _name;\n\n    friend class replica_access_controller_test;\n};\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/sasl_client_wrapper.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"sasl_client_wrapper.h\"\n\n#include <sasl/sasl.h>\n#include <dsn/utility/flags.h>\n#include <dsn/utility/fail_point.h>\n\nnamespace dsn {\nnamespace security {\nDSN_DECLARE_string(service_fqdn);\nDSN_DECLARE_string(service_name);\n\nerror_s sasl_client_wrapper::init()\n{\n    FAIL_POINT_INJECT_F(\"sasl_client_wrapper_init\", [](dsn::string_view str) {\n        error_code err = error_code::try_get(str.data(), ERR_UNKNOWN);\n        return error_s::make(err);\n    });\n\n    int sasl_err = sasl_client_new(\n        FLAGS_service_name, FLAGS_service_fqdn, nullptr, nullptr, nullptr, 0, &_conn);\n    return wrap_error(sasl_err);\n}\n\nerror_s sasl_client_wrapper::start(const std::string &mechanism, const blob &input, blob &output)\n{\n    FAIL_POINT_INJECT_F(\"sasl_client_wrapper_start\", [](dsn::string_view str) {\n        error_code err = error_code::try_get(str.data(), ERR_UNKNOWN);\n        return error_s::make(err);\n    });\n\n    const char *msg = nullptr;\n    unsigned msg_len = 0;\n    const char *client_mech = nullptr;\n    int sasl_err =\n        sasl_client_start(_conn, mechanism.c_str(), nullptr, &msg, &msg_len, 
&client_mech);\n\n    output = blob::create_from_bytes(msg, msg_len);\n    return wrap_error(sasl_err);\n}\n\nerror_s sasl_client_wrapper::step(const blob &input, blob &output)\n{\n    FAIL_POINT_INJECT_F(\"sasl_client_wrapper_step\", [](dsn::string_view str) {\n        error_code err = error_code::try_get(str.data(), ERR_UNKNOWN);\n        return error_s::make(err);\n    });\n\n    const char *msg = nullptr;\n    unsigned msg_len = 0;\n    int sasl_err = sasl_client_step(_conn, input.data(), input.length(), nullptr, &msg, &msg_len);\n\n    output = blob::create_from_bytes(msg, msg_len);\n    return wrap_error(sasl_err);\n}\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/sasl_client_wrapper.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"sasl_wrapper.h\"\n\nnamespace dsn {\nnamespace security {\n\n// sasl_client_wrapper is a simple wrapper over cyrus-sasl's sasl_client_xxx API.\nclass sasl_client_wrapper : public sasl_wrapper\n{\npublic:\n    sasl_client_wrapper() = default;\n    ~sasl_client_wrapper() override = default;\n\n    error_s init();\n    error_s start(const std::string &mechanism, const blob &input, blob &output);\n    error_s step(const blob &input, blob &output);\n};\n\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/sasl_init.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"sasl_init.h\"\n#include \"kinit_context.h\"\n\n#include <sasl/sasl.h>\n#include <sasl/saslplug.h>\n#include <functional>\n\n#include <dsn/c/api_utilities.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/flags.h>\n#include <dsn/utility/synchronize.h>\n\nnamespace dsn {\nnamespace security {\nDSN_DEFINE_string(\"security\", sasl_plugin_path, \"/usr/lib/sasl2\", \"path to search sasl plugins\");\n\ndsn_log_level_t get_dsn_log_level(int level)\n{\n    // The log levels of LOG_LEVEL_DEBUG and LOG_LEVEL_INFORMATION are in reverse order.\n    // So here we should compatible with this case.\n    switch (level) {\n    case SASL_LOG_ERR:\n        return LOG_LEVEL_ERROR;\n    case SASL_LOG_FAIL:\n    case SASL_LOG_WARN:\n        return LOG_LEVEL_WARNING;\n    case SASL_LOG_NOTE:\n        return LOG_LEVEL_DEBUG;\n    default:\n        return LOG_LEVEL_INFORMATION;\n    }\n}\n\nint sasl_simple_logger(void *context, int level, const char *msg)\n{\n    if (SASL_LOG_NONE == level || nullptr == msg) {\n        return SASL_OK;\n    }\n\n    dlog_f(get_dsn_log_level(level), \"sasl log info: {}\", msg);\n    return SASL_OK;\n}\n\nint 
sasl_get_path(void *context, char **path)\n{\n    if (nullptr == path) {\n        return SASL_BADPARAM;\n    }\n    *path = const_cast<char *>(FLAGS_sasl_plugin_path);\n    return SASL_OK;\n}\n\nint sasl_get_username(void *context, int id, const char **result, unsigned *len)\n{\n    if (nullptr == result) {\n        return SASL_BADPARAM;\n    }\n    static const std::string username = get_username();\n    switch (id) {\n    case SASL_CB_USER:\n    case SASL_CB_AUTHNAME:\n        *result = username.c_str();\n        if (len != nullptr) {\n            *len = username.length();\n        }\n        return SASL_OK;\n    default:\n        dassert_f(false, \"unexpected SASL callback type: {}\", id);\n        return SASL_BADPARAM;\n    }\n}\n\nsasl_callback_t client_callbacks[] = {\n    {SASL_CB_USER, (sasl_callback_ft)&sasl_get_username, nullptr},\n    {SASL_CB_GETPATH, (sasl_callback_ft)&sasl_get_path, nullptr},\n    {SASL_CB_AUTHNAME, (sasl_callback_ft)&sasl_get_username, nullptr},\n    {SASL_CB_LOG, (sasl_callback_ft)&sasl_simple_logger, nullptr},\n    {SASL_CB_LIST_END, nullptr, nullptr}};\n\nsasl_callback_t server_callbacks[] = {{SASL_CB_LOG, (sasl_callback_ft)&sasl_simple_logger, nullptr},\n                                      {SASL_CB_GETPATH, (sasl_callback_ft)&sasl_get_path, nullptr},\n                                      {SASL_CB_LIST_END, nullptr, nullptr}};\n\n// provide mutex function for sasl\nvoid *sasl_mutex_alloc_local() { return static_cast<void *>(new utils::ex_lock_nr); }\n\nvoid sasl_mutex_free_local(void *m) { delete static_cast<utils::ex_lock_nr *>(m); }\n\nint sasl_mutex_lock_local(void *m)\n{\n    static_cast<utils::ex_lock_nr *>(m)->lock();\n    return 0;\n}\n\nint sasl_mutex_unlock_local(void *m)\n{\n    static_cast<utils::ex_lock_nr *>(m)->unlock();\n    return 0;\n}\n\nvoid sasl_set_mutex_local()\n{\n    // sasl_set_mutex is a function in <sasl/sasl.h>\n    sasl_set_mutex(&sasl_mutex_alloc_local,\n                   &sasl_mutex_lock_local,\n 
                  &sasl_mutex_unlock_local,\n                   &sasl_mutex_free_local);\n}\n\nerror_s init_sasl(bool is_server)\n{\n    // server is also a client to other server.\n    // for example: replica server is a client of meta server.\n    sasl_set_mutex_local();\n    int err = sasl_client_init(&client_callbacks[0]);\n    error_s ret = error_s::make(ERR_OK);\n    if (err != SASL_OK) {\n        ret = error_s::make(ERR_SASL_INTERNAL);\n        ret << \"initialize sasl client failed with error: \"\n            << sasl_errstring(err, nullptr, nullptr);\n        return ret;\n    }\n    if (is_server) {\n        err = sasl_server_init(&server_callbacks[0], \"pegasus\");\n        if (err != SASL_OK) {\n            ret = error_s::make(ERR_SASL_INTERNAL);\n            ret << \"initialize sasl server failed with error: \"\n                << sasl_errstring(err, nullptr, nullptr);\n            return ret;\n        }\n    }\n    return ret;\n}\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/sasl_init.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/utility/errors.h>\n\nnamespace dsn {\nnamespace security {\n// you must have already initialized kerberos before call init_sasl\nerror_s init_sasl(bool is_server);\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/sasl_server_wrapper.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"sasl_server_wrapper.h\"\n\n#include <sasl/sasl.h>\n#include <dsn/utility/flags.h>\n#include <dsn/utility/fail_point.h>\n\nnamespace dsn {\nnamespace security {\nDSN_DECLARE_string(service_fqdn);\nDSN_DECLARE_string(service_name);\n\nerror_s sasl_server_wrapper::init()\n{\n    FAIL_POINT_INJECT_F(\"sasl_server_wrapper_init\", [](dsn::string_view str) {\n        error_code err = error_code::try_get(str.data(), ERR_UNKNOWN);\n        return error_s::make(err);\n    });\n\n    int sasl_err = sasl_server_new(\n        FLAGS_service_name, FLAGS_service_fqdn, nullptr, nullptr, nullptr, nullptr, 0, &_conn);\n    return wrap_error(sasl_err);\n}\n\nerror_s sasl_server_wrapper::start(const std::string &mechanism, const blob &input, blob &output)\n{\n    FAIL_POINT_INJECT_F(\"sasl_server_wrapper_start\", [](dsn::string_view str) {\n        error_code err = error_code::try_get(str.data(), ERR_UNKNOWN);\n        return error_s::make(err);\n    });\n\n    const char *msg = nullptr;\n    unsigned msg_len = 0;\n    int sasl_err =\n        sasl_server_start(_conn, mechanism.c_str(), input.data(), input.length(), &msg, &msg_len);\n\n    output = 
blob::create_from_bytes(msg, msg_len);\n    return wrap_error(sasl_err);\n}\n\nerror_s sasl_server_wrapper::step(const blob &input, blob &output)\n{\n    FAIL_POINT_INJECT_F(\"sasl_server_wrapper_step\", [](dsn::string_view str) {\n        error_code err = error_code::try_get(str.data(), ERR_UNKNOWN);\n        return error_s::make(err);\n    });\n\n    const char *msg = nullptr;\n    unsigned msg_len = 0;\n    int sasl_err = sasl_server_step(_conn, input.data(), input.length(), &msg, &msg_len);\n\n    output = blob::create_from_bytes(msg, msg_len);\n    return wrap_error(sasl_err);\n}\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/sasl_server_wrapper.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"sasl_wrapper.h\"\n\nnamespace dsn {\nnamespace security {\nclass sasl_server_wrapper : public sasl_wrapper\n{\npublic:\n    sasl_server_wrapper() = default;\n    ~sasl_server_wrapper() = default;\n\n    error_s init();\n    error_s start(const std::string &mechanism, const blob &input, blob &output);\n    error_s step(const blob &input, blob &output);\n};\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/sasl_wrapper.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"sasl_wrapper.h\"\n#include \"sasl_server_wrapper.h\"\n#include \"sasl_client_wrapper.h\"\n\n#include <sasl/sasl.h>\n#include <dsn/utility/fail_point.h>\n\nnamespace dsn {\nnamespace security {\nconst char *sasl_err_desc(int status, sasl_conn_t *conn)\n{\n    if (conn != nullptr) {\n        return sasl_errdetail(conn);\n    }\n    return sasl_errstring(status, nullptr, nullptr);\n}\n\nsasl_wrapper::~sasl_wrapper()\n{\n    if (nullptr != _conn) {\n        sasl_dispose(&_conn);\n    }\n}\n\nerror_s sasl_wrapper::retrieve_username(std::string &output)\n{\n    FAIL_POINT_INJECT_F(\"sasl_wrapper_retrieve_username\", [](dsn::string_view str) {\n        error_code err = error_code::try_get(str.data(), ERR_UNKNOWN);\n        return error_s::make(err);\n    });\n\n    // retrieve username from _conn.\n    // If this is a sasl server, it gets the name of the corresponding sasl client.\n    // But if this is a sasl client, it gets the name of itself\n    char *username = nullptr;\n    error_s err_s = wrap_error(sasl_getprop(_conn, SASL_USERNAME, (const void **)&username));\n    if (err_s.is_ok()) {\n        output = username;\n        output = 
output.substr(0, output.find_last_of('@'));\n        output = output.substr(0, output.find_first_of('/'));\n    }\n    return err_s;\n}\n\nerror_s sasl_wrapper::wrap_error(int sasl_err)\n{\n    error_s ret;\n    switch (sasl_err) {\n    case SASL_OK:\n        return error_s::make(ERR_OK);\n    case SASL_CONTINUE:\n        return error_s::make(ERR_SASL_INCOMPLETE);\n    case SASL_FAIL:      // Generic failure (encompasses missing krb5 credentials).\n    case SASL_BADAUTH:   // Authentication failure.\n    case SASL_BADMAC:    // Decode failure.\n    case SASL_NOAUTHZ:   // Authorization failure.\n    case SASL_NOUSER:    // User not found.\n    case SASL_WRONGMECH: // Server doesn't support requested mechanism.\n    case SASL_BADSERV: { // Server failed mutual authentication.\n        ret = error_s::make(ERR_SASL_INTERNAL);\n        ret << \"sasl auth failed, error: \" << sasl_err_desc(sasl_err, _conn);\n        break;\n    }\n    default:\n        ret = error_s::make(ERR_UNKNOWN);\n        break;\n    }\n    return ret;\n}\n\nstd::unique_ptr<sasl_wrapper> create_sasl_wrapper(bool is_client)\n{\n    if (is_client) {\n        return make_unique<sasl_client_wrapper>();\n    } else {\n        return make_unique<sasl_server_wrapper>();\n    }\n}\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/sasl_wrapper.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/utility/errors.h>\n\ntypedef struct sasl_conn sasl_conn_t;\n\nnamespace dsn {\nnamespace security {\nclass sasl_wrapper\n{\npublic:\n    virtual ~sasl_wrapper();\n\n    virtual error_s init() = 0;\n    virtual error_s start(const std::string &mechanism, const blob &input, blob &output) = 0;\n    virtual error_s step(const blob &input, blob &output) = 0;\n    /**\n     * retrieve username from sasl connection.\n     * If this is a sasl server, it gets the name of the corresponding sasl client.\n     * But if this is a sasl client, it gets the name of itself\n     **/\n    error_s retrieve_username(/*out*/ std::string &output);\n\nprotected:\n    sasl_wrapper() = default;\n\n    // wrap_error wraps a sasl error with full description.\n    error_s wrap_error(int sasl_err);\n\n    sasl_conn_t *_conn = nullptr;\n};\n\nstd::unique_ptr<sasl_wrapper> create_sasl_wrapper(bool is_client);\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/security/security.thrift",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\ninclude \"../../dsn.thrift\"\n\nnamespace cpp dsn.security\n\n// negotiation process:\n//\n//                       client                              server\n//                          | ---    SASL_LIST_MECHANISMS     --> |\n//                          | <--  SASL_LIST_MECHANISMS_RESP  --- |\n//                          | --     SASL_SELECT_MECHANISMS   --> |\n//                          | <-- SASL_SELECT_MECHANISMS_RESP --- |\n//                          |                                     |\n//                          | ---       SASL_INITIATE         --> |\n//                          |                                     |\n//                          | <--       SASL_CHALLENGE        --- |\n//                          | ---     SASL_CHALLENGE_RESP     --> |\n//                          |                                     |\n//                          |               .....                 
|\n//                          |                                     |\n//                          | <--       SASL_CHALLENGE        --- |\n//                          | ---     SASL_CHALLENGE_RESP     --> |\n//                          |                                     | (authentication will succeed\n//                          |                                     |  if all challenges passed)\n//                          | <--         SASL_SUCC           --- |\n// (client won't respond    |                                     |\n// if server says ok)       |                                     |\n//                          | ---         RPC_CALL           ---> |\n//                          | <--         RPC_RESP           ---- |\n\nenum negotiation_status {\n    INVALID\n    SASL_LIST_MECHANISMS\n    SASL_LIST_MECHANISMS_RESP\n    SASL_SELECT_MECHANISMS\n    SASL_SELECT_MECHANISMS_RESP\n    SASL_INITIATE\n    SASL_CHALLENGE\n    SASL_CHALLENGE_RESP\n    SASL_SUCC\n    SASL_AUTH_DISABLE\n    SASL_AUTH_FAIL\n}\n\nstruct negotiation_request {\n    1: negotiation_status status;\n    2: dsn.blob msg;\n}\n\nstruct negotiation_response {\n    1: negotiation_status status;\n    2: dsn.blob msg;\n}\n"
  },
  {
    "path": "src/runtime/security/server_negotiation.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"server_negotiation.h\"\n#include \"negotiation_utils.h\"\n#include \"sasl_init.h\"\n\n#include <boost/algorithm/string/join.hpp>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/flags.h>\n#include <dsn/utility/fail_point.h>\n\nnamespace dsn {\nnamespace security {\nDSN_DECLARE_string(service_fqdn);\nDSN_DECLARE_string(service_name);\n\nserver_negotiation::server_negotiation(rpc_session_ptr session) : negotiation(session)\n{\n    _name = fmt::format(\"SERVER_NEGOTIATION(CLIENT={})\", _session->remote_address().to_string());\n}\n\nvoid server_negotiation::start()\n{\n    _status = negotiation_status::type::SASL_LIST_MECHANISMS;\n    ddebug_f(\"{}: start negotiation\", _name);\n}\n\nvoid server_negotiation::handle_request(negotiation_rpc rpc)\n{\n    switch (_status) {\n    case negotiation_status::type::SASL_LIST_MECHANISMS:\n        on_list_mechanisms(rpc);\n        break;\n    case negotiation_status::type::SASL_LIST_MECHANISMS_RESP:\n        on_select_mechanism(rpc);\n        break;\n    case negotiation_status::type::SASL_SELECT_MECHANISMS_RESP:\n        on_initiate(rpc);\n        break;\n    case 
negotiation_status::type::SASL_CHALLENGE:\n        on_challenge_resp(rpc);\n        break;\n    default:\n        fail_negotiation();\n    }\n}\n\nvoid server_negotiation::on_list_mechanisms(negotiation_rpc rpc)\n{\n    if (!check_status(rpc.request().status, negotiation_status::type::SASL_LIST_MECHANISMS)) {\n        fail_negotiation();\n        return;\n    }\n\n    std::string mech_list = boost::join(supported_mechanisms, \",\");\n    negotiation_response &response = rpc.response();\n    _status = response.status = negotiation_status::type::SASL_LIST_MECHANISMS_RESP;\n    response.msg = blob::create_from_bytes(mech_list.data(), mech_list.length());\n}\n\nvoid server_negotiation::on_select_mechanism(negotiation_rpc rpc)\n{\n    const negotiation_request &request = rpc.request();\n    if (!check_status(rpc.request().status, negotiation_status::type::SASL_SELECT_MECHANISMS)) {\n        fail_negotiation();\n        return;\n    }\n\n    _selected_mechanism = request.msg.to_string();\n    if (supported_mechanisms.find(_selected_mechanism) == supported_mechanisms.end()) {\n        dwarn_f(\"the mechanism of {} is not supported\", _selected_mechanism);\n        fail_negotiation();\n        return;\n    }\n\n    error_s err_s = _sasl->init();\n    if (!err_s.is_ok()) {\n        dwarn_f(\"{}: server initialize sasl failed, error = {}, msg = {}\",\n                _name,\n                err_s.code().to_string(),\n                err_s.description());\n        fail_negotiation();\n        return;\n    }\n\n    negotiation_response &response = rpc.response();\n    _status = response.status = negotiation_status::type::SASL_SELECT_MECHANISMS_RESP;\n}\n\nvoid server_negotiation::on_initiate(negotiation_rpc rpc)\n{\n    const negotiation_request &request = rpc.request();\n    if (!check_status(request.status, negotiation_status::type::SASL_INITIATE)) {\n        fail_negotiation();\n        return;\n    }\n\n    blob start_output;\n    error_s err_s = 
_sasl->start(_selected_mechanism, request.msg, start_output);\n    return do_challenge(rpc, err_s, start_output);\n}\n\nvoid server_negotiation::on_challenge_resp(negotiation_rpc rpc)\n{\n    const negotiation_request &request = rpc.request();\n    if (!check_status(request.status, negotiation_status::type::SASL_CHALLENGE_RESP)) {\n        fail_negotiation();\n        return;\n    }\n\n    blob resp_msg;\n    error_s err_s = _sasl->step(request.msg, resp_msg);\n    return do_challenge(rpc, err_s, resp_msg);\n}\n\nvoid server_negotiation::do_challenge(negotiation_rpc rpc, error_s err_s, const blob &resp_msg)\n{\n    if (!err_s.is_ok() && err_s.code() != ERR_SASL_INCOMPLETE) {\n        dwarn_f(\"{}: negotiation failed, with err = {}, msg = {}\",\n                _name,\n                err_s.code().to_string(),\n                err_s.description());\n        fail_negotiation();\n        return;\n    }\n\n    if (err_s.is_ok()) {\n        std::string user_name;\n        auto retrive_err = _sasl->retrieve_username(user_name);\n        if (retrive_err.is_ok()) {\n            succ_negotiation(rpc, user_name);\n        } else {\n            dwarn_f(\"{}: retrive user name failed: with err = {}, msg = {}\",\n                    _name,\n                    retrive_err.code().to_string(),\n                    retrive_err.description());\n            fail_negotiation();\n        }\n    } else {\n        negotiation_response &challenge = rpc.response();\n        _status = challenge.status = negotiation_status::type::SASL_CHALLENGE;\n        challenge.msg = resp_msg;\n    }\n}\n\nvoid server_negotiation::succ_negotiation(negotiation_rpc rpc, const std::string &user_name)\n{\n    negotiation_response &response = rpc.response();\n    _status = response.status = negotiation_status::type::SASL_SUCC;\n    _session->set_client_username(user_name);\n    _session->set_negotiation_succeed();\n    ddebug_f(\"{}: negotiation succeed\", _name);\n}\n} // namespace security\n} // namespace 
dsn\n"
  },
  {
    "path": "src/runtime/security/server_negotiation.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include \"negotiation.h\"\n\n#include <dsn/utility/errors.h>\n\nnamespace dsn {\nnamespace security {\nextern const std::set<std::string> supported_mechanisms;\n\n// server_negotiation negotiates a session on server side.\nclass server_negotiation : public negotiation\n{\npublic:\n    explicit server_negotiation(rpc_session_ptr session);\n\n    void start() override;\n\n    // handle_request handles negotiate_request from the session.\n    void handle_request(negotiation_rpc rpc);\n\nprivate:\n    void on_list_mechanisms(negotiation_rpc rpc);\n    void on_select_mechanism(negotiation_rpc rpc);\n    void on_initiate(negotiation_rpc rpc);\n    void on_challenge_resp(negotiation_rpc rpc);\n\n    void do_challenge(negotiation_rpc rpc, error_s err_s, const blob &resp_msg);\n    void succ_negotiation(negotiation_rpc rpc, const std::string &user_name);\n\n    friend class server_negotiation_test;\n};\n\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/service_api_c.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"service_engine.h\"\n#include \"utils/coredump.h\"\n#include \"runtime/rpc/rpc_engine.h\"\n#include \"runtime/task/task_engine.h\"\n#include \"runtime/security/init.h\"\n\n#include <fstream>\n\n#include <dsn/service_api_c.h>\n#include <dsn/tool_api.h>\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/cpp/serialization.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/process_utils.h>\n#include <dsn/utility/flags.h>\n#include <dsn/utils/time_utils.h>\n#include <dsn/utility/errors.h>\n#include <dsn/dist/fmt_logging.h>\n\n#ifdef DSN_ENABLE_GPERF\n#include <gperftools/malloc_extension.h>\n#endif\n\n#include \"service_engine.h\"\n#include \"runtime/rpc/rpc_engine.h\"\n#include 
\"runtime/task/task_engine.h\"\n#include \"utils/coredump.h\"\n#include \"runtime/security/negotiation_manager.h\"\n\nnamespace dsn {\nnamespace security {\nDSN_DECLARE_bool(enable_auth);\nDSN_DECLARE_bool(enable_zookeeper_kerberos);\n} // namespace security\n} // namespace dsn\n//\n// global state\n//\nstatic struct _all_info_\n{\n    unsigned int magic;\n    bool engine_ready;\n    bool config_completed;\n    std::unique_ptr<::dsn::tools::tool_app> tool;\n    ::dsn::service_engine *engine;\n    std::vector<::dsn::task_spec *> task_specs;\n\n    bool is_config_completed() const { return magic == 0xdeadbeef && config_completed; }\n\n    bool is_engine_ready() const { return magic == 0xdeadbeef && engine_ready; }\n\n} dsn_all;\n\nDSN_API volatile int *dsn_task_queue_virtual_length_ptr(dsn::task_code code, int hash)\n{\n    return dsn::task::get_current_node()->computation()->get_task_queue_virtual_length_ptr(code,\n                                                                                           hash);\n}\n\nDSN_API bool dsn_task_is_running_inside(dsn::task *t)\n{\n    return ::dsn::task::get_current_task() == t;\n}\n\nDSN_API void dsn_coredump()\n{\n    ::dsn::utils::coredump::write();\n    ::abort();\n}\n\n//------------------------------------------------------------------------------\n//\n// rpc\n//\n//------------------------------------------------------------------------------\n\n// rpc calls\nDSN_API dsn::rpc_address dsn_primary_address()\n{\n    return ::dsn::task::get_current_rpc()->primary_address();\n}\n\nDSN_API bool dsn_rpc_register_handler(dsn::task_code code,\n                                      const char *extra_name,\n                                      const dsn::rpc_request_handler &cb)\n{\n    return ::dsn::task::get_current_node()->rpc_register_handler(code, extra_name, cb);\n}\n\nDSN_API bool dsn_rpc_unregiser_handler(dsn::task_code code)\n{\n    return ::dsn::task::get_current_node()->rpc_unregister_handler(code);\n}\n\nDSN_API 
void dsn_rpc_call(dsn::rpc_address server, dsn::rpc_response_task *rpc_call)\n{\n    dassert(rpc_call->spec().type == TASK_TYPE_RPC_RESPONSE,\n            \"invalid task_type, type = %s\",\n            enum_to_string(rpc_call->spec().type));\n\n    auto msg = rpc_call->get_request();\n    msg->server_address = server;\n    ::dsn::task::get_current_rpc()->call(msg, dsn::rpc_response_task_ptr(rpc_call));\n}\n\nDSN_API dsn::message_ex *dsn_rpc_call_wait(dsn::rpc_address server, dsn::message_ex *request)\n{\n    auto msg = ((::dsn::message_ex *)request);\n    msg->server_address = server;\n\n    ::dsn::rpc_response_task *rtask = new ::dsn::rpc_response_task(msg, nullptr, 0);\n    rtask->add_ref();\n    ::dsn::task::get_current_rpc()->call(msg, dsn::rpc_response_task_ptr(rtask));\n    rtask->wait();\n    if (rtask->error() == ::dsn::ERR_OK) {\n        auto msg = rtask->get_response();\n        msg->add_ref();       // released by callers\n        rtask->release_ref(); // added above\n        return msg;\n    } else {\n        rtask->release_ref(); // added above\n        return nullptr;\n    }\n}\n\nDSN_API void dsn_rpc_call_one_way(dsn::rpc_address server, dsn::message_ex *request)\n{\n    auto msg = ((::dsn::message_ex *)request);\n    msg->server_address = server;\n\n    ::dsn::task::get_current_rpc()->call(msg, nullptr);\n}\n\nDSN_API void dsn_rpc_reply(dsn::message_ex *response, dsn::error_code err)\n{\n    auto msg = ((::dsn::message_ex *)response);\n    ::dsn::task::get_current_rpc()->reply(msg, err);\n}\n\nDSN_API void dsn_rpc_forward(dsn::message_ex *request, dsn::rpc_address addr)\n{\n    ::dsn::task::get_current_rpc()->forward((::dsn::message_ex *)(request),\n                                            ::dsn::rpc_address(addr));\n}\n\n//------------------------------------------------------------------------------\n//\n// system\n//\n//------------------------------------------------------------------------------\n\nstatic bool\nrun(const char *config_file, 
const char *config_arguments, bool is_server, std::string &app_list);\n\nDSN_API bool dsn_run_config(const char *config, bool is_server)\n{\n    std::string name;\n    return run(config, nullptr, is_server, name);\n}\n\nNORETURN DSN_API void dsn_exit(int code)\n{\n    printf(\"dsn exit with code %d\\n\", code);\n    fflush(stdout);\n    ::dsn::tools::sys_exit.execute(::dsn::SYS_EXIT_NORMAL);\n\n    _exit(code);\n}\n\nDSN_API bool dsn_mimic_app(const char *app_role, int index)\n{\n    auto worker = ::dsn::task::get_current_worker2();\n    dassert(worker == nullptr, \"cannot call dsn_mimic_app in rDSN threads\");\n\n    auto cnode = ::dsn::task::get_current_node2();\n    if (cnode != nullptr) {\n        const std::string &name = cnode->spec().full_name;\n        if (cnode->spec().role_name == std::string(app_role) && cnode->spec().index == index) {\n            return true;\n        } else {\n            derror(\"current thread is already attached to another rDSN app %s\", name.c_str());\n            return false;\n        }\n    }\n\n    const auto &nodes = dsn::service_engine::instance().get_all_nodes();\n    for (const auto &n : nodes) {\n        if (n.second->spec().role_name == std::string(app_role) &&\n            n.second->spec().index == index) {\n            ::dsn::task::set_tls_dsn_context(n.second.get(), nullptr);\n            return true;\n        }\n    }\n\n    derror(\"cannot find host app %s with index %d\", app_role, index);\n    return false;\n}\n\n//\n// run the system with arguments\n//   config [-cargs k1=v1;k2=v2] [-app_list app_name1@index1;app_name2@index]\n// e.g., config.ini -app_list replica@1 to start the first replica as a new process\n//       config.ini -app_list replica to start ALL replicas (count specified in config) as a new\n//       process\n//       config.ini -app_list replica -cargs replica-port=34556 to start ALL replicas with given\n//       port variable specified in config.ini\n//       config.ini to start ALL apps as a new 
process\n//\nDSN_API void dsn_run(int argc, char **argv, bool is_server)\n{\n    if (argc < 2) {\n        printf(\n            \"invalid options for dsn_run\\n\"\n            \"// run the system with arguments\\n\"\n            \"//   config [-cargs k1=v1;k2=v2] [-app_list app_name1@index1;app_name2@index]\\n\"\n            \"// e.g., config.ini -app_list replica@1 to start the first replica as a new process\\n\"\n            \"//       config.ini -app_list replica to start ALL replicas (count specified in \"\n            \"config) as a new process\\n\"\n            \"//       config.ini -app_list replica -cargs replica-port=34556 to start with \"\n            \"%%replica-port%% var in config.ini\\n\"\n            \"//       config.ini to start ALL apps as a new process\\n\");\n        dsn_exit(1);\n        return;\n    }\n\n    char *config = argv[1];\n    std::string config_args = \"\";\n    std::string app_list = \"\";\n\n    for (int i = 2; i < argc;) {\n        if (0 == strcmp(argv[i], \"-cargs\")) {\n            if (++i < argc) {\n                config_args = std::string(argv[i++]);\n            }\n        }\n\n        else if (0 == strcmp(argv[i], \"-app_list\")) {\n            if (++i < argc) {\n                app_list = std::string(argv[i++]);\n            }\n        } else {\n            printf(\"unknown arguments %s\\n\", argv[i]);\n            dsn_exit(1);\n            return;\n        }\n    }\n\n    if (!run(config, config_args.size() > 0 ? 
config_args.c_str() : nullptr, is_server, app_list)) {\n        printf(\"run the system failed\\n\");\n        dsn_exit(-1);\n        return;\n    }\n}\n\nnamespace dsn {\nnamespace tools {\n\nbool is_engine_ready() { return dsn_all.is_engine_ready(); }\n\ntool_app *get_current_tool() { return dsn_all.tool.get(); }\n\n} // namespace tools\n} // namespace dsn\n\nextern void dsn_core_init();\n\ninline void dsn_global_init()\n{\n    // make perf_counters destructed after service_engine,\n    // because service_engine relies on the former to monitor\n    // task queues length.\n    dsn::perf_counters::instance();\n    dsn::service_engine::instance();\n}\n\nstatic std::string dsn_log_prefixed_message_func()\n{\n    std::string res;\n    res.resize(100);\n    char *prefixed_message = const_cast<char *>(res.c_str());\n\n    int tid = dsn::utils::get_current_tid();\n    auto t = dsn::task::get_current_task_id();\n    if (t) {\n        if (nullptr != dsn::task::get_current_worker2()) {\n            sprintf(prefixed_message,\n                    \"%6s.%7s%d.%016\" PRIx64 \": \",\n                    dsn::task::get_current_node_name(),\n                    dsn::task::get_current_worker2()->pool_spec().name.c_str(),\n                    dsn::task::get_current_worker2()->index(),\n                    t);\n        } else {\n            sprintf(prefixed_message,\n                    \"%6s.%7s.%05d.%016\" PRIx64 \": \",\n                    dsn::task::get_current_node_name(),\n                    \"io-thrd\",\n                    tid,\n                    t);\n        }\n    } else {\n        if (nullptr != dsn::task::get_current_worker2()) {\n            sprintf(prefixed_message,\n                    \"%6s.%7s%u: \",\n                    dsn::task::get_current_node_name(),\n                    dsn::task::get_current_worker2()->pool_spec().name.c_str(),\n                    dsn::task::get_current_worker2()->index());\n        } else {\n            sprintf(prefixed_message,\n       
             \"%6s.%7s.%05d: \",\n                    dsn::task::get_current_node_name(),\n                    \"io-thrd\",\n                    tid);\n        }\n    }\n\n    return res;\n}\n\nbool run(const char *config_file,\n         const char *config_arguments,\n         bool is_server,\n         std::string &app_list)\n{\n    // We put the loading of configuration at the beginning of this func.\n    // Because in dsn_global_init(), it calls perf_counters::instance(), which calls\n    // shared_io_service::instance(). And in the cstor of shared_io_service, it calls\n    // dsn_config_get_value_uint64() to load the corresponding configs. That will make\n    // dsn_config_get_value_uint64() get wrong value if we put dsn_config_load at behind of\n    // dsn_global_init()\n    if (!dsn_config_load(config_file, config_arguments)) {\n        printf(\"Fail to load config file %s\\n\", config_file);\n        return false;\n    }\n    dsn::flags_initialize();\n\n    dsn_global_init();\n    dsn_core_init();\n    ::dsn::task::set_tls_dsn_context(nullptr, nullptr);\n\n    dsn_all.engine_ready = false;\n    dsn_all.config_completed = false;\n    dsn_all.tool = nullptr;\n    dsn_all.engine = &::dsn::service_engine::instance();\n    dsn_all.magic = 0xdeadbeef;\n\n    // pause when necessary\n    if (dsn_config_get_value_bool(\"core\",\n                                  \"pause_on_start\",\n                                  false,\n                                  \"whether to pause at startup time for easier debugging\")) {\n        printf(\"\\nPause for debugging (pid = %d)...\\n\", static_cast<int>(getpid()));\n        getchar();\n    }\n\n    for (int i = 0; i <= dsn::task_code::max(); i++) {\n        dsn_all.task_specs.push_back(::dsn::task_spec::get(i));\n    }\n\n    // initialize global specification from config file\n    ::dsn::service_spec spec;\n    if (!spec.init()) {\n        printf(\"error in config file %s, exit ...\\n\", config_file);\n        return 
false;\n    }\n\n    dsn_all.config_completed = true;\n\n    // setup data dir\n    auto &data_dir = spec.data_dir;\n    dassert(!dsn::utils::filesystem::file_exists(data_dir),\n            \"%s should not be a file.\",\n            data_dir.c_str());\n    if (!dsn::utils::filesystem::directory_exists(data_dir.c_str())) {\n        if (!dsn::utils::filesystem::create_directory(data_dir)) {\n            dassert(false, \"Fail to create %s.\", data_dir.c_str());\n        }\n    }\n    std::string cdir;\n    if (!dsn::utils::filesystem::get_absolute_path(data_dir.c_str(), cdir)) {\n        dassert(false, \"Fail to get absolute path from %s.\", data_dir.c_str());\n    }\n    spec.data_dir = cdir;\n\n    ::dsn::utils::coredump::init();\n\n    // setup log dir\n    spec.dir_log = ::dsn::utils::filesystem::path_combine(cdir, \"log\");\n    dsn::utils::filesystem::create_directory(spec.dir_log);\n\n    // init tools\n    dsn_all.tool.reset(::dsn::utils::factory_store<::dsn::tools::tool_app>::create(\n        spec.tool.c_str(), ::dsn::PROVIDER_TYPE_MAIN, spec.tool.c_str()));\n    dsn_all.tool->install(spec);\n\n    // init app specs\n    if (!spec.init_app_specs()) {\n        printf(\"error in config file %s, exit ...\\n\", config_file);\n        return false;\n    }\n\n#ifdef DSN_ENABLE_GPERF\n    double_t tcmalloc_release_rate =\n        (double_t)dsn_config_get_value_double(\"core\",\n                                              \"tcmalloc_release_rate\",\n                                              1., // [0, 10]\n                                              \"the memory releasing rate of tcmalloc, default is \"\n                                              \"1.0 in gperftools, value range is 0.0~10.0\");\n    ::MallocExtension::instance()->SetMemoryReleaseRate(tcmalloc_release_rate);\n#endif\n\n    // init logging\n    dsn_log_init(spec.logging_factory_name, spec.dir_log, dsn_log_prefixed_message_func);\n\n    // prepare minimum necessary\n    
::dsn::service_engine::instance().init_before_toollets(spec);\n\n    ddebug(\"process(%ld) start: %\" PRIu64 \", date: %s\",\n           getpid(),\n           dsn::utils::process_start_millis(),\n           dsn::utils::process_start_date_time_mills());\n\n    // init toollets\n    for (auto it = spec.toollets.begin(); it != spec.toollets.end(); ++it) {\n        auto tlet =\n            dsn::tools::internal_use_only::get_toollet(it->c_str(), ::dsn::PROVIDER_TYPE_MAIN);\n        dassert(tlet, \"toolet not found\");\n        tlet->install(spec);\n    }\n\n    // init provider specific system inits\n    dsn::tools::sys_init_before_app_created.execute();\n\n    // TODO: register sys_exit execution\n\n    // init runtime\n    ::dsn::service_engine::instance().init_after_toollets();\n\n    dsn_all.engine_ready = true;\n\n    // init security if FLAGS_enable_auth == true\n    if (dsn::security::FLAGS_enable_auth) {\n        if (!dsn::security::init(is_server)) {\n            return false;\n        }\n        // if FLAGS_enable_auth is false but FLAGS_enable_zookeeper_kerberos, we should init\n        // kerberos for it separately\n        // include two steps:\n        // 1) apply kerberos ticket and keep it valid\n        // 2) complete sasl init for client(use FLAGS_sasl_plugin_path)\n    } else if (dsn::security::FLAGS_enable_zookeeper_kerberos && app_list == \"meta\") {\n        if (!dsn::security::init_for_zookeeper_client()) {\n            return false;\n        }\n    }\n\n    // split app_name and app_index\n    std::list<std::string> applistkvs;\n    ::dsn::utils::split_args(app_list.c_str(), applistkvs, ';');\n\n    // init apps\n    for (auto &sp : spec.app_specs) {\n        if (!sp.run)\n            continue;\n\n        bool create_it = false;\n\n        // create all apps\n        if (app_list == \"\") {\n            create_it = true;\n        } else {\n            for (auto &kv : applistkvs) {\n                std::list<std::string> argskvs;\n                
::dsn::utils::split_args(kv.c_str(), argskvs, '@');\n                if (std::string(\"apps.\") + argskvs.front() == sp.config_section) {\n                    if (argskvs.size() < 2)\n                        create_it = true;\n                    else\n                        create_it = (std::stoi(argskvs.back()) == sp.index);\n                    break;\n                }\n            }\n        }\n\n        if (create_it) {\n            ::dsn::service_engine::instance().start_node(sp);\n        }\n    }\n\n    if (dsn::service_engine::instance().get_all_nodes().size() == 0) {\n        printf(\"no app are created, usually because \\n\"\n               \"app_name is not specified correctly, should be 'xxx' in [apps.xxx]\\n\"\n               \"or app_index (1-based) is greater than specified count in config file\\n\");\n        exit(1);\n    }\n\n    dsn::command_manager::instance().register_command({\"config-dump\"},\n                                                      \"config-dump - dump configuration\",\n                                                      \"config-dump [to-this-config-file]\",\n                                                      [](const std::vector<std::string> &args) {\n                                                          std::ostringstream oss;\n                                                          std::ofstream off;\n                                                          std::ostream *os = &oss;\n                                                          if (args.size() > 0) {\n                                                              off.open(args[0]);\n                                                              os = &off;\n\n                                                              oss << \"config dump to file \"\n                                                                  << args[0] << std::endl;\n                                                          }\n\n                                               
           dsn_config_dump(*os);\n                                                          return oss.str();\n                                                      });\n\n    // invoke customized init after apps are created\n    dsn::tools::sys_init_after_app_created.execute();\n\n    // start the tool\n    dsn_all.tool->run();\n\n    if (is_server) {\n        while (true) {\n            std::this_thread::sleep_for(std::chrono::hours(1));\n        }\n    }\n\n    // add this to allow mimic app call from this thread.\n    memset((void *)&dsn::tls_dsn, 0, sizeof(dsn::tls_dsn));\n\n    return true;\n}\n\nnamespace dsn {\nservice_app *service_app::new_service_app(const std::string &type,\n                                          const dsn::service_app_info *info)\n{\n    return dsn::utils::factory_store<service_app>::create(\n        type.c_str(), dsn::PROVIDER_TYPE_MAIN, info);\n}\n\nservice_app::service_app(const dsn::service_app_info *info) : _info(info), _started(false)\n{\n    security::negotiation_manager::instance().open_service();\n}\n\nconst service_app_info &service_app::info() const { return *_info; }\n\nconst service_app_info &service_app::current_service_app_info()\n{\n    return tls_dsn.node->get_service_app_info();\n}\n\nvoid service_app::get_all_service_apps(std::vector<service_app *> *apps)\n{\n    const service_nodes_by_app_id &nodes = dsn_all.engine->get_all_nodes();\n    for (const auto &kv : nodes) {\n        const service_node *node = kv.second.get();\n        apps->push_back(const_cast<service_app *>(node->get_service_app()));\n    }\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/service_engine.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"service_engine.h\"\n#include \"runtime/task/task_engine.h\"\n#include \"runtime/rpc/rpc_engine.h\"\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/smart_pointers.h>\n#include <dsn/tool-api/env_provider.h>\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/tool_api.h>\n#include <dsn/tool/node_scoper.h>\n\nusing namespace dsn::utils;\n\nnamespace dsn {\n\nservice_node::service_node(service_app_spec &app_spec) { _app_spec = app_spec; }\n\nbool service_node::rpc_register_handler(task_code code,\n                                        const char *extra_name,\n                                        const rpc_request_handler &h)\n{\n    return 
_rpc->register_rpc_handler(code, extra_name, h);\n}\n\nbool service_node::rpc_unregister_handler(dsn::task_code rpc_code)\n{\n    return _rpc->unregister_rpc_handler(rpc_code);\n}\n\nerror_code service_node::init_rpc_engine()\n{\n    // init rpc engine\n    _rpc = make_unique<rpc_engine>(this);\n\n    // start rpc engine\n    return _rpc->start(_app_spec);\n}\n\ndsn::error_code service_node::start_app()\n{\n    dassert(_entity.get(), \"entity hasn't initialized\");\n    _entity->set_address(rpc()->primary_address());\n\n    std::vector<std::string> args;\n    utils::split_args(spec().arguments.c_str(), args);\n    args.insert(args.begin(), spec().full_name);\n    dsn::error_code res = _entity->start(args);\n    if (res == dsn::ERR_OK) {\n        _entity->set_started(true);\n    }\n    return res;\n}\n\ndsn::error_code service_node::stop_app(bool cleanup)\n{\n    dassert(_entity.get(), \"entity hasn't initialized\");\n    dsn::error_code res = _entity->stop(cleanup);\n    if (res == dsn::ERR_OK) {\n        _entity->set_started(false);\n    }\n    return res;\n}\n\nvoid service_node::init_service_app()\n{\n    _info.entity_id = _app_spec.id;\n    _info.index = _app_spec.index;\n    _info.role_name = _app_spec.role_name;\n    _info.type = _app_spec.type;\n    _info.full_name = _app_spec.full_name;\n    _info.data_dir = _app_spec.data_dir;\n\n    _entity.reset(service_app::new_service_app(_app_spec.type, &_info));\n}\n\nerror_code service_node::start()\n{\n    error_code err = ERR_OK;\n\n    // init data dir\n    if (!dsn::utils::filesystem::path_exists(spec().data_dir))\n        dsn::utils::filesystem::create_directory(spec().data_dir);\n\n    // init task engine\n    _computation = make_unique<task_engine>(this);\n    _computation->create(_app_spec.pools);\n    dassert(!_computation->is_started(), \"task engine must not be started at this point\");\n\n    // init rpc\n    err = init_rpc_engine();\n    if (err != ERR_OK)\n        return err;\n\n    // start task 
engine\n    _computation->start();\n    dassert(_computation->is_started(), \"task engine must be started at this point\");\n\n    // create service_app\n    {\n        ::dsn::tools::node_scoper scoper(this);\n        init_service_app();\n    }\n\n    // start rpc serving\n    _rpc->start_serving();\n\n    return err;\n}\n\nvoid service_node::get_runtime_info(const std::string &indent,\n                                    const std::vector<std::string> &args,\n                                    /*out*/ std::stringstream &ss)\n{\n    ss << indent << full_name() << \":\" << std::endl;\n\n    std::string indent2 = indent + \"\\t\";\n    _computation->get_runtime_info(indent2, args, ss);\n}\n\nvoid service_node::get_queue_info(\n    /*out*/ std::stringstream &ss)\n{\n    ss << \"{\\\"app_name\\\":\\\"\" << full_name() << \"\\\",\\n\\\"thread_pool\\\":[\\n\";\n    _computation->get_queue_info(ss);\n    ss << \"]}\";\n}\n\nrpc_request_task *service_node::generate_intercepted_request_task(message_ex *req)\n{\n    bool is_write = task_spec::get(req->local_rpc_code)->rpc_request_is_write_operation;\n    rpc_request_task *t = new rpc_request_task(req,\n                                               std::bind(&service_app::on_intercepted_request,\n                                                         _entity.get(),\n                                                         req->header->gpid,\n                                                         is_write,\n                                                         std::placeholders::_1),\n                                               this);\n    t->spec().on_task_create.execute(nullptr, t);\n    return t;\n}\n\nservice_node::~service_node()\n{\n    _rpc->stop_serving();\n    stop_app(false);\n    _computation->stop();\n}\n\n//////////////////////////////////////////////////////////////////////////////////////////\n\nservice_engine::service_engine()\n{\n    _env = nullptr;\n\n    _get_runtime_info_cmd = 
dsn::command_manager::instance().register_command(\n        {\"engine\"},\n        \"engine - get engine internal information\",\n        \"engine [app-id]\",\n        &service_engine::get_runtime_info);\n\n    _get_queue_info_cmd = dsn::command_manager::instance().register_command(\n        {\"system.queue\"},\n        \"system.queue - get queue internal information\",\n        \"system.queue\",\n        &service_engine::get_queue_info);\n}\n\nservice_engine::~service_engine()\n{\n    _nodes_by_app_id.clear();\n\n    UNREGISTER_VALID_HANDLER(_get_runtime_info_cmd);\n    UNREGISTER_VALID_HANDLER(_get_queue_info_cmd);\n}\n\nvoid service_engine::init_before_toollets(const service_spec &spec)\n{\n    _spec = spec;\n\n    // init common for all per-node providers\n    message_ex::s_local_hash =\n        (uint32_t)dsn_config_get_value_uint64(\"core\",\n                                              \"local_hash\",\n                                              0,\n                                              \"a same hash value from two processes indicate the \"\n                                              \"rpc code are registered in the same order, \"\n                                              \"and therefore the mapping between rpc code string \"\n                                              \"and integer is the same, which we leverage \"\n                                              \"for fast rpc handler lookup optimization\");\n}\n\nvoid service_engine::init_after_toollets()\n{\n    // init common providers (second half)\n    _env = factory_store<env_provider>::create(\n        _spec.env_factory_name.c_str(), PROVIDER_TYPE_MAIN, nullptr);\n    tls_dsn.env = _env;\n}\n\nvoid service_engine::start_node(service_app_spec &app_spec)\n{\n    std::unordered_map<int, std::string> app_name_by_port;\n    auto it = _nodes_by_app_id.find(app_spec.id);\n    if (it == _nodes_by_app_id.end()) {\n        for (auto p : app_spec.ports) {\n            // union to existing 
node if any port is shared\n            auto it = app_name_by_port.find(p);\n            if (it != app_name_by_port.end()) {\n                dassert_f(false,\n                          \"network port {} usage confliction for {} vs {}, \"\n                          \"please reconfig\",\n                          p,\n                          it->second,\n                          app_spec.full_name);\n            }\n            app_name_by_port.emplace(p, app_spec.full_name);\n        }\n\n        auto node = std::make_shared<service_node>(app_spec);\n        error_code err = node->start();\n        dassert_f(err == ERR_OK, \"service node start failed, err = {}\", err.to_string());\n\n        _nodes_by_app_id[node->id()] = node;\n    }\n}\n\nstd::string service_engine::get_runtime_info(const std::vector<std::string> &args)\n{\n    std::stringstream ss;\n    if (args.size() == 0) {\n        ss << \"\" << service_engine::instance()._nodes_by_app_id.size()\n           << \" nodes available:\" << std::endl;\n        for (auto &kv : service_engine::instance()._nodes_by_app_id) {\n            ss << \"\\t\" << kv.second->id() << \".\" << kv.second->full_name() << std::endl;\n        }\n    } else {\n        std::string indent = \"\";\n        int id = atoi(args[0].c_str());\n        auto it = service_engine::instance()._nodes_by_app_id.find(id);\n        if (it != service_engine::instance()._nodes_by_app_id.end()) {\n            auto args2 = args;\n            args2.erase(args2.begin());\n            it->second->get_runtime_info(indent, args2, ss);\n        } else {\n            ss << \"cannot find node with given app id\";\n        }\n    }\n    return ss.str();\n}\n\nstd::string service_engine::get_queue_info(const std::vector<std::string> &args)\n{\n    std::stringstream ss;\n    ss << \"[\";\n    for (auto &it : service_engine::instance()._nodes_by_app_id) {\n        if (it.first != service_engine::instance()._nodes_by_app_id.begin()->first)\n            ss << 
\",\";\n        it.second->get_queue_info(ss);\n    }\n    ss << \"]\";\n    return ss.str();\n}\n\nbool service_engine::is_simulator() const { return _simulator; }\n\nvoid service_engine::set_simulator() { _simulator = true; }\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/service_engine.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <sstream>\n\n#include <dsn/utility/ports.h>\n#include <dsn/utility/singleton.h>\n#include <dsn/utility/synchronize.h>\n#include <dsn/tool-api/global_config.h>\n#include <dsn/tool-api/task.h>\n#include <dsn/tool-api/auto_codes.h>\n#include <dsn/cpp/service_app.h>\n\nnamespace dsn {\n\nclass task_engine;\nclass rpc_engine;\nclass env_provider;\nclass nfs_node;\nclass task_queue;\nclass task_worker_pool;\nclass timer_service;\n\n//\n//\n//\nclass service_node\n{\npublic:\n    explicit service_node(service_app_spec &app_spec);\n\n    
~service_node();\n\n    rpc_engine *rpc() const { return _rpc.get(); }\n    task_engine *computation() const { return _computation.get(); }\n\n    void get_runtime_info(const std::string &indent,\n                          const std::vector<std::string> &args,\n                          /*out*/ std::stringstream &ss);\n    void get_queue_info(/*out*/ std::stringstream &ss);\n\n    dsn::error_code start();\n    dsn::error_code start_app();\n    dsn::error_code stop_app(bool cleanup);\n\n    int id() const { return _app_spec.id; }\n    const char *full_name() const { return _app_spec.full_name.c_str(); }\n    const service_app_spec &spec() const { return _app_spec; }\n    const service_app_info &get_service_app_info() const { return _info; }\n    const service_app *get_service_app() const { return _entity.get(); }\n    bool rpc_register_handler(task_code code, const char *extra_name, const rpc_request_handler &h);\n    bool rpc_unregister_handler(task_code rpc_code);\n\n    rpc_request_task *generate_intercepted_request_task(message_ex *req);\n\nprivate:\n    service_app_info _info;\n    std::unique_ptr<service_app> _entity;\n\n    service_app_spec _app_spec;\n\n    std::unique_ptr<task_engine> _computation;\n    std::unique_ptr<rpc_engine> _rpc;\n\nprivate:\n    // the service entity is initialized after the engine\n    // is initialized, so this should be call in start()\n    void init_service_app();\n\n    error_code init_rpc_engine();\n};\n\ntypedef std::map<int, std::shared_ptr<service_node>> service_nodes_by_app_id;\nclass service_engine : public utils::singleton<service_engine>\n{\npublic:\n    // ServiceMode Mode() const { return _spec.Mode; }\n    const service_spec &spec() const { return _spec; }\n    env_provider *env() const { return _env; }\n    static std::string get_runtime_info(const std::vector<std::string> &args);\n    static std::string get_queue_info(const std::vector<std::string> &args);\n\n    void init_before_toollets(const service_spec 
&spec);\n    void init_after_toollets();\n\n    void start_node(service_app_spec &app_spec);\n    const service_nodes_by_app_id &get_all_nodes() const { return _nodes_by_app_id; }\n    bool is_simulator() const;\n    void set_simulator();\n\nprivate:\n    service_engine();\n    ~service_engine();\n\n    service_spec _spec;\n    env_provider *_env;\n\n    dsn_handle_t _get_runtime_info_cmd;\n    dsn_handle_t _get_queue_info_cmd;\n\n    bool _simulator;\n\n    // map app_id to service_node\n    service_nodes_by_app_id _nodes_by_app_id;\n\n    friend class utils::singleton<service_engine>;\n};\n\n// ------------ inline impl ---------------------\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/sim_clock.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <dsn/utility/clock.h>\n#include \"scheduler.h\"\n\nnamespace dsn {\nnamespace tools {\n\nclass sim_clock : public utils::clock\n{\npublic:\n    sim_clock() = default;\n    virtual ~sim_clock() = default;\n\n    // Gets simulated time in nanoseconds.\n    virtual uint64_t now_ns() const { return scheduler::instance().now_ns(); }\n};\n\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/simulator.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/tool/simulator.h>\n#include \"scheduler.h\"\n#include \"service_engine.h\"\n\n#include \"env.sim.h\"\n#include \"runtime/task/task_engine.sim.h\"\n#include \"sim_clock.h\"\n\nnamespace dsn {\nnamespace tools {\n\n/*static*/\nvoid simulator::register_checker(const std::string &name, checker::factory f)\n{\n    scheduler::instance().add_checker(name, f);\n}\n\nvoid simulator::install(service_spec &spec)\n{\n    register_component_provider<sim_env_provider>(\"dsn::tools::sim_env_provider\");\n    
register_component_provider<sim_task_queue>(\"dsn::tools::sim_task_queue\");\n    register_component_provider<sim_timer_service>(\"dsn::tools::sim_timer_service\");\n\n    semaphore_provider::register_component<sim_semaphore_provider>(\n        \"dsn::tools::sim_semaphore_provider\");\n    lock_provider::register_component<sim_lock_provider>(\"dsn::tools::sim_lock_provider\");\n    lock_nr_provider::register_component<sim_lock_nr_provider>(\"dsn::tools::sim_lock_nr_provider\");\n    rwlock_nr_provider::register_component<sim_rwlock_nr_provider>(\n        \"dsn::tools::sim_rwlock_nr_provider\");\n\n    scheduler::instance();\n\n    if (spec.env_factory_name == \"\")\n        spec.env_factory_name = (\"dsn::tools::sim_env_provider\");\n\n    if (spec.timer_factory_name == \"\")\n        spec.timer_factory_name = (\"dsn::tools::sim_timer_service\");\n\n    network_client_config cs;\n    cs.factory_name = \"dsn::tools::sim_network_provider\";\n    cs.message_buffer_block_size = 1024 * 64;\n    spec.network_default_client_cfs[RPC_CHANNEL_TCP] = cs;\n    spec.network_default_client_cfs[RPC_CHANNEL_UDP] = cs;\n\n    network_server_config cs2;\n    cs2.port = 0;\n    cs2.factory_name = \"dsn::tools::sim_network_provider\";\n    cs2.message_buffer_block_size = 1024 * 64;\n    cs2.channel = RPC_CHANNEL_TCP;\n    spec.network_default_server_cfs[cs2] = cs2;\n    cs2.channel = RPC_CHANNEL_UDP;\n    spec.network_default_server_cfs[cs2] = cs2;\n\n    if (spec.logging_factory_name == \"\")\n        spec.logging_factory_name = \"dsn::tools::simple_logger\";\n\n    if (spec.lock_factory_name == \"\")\n        spec.lock_factory_name = (\"dsn::tools::sim_lock_provider\");\n\n    if (spec.lock_nr_factory_name == \"\")\n        spec.lock_nr_factory_name = (\"dsn::tools::sim_lock_nr_provider\");\n\n    if (spec.rwlock_nr_factory_name == \"\")\n        spec.rwlock_nr_factory_name = (\"dsn::tools::sim_rwlock_nr_provider\");\n\n    if (spec.semaphore_factory_name == \"\")\n        
spec.semaphore_factory_name = (\"dsn::tools::sim_semaphore_provider\");\n\n    for (auto it = spec.threadpool_specs.begin(); it != spec.threadpool_specs.end(); ++it) {\n        threadpool_spec &tspec = *it;\n\n        if (tspec.worker_factory_name == \"\")\n            tspec.worker_factory_name = (\"dsn::task_worker\");\n\n        if (tspec.queue_factory_name == \"\")\n            tspec.queue_factory_name = (\"dsn::tools::sim_task_queue\");\n    }\n\n    sys_exit.put_front(simulator::on_system_exit, \"simulator\");\n\n    // the new sim_clock is taken over by unique_ptr in clock instance\n    utils::clock::instance()->mock(new sim_clock());\n\n    service_engine::instance().set_simulator();\n}\n\nvoid simulator::on_system_exit(sys_exit_type st)\n{\n    derror(\"system exits, you can replay this process using random seed %d\",\n           sim_env_provider::seed());\n}\n\nvoid simulator::run()\n{\n    scheduler::instance().start();\n    tool_app::run();\n}\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/task/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn.task)\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS \"\")\n\n# Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_object()\n"
  },
  {
    "path": "src/runtime/task/hpc_task_queue.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"hpc_task_queue.h\"\n#include <boost/function_output_iterator.hpp>\n\nnamespace dsn {\nnamespace tools {\n\nhpc_concurrent_task_queue::hpc_concurrent_task_queue(task_worker_pool *pool,\n                                                     int index,\n                                                     task_queue *inner_provider)\n    : task_queue(pool, index, inner_provider)\n{\n}\n\nvoid hpc_concurrent_task_queue::enqueue(task *task)\n{\n    _queues[task->spec().priority].q.enqueue(task);\n    _sema.signal(1);\n}\n\ntask 
*hpc_concurrent_task_queue::dequeue(int &batch_size)\n{\n    batch_size = _sema.waitMany(batch_size);\n    if (batch_size == 0) {\n        return nullptr;\n    }\n    task *head = nullptr, *last = nullptr;\n    auto out = boost::make_function_output_iterator([&head, &last](task *in) {\n        if (last) {\n            last->next = in;\n        } else {\n            head = in;\n        }\n\n        last = in;\n        last->next = nullptr;\n    });\n    auto count = batch_size;\n    do {\n        for (auto &qs : _queues) {\n            count -= qs.q.try_dequeue_bulk(out, count);\n            if (count == 0) {\n                break;\n            }\n        }\n    } while (count != 0);\n    return head;\n}\n}\n}\n"
  },
  {
    "path": "src/runtime/task/hpc_task_queue.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <concurrentqueue/concurrentqueue.h>\n#include <concurrentqueue/blockingconcurrentqueue.h>\n\n#include <dsn/tool-api/task_queue.h>\n\nnamespace dsn {\nnamespace tools {\nclass hpc_concurrent_task_queue : public task_queue\n{\n    moodycamel::LightweightSemaphore _sema;\n    struct queue_t\n    {\n        moodycamel::ConcurrentQueue<task *> q;\n    } _queues[TASK_PRIORITY_COUNT];\n\npublic:\n    hpc_concurrent_task_queue(task_worker_pool *pool, int index, task_queue *inner_provider);\n\n    void enqueue(task *task) override;\n\n    task 
*dequeue(/*inout*/ int &batch_size) override;\n};\n}\n}\n"
  },
  {
    "path": "src/runtime/task/simple_task_queue.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"simple_task_queue.h\"\n\nnamespace dsn {\nnamespace tools {\n\nsimple_timer_service::simple_timer_service(service_node *node, timer_service *inner_provider)\n    : timer_service(node, inner_provider), _is_running(false)\n{\n}\n\nvoid simple_timer_service::start()\n{\n    if (_is_running) {\n        return;\n    }\n\n    _worker = std::thread([this]() {\n        task::set_tls_dsn_context(node(), nullptr);\n\n        char buffer[128];\n        sprintf(buffer, \"%s.timer\", get_service_node_name(node()));\n\n        task_worker::set_name(buffer);\n        task_worker::set_priority(worker_priority_t::THREAD_xPRIORITY_ABOVE_NORMAL);\n\n        boost::asio::io_service::work work(_ios);\n        boost::system::error_code ec;\n    
    _ios.run(ec);\n        if (ec) {\n            dassert(\n                false, \"io_service in simple_timer_service run failed: %s\", ec.message().data());\n        }\n    });\n    _is_running = true;\n}\n\nvoid simple_timer_service::stop()\n{\n    if (!_is_running) {\n        return;\n    }\n\n    _ios.stop();\n    _worker.join();\n    _is_running = false;\n}\n\nvoid simple_timer_service::add_timer(task *task)\n{\n    std::shared_ptr<boost::asio::deadline_timer> timer(new boost::asio::deadline_timer(_ios));\n    timer->expires_from_now(boost::posix_time::milliseconds(task->delay_milliseconds()));\n    task->set_delay(0);\n\n    timer->async_wait([task, timer](const boost::system::error_code &ec) {\n        if (!ec) {\n            task->enqueue();\n        } else if (ec != ::boost::asio::error::operation_aborted) {\n            dfatal(\"timer failed for task %s, err = %u\", task->spec().name.c_str(), ec.value());\n        }\n\n        // to consume the added ref count by task::enqueue for add_timer\n        task->release_ref();\n    });\n}\n\nsimple_task_queue::simple_task_queue(task_worker_pool *pool, int index, task_queue *inner_provider)\n    : task_queue(pool, index, inner_provider), _samples(\"\")\n{\n}\n\nvoid simple_task_queue::enqueue(task *task) { _samples.enqueue(task, task->spec().priority); }\n\n// always return 1 or 0 task so far\ntask *simple_task_queue::dequeue(/*inout*/ int &batch_size)\n{\n    long c = 0;\n    auto t = _samples.dequeue(c);\n    dassert(t != nullptr, \"dequeue does not return empty tasks\");\n    batch_size = 1;\n    return t;\n}\n\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/task/simple_task_queue.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/tool_api.h>\n#include <dsn/utility/priority_queue.h>\n#include <boost/asio.hpp>\n\nnamespace dsn {\nnamespace tools {\nclass simple_task_queue : public task_queue\n{\npublic:\n    simple_task_queue(task_worker_pool *pool, int index, task_queue *inner_provider);\n\n    ~simple_task_queue() override = default;\n\n    virtual void enqueue(task *task) override;\n    virtual task *dequeue(/*inout*/ int &batch_size) override;\n\nprivate:\n    typedef utils::blocking_priority_queue<task *, TASK_PRIORITY_COUNT> tqueue;\n    tqueue _samples;\n};\n\nclass simple_timer_service : public timer_service\n{\npublic:\n    simple_timer_service(service_node *node, timer_service *inner_provider);\n\n    
~simple_timer_service() override { stop(); }\n\n    // after milliseconds, the provider should call task->enqueue()\n    virtual void add_timer(task *task) override;\n\n    virtual void start() override;\n\n    virtual void stop() override;\n\nprivate:\n    boost::asio::io_service _ios;\n    std::thread _worker;\n    bool _is_running;\n};\n\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/task/task.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     task is the execution of a piece of sequence code, which completes\n *     a meaningful application level task.\n *\n * Revision history:\n *     Mar., 2015, @imzhenyu (Zhenyu Guo), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/service_api_c.h>\n#include <dsn/tool-api/task.h>\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/utility/utils.h>\n#include <dsn/utility/synchronize.h>\n#include <dsn/utility/rand.h>\n#include <dsn/tool/node_scoper.h>\n#include <dsn/dist/fmt_logging.h>\n\n#include \"task_engine.h\"\n#include \"runtime/service_engine.h\"\n#include \"runtime/rpc/rpc_engine.h\"\n\nnamespace dsn {\n__thread struct __tls_dsn__ tls_dsn;\n__thread uint16_t 
tls_dsn_lower32_task_id_mask = 0;\n\n/*static*/ void task::set_tls_dsn_context(service_node *node, // cannot be null\n                                          task_worker *worker)\n{\n    memset(static_cast<void *>(&tls_dsn), 0, sizeof(tls_dsn));\n    tls_dsn.magic = 0xdeadbeef;\n    tls_dsn.worker_index = -1;\n\n    if (node) {\n        tls_dsn.node_id = node->id();\n\n        if (worker != nullptr) {\n            dassert(worker->pool()->node() == node,\n                    \"worker not belonging to the given node: %s vs %s\",\n                    worker->pool()->node()->full_name(),\n                    node->full_name());\n        }\n\n        tls_dsn.node = node;\n        tls_dsn.worker = worker;\n        tls_dsn.worker_index = worker ? worker->index() : -1;\n        tls_dsn.current_task = nullptr;\n        tls_dsn.rpc = node->rpc();\n        tls_dsn.env = service_engine::instance().env();\n    }\n\n    tls_dsn.node_pool_thread_ids = (node ? ((uint64_t)(uint8_t)node->id()) : 0)\n                                   << (64 - 8); // high 8 bits for node id\n    tls_dsn.node_pool_thread_ids |=\n        (worker ? ((uint64_t)(uint8_t)(int)worker->pool_spec().pool_code) : 0)\n        << (64 - 8 - 8); // next 8 bits for pool id\n    auto worker_idx = worker ? worker->index() : -1;\n    if (worker_idx == -1) {\n        worker_idx = utils::get_current_tid();\n    }\n    tls_dsn.node_pool_thread_ids |= ((uint64_t)(uint16_t)worker_idx)\n                                    << 32; // next 16 bits for thread id\n    tls_dsn.last_lower32_task_id = worker ? 0 : ((uint32_t)(++tls_dsn_lower32_task_id_mask)) << 16;\n}\n\n/*static*/ void task::on_tls_dsn_not_set()\n{\n    if (service_engine::instance().spec().enable_default_app_mimic) {\n        dsn_mimic_app(\"mimic\", 1);\n    } else {\n        dassert(false,\n                \"rDSN context is not initialized properly, to be fixed as follows:\\n\"\n                \"(1). 
the current thread does NOT belongs to any rDSN service node, please invoke \"\n                \"dsn_mimic_app first,\\n\"\n                \"     or, you can enable [core] enable_default_app_mimic = true in your config \"\n                \"file so mimic_app can be omitted\\n\"\n                \"(2). the current thread belongs to a rDSN service node, and you are writing \"\n                \"providers for rDSN, please use\\n\"\n                \"     task::set_tls_dsn_context(...) at the beginning of your new thread in your \"\n                \"providers;\\n\"\n                \"(3). this should not happen, please help fire an issue so we we can investigate\");\n    }\n}\n\ntask::task(dsn::task_code code, int hash, service_node *node)\n    : _state(TASK_STATE_READY), _wait_event(nullptr)\n{\n    _spec = task_spec::get(code);\n    _hash = hash;\n    _delay_milliseconds = 0;\n    _wait_for_cancel = false;\n    _is_null = false;\n    next = nullptr;\n\n    if (node != nullptr) {\n        _node = node;\n    } else {\n        auto p = get_current_node();\n        dassert(p != nullptr,\n                \"tasks without explicit service node \"\n                \"can only be created inside threads which is attached to specific node\");\n        _node = p;\n    }\n\n    if (tls_dsn.magic != 0xdeadbeef) {\n        set_tls_dsn_context(nullptr, nullptr);\n    }\n\n    _task_id = tls_dsn.node_pool_thread_ids + (++tls_dsn.last_lower32_task_id);\n}\n\ntask::~task()\n{\n    // ATTENTION: should do unset_tracker defore delete _wait_event\n    _context_tracker.unset_tracker();\n\n    if (nullptr != _wait_event.load()) {\n        delete (utils::notify_event *)_wait_event.load();\n        _wait_event.store(nullptr);\n    }\n}\n\nbool task::set_retry(bool enqueue_immediately /*= true*/)\n{\n    task_state RUNNING_STATE = TASK_STATE_RUNNING;\n    if (_state.compare_exchange_strong(\n            RUNNING_STATE, TASK_STATE_READY, std::memory_order_relaxed)) {\n        _error = 
enqueue_immediately ? ERR_OK : ERR_IO_PENDING;\n        return true;\n    } else\n        return false;\n}\n\nvoid task::exec_internal()\n{\n    task_state READY_STATE = TASK_STATE_READY;\n    task_state RUNNING_STATE = TASK_STATE_RUNNING;\n    bool notify_if_necessary = true;\n\n    if (_state.compare_exchange_strong(\n            READY_STATE, TASK_STATE_RUNNING, std::memory_order_relaxed)) {\n        dassert(tls_dsn.magic == 0xdeadbeef, \"thread is not inited with task::set_tls_dsn_context\");\n\n        task *parent_task = tls_dsn.current_task;\n        tls_dsn.current_task = this;\n\n        _spec->on_task_begin.execute(this);\n\n        exec();\n\n        // after exec(), one shot tasks are still in \"running\".\n        // other tasks may call \"set_retry\" to reset tasks to \"ready\",\n        // like timers and rpc_response_tasks\n        if (_state.compare_exchange_strong(RUNNING_STATE,\n                                           TASK_STATE_FINISHED,\n                                           std::memory_order_release,\n                                           std::memory_order_relaxed)) {\n            _spec->on_task_end.execute(this);\n            clear_non_trivial_on_task_end();\n        } else {\n            if (!_wait_for_cancel) {\n                // for retried tasks such as timer or rpc_response_task\n                notify_if_necessary = false;\n                _spec->on_task_end.execute(this);\n\n                if (ERR_OK == _error)\n                    enqueue();\n            } else {\n                // for cancelled\n                if (_state.compare_exchange_strong(READY_STATE,\n                                                   TASK_STATE_CANCELLED,\n                                                   std::memory_order_release,\n                                                   std::memory_order_relaxed)) {\n                    _spec->on_task_cancelled.execute(this);\n                }\n\n                // always call on_task_end()\n    
            _spec->on_task_end.execute(this);\n\n                // for timer task, we must call reset_callback after cancelled, because we don't\n                // reset callback after exec()\n                clear_non_trivial_on_task_end();\n            }\n        }\n\n        tls_dsn.current_task = parent_task;\n    }\n\n    if (notify_if_necessary) {\n        if (signal_waiters()) {\n            spec().on_task_wait_notified.execute(this);\n        }\n    }\n\n    if (!_spec->allow_inline && !_is_null) {\n        lock_checker::check_dangling_lock();\n    }\n\n    this->release_ref(); // added in enqueue(pool)\n}\n\nbool task::signal_waiters()\n{\n    void *evt = _wait_event.load();\n    if (evt != nullptr) {\n        auto nevt = (utils::notify_event *)evt;\n        nevt->notify();\n        return true;\n    }\n    return false;\n}\n\nstatic void check_wait_task(task *waitee)\n{\n    lock_checker::check_wait_safety();\n\n    // not in worker thread\n    if (task::get_current_worker() == nullptr)\n        return;\n\n    // caller and callee don't share the same thread pool,\n    if (waitee->spec().type != TASK_TYPE_RPC_RESPONSE &&\n        (waitee->spec().pool_code != task::get_current_worker()->pool_spec().pool_code))\n        return;\n\n    // callee is empty\n    if (waitee->is_empty())\n        return;\n\n    // there are enough concurrency\n    if (!task::get_current_worker()->pool_spec().partitioned &&\n        task::get_current_worker()->pool_spec().worker_count > 1)\n        return;\n\n    dwarn(\"task %s waits for another task %s sharing the same thread pool \"\n          \"- will lead to deadlocks easily (e.g., when worker_count = 1 or when the pool \"\n          \"is partitioned)\",\n          task::get_current_task()->spec().code.to_string(),\n          waitee->spec().code.to_string());\n}\n\nbool task::wait_on_cancel()\n{\n    check_wait_task(this);\n    return wait(TIME_MS_MAX);\n}\n\nbool task::wait(int timeout_milliseconds)\n{\n    dassert(this != 
task::get_current_task(), \"task cannot wait itself\");\n\n    auto cs = state();\n\n    if (cs >= TASK_STATE_FINISHED) {\n        spec().on_task_wait_post.execute(get_current_task(), this, true);\n        return true;\n    }\n\n    // TODO: using event pool instead\n    void *evt = _wait_event.load();\n    if (evt == nullptr) {\n        evt = new utils::notify_event();\n\n        void *null_h = nullptr;\n        if (!_wait_event.compare_exchange_strong(null_h, evt)) {\n            delete (utils::notify_event *)evt;\n            evt = _wait_event.load();\n        }\n    }\n\n    spec().on_task_wait_pre.execute(get_current_task(), this, (uint32_t)timeout_milliseconds);\n\n    bool ret = (state() >= TASK_STATE_FINISHED);\n    if (!ret) {\n        auto nevt = (utils::notify_event *)evt;\n        ret = (nevt->wait_for(timeout_milliseconds));\n    }\n\n    spec().on_task_wait_post.execute(get_current_task(), this, ret);\n    return ret;\n}\n\nbool task::cancel(bool wait_until_finished, /*out*/ bool *finished /*= nullptr*/)\n{\n    task_state READY_STATE = TASK_STATE_READY;\n    task *current_tsk = get_current_task();\n    bool finish = false;\n    bool succ = false;\n\n    if (current_tsk != this) {\n        if (_state.compare_exchange_strong(\n                READY_STATE, TASK_STATE_CANCELLED, std::memory_order_relaxed)) {\n            succ = true;\n            finish = true;\n        } else {\n            task_state old_state = READY_STATE;\n            if (old_state == TASK_STATE_CANCELLED) {\n                succ = false; // this cancellation fails\n                finish = true;\n            } else if (old_state == TASK_STATE_FINISHED) {\n                succ = false;\n                finish = true;\n            } else if (wait_until_finished) {\n                _wait_for_cancel = true;\n                bool r = wait_on_cancel();\n                dassert(\n                    r,\n                    \"wait failed, it is only possible when task runs for more than 
0x0fffffff ms\");\n\n                succ = false;\n                finish = true;\n            } else {\n                succ = false;\n                finish = false;\n            }\n        }\n    } else {\n        // task cancel itself\n        // for timer task, we should set _wait_for_cancel flag to\n        // prevent timer task from enqueueing again\n        _wait_for_cancel = true;\n    }\n\n    if (current_tsk != nullptr) {\n        current_tsk->spec().on_task_cancel_post.execute(current_tsk, this, succ);\n    }\n\n    if (succ) {\n        spec().on_task_cancelled.execute(this);\n        signal_waiters();\n\n        // we call clear_callback only cancelling succeed.\n        // otherwise, task will successfully exececuted and clear_callback will be called\n        // in \"exec_internal\".\n        clear_non_trivial_on_task_end();\n    }\n\n    if (finished)\n        *finished = finish;\n\n    return succ;\n}\n\nconst char *task::get_current_node_name()\n{\n    auto n = get_current_node2();\n    return n ? n->full_name() : \"unknown\";\n}\n\nvoid task::enqueue()\n{\n    dassert(_node != nullptr, \"service node unknown for this task\");\n    dassert(_spec->type != TASK_TYPE_RPC_RESPONSE,\n            \"tasks with TASK_TYPE_RPC_RESPONSE type use task::enqueue(caller_pool()) instead\");\n    dassert(_error != ERR_IO_PENDING, \"task is waiting for IO, can not be enqueue\");\n\n    auto pool = node()->computation()->get_pool(spec().pool_code);\n    enqueue(pool);\n}\n\nvoid task::enqueue(task_worker_pool *pool)\n{\n    this->add_ref(); // released in exec_internal (even when cancelled)\n\n    dassert(pool != nullptr,\n            \"pool %s not ready, and there are usually two cases: \"\n            \"(1). thread pool not designatd in '[%s] pools'; \"\n            \"(2). 
the caller is executed in io threads \"\n            \"which is forbidden unless you explicitly set [task.%s].allow_inline = true\",\n            _spec->pool_code.to_string(),\n            _node->spec().config_section.c_str(),\n            _spec->name.c_str());\n\n    if (spec().type == TASK_TYPE_COMPUTE) {\n        spec().on_task_enqueue.execute(get_current_task(), this);\n    }\n\n    // for delayed tasks, refering to timer service\n    if (_delay_milliseconds != 0) {\n        pool->add_timer(this);\n        return;\n    }\n\n    // fast execution\n    if (_is_null) {\n        dassert(_node == task::get_current_node(), \"\");\n        exec_internal();\n        return;\n    }\n\n    if (_spec->allow_inline) {\n        // inlined\n        // warning - this may lead to deadlocks, e.g., allow_inlined\n        // task tries to get a non-recursive lock that is already hold\n        // by the caller task\n\n        if (_node != get_current_node()) {\n            tools::node_scoper ns(_node);\n            exec_internal();\n            return;\n        } else {\n            exec_internal();\n            return;\n        }\n    }\n\n    // normal path\n    pool->enqueue(this);\n}\n\nconst std::vector<task_worker *> &get_threadpool_threads_info(threadpool_code code)\n{\n    return dsn::task::get_current_node2()->computation()->get_pool(code)->workers();\n}\n\ntimer_task::timer_task(\n    task_code code, const task_handler &cb, int interval_milliseconds, int hash, service_node *node)\n    : task(code, hash, node), _interval_milliseconds(interval_milliseconds), _cb(cb)\n{\n    dassert(\n        TASK_TYPE_COMPUTE == spec().type,\n        \"%s is not a computation type task, please use DEFINE_TASK_CODE to define the task code\",\n        spec().name.c_str());\n}\n\ntimer_task::timer_task(\n    task_code code, task_handler &&cb, int interval_milliseconds, int hash, service_node *node)\n    : task(code, hash, node), _interval_milliseconds(interval_milliseconds), 
_cb(std::move(cb))\n{\n    dassert(\n        TASK_TYPE_COMPUTE == spec().type,\n        \"%s is not a computation type task, please use DEFINE_TASK_CODE to define the task code\",\n        spec().name.c_str());\n}\n\nvoid timer_task::enqueue()\n{\n    // enable timer randomization to avoid lots of timers execution simultaneously\n    if (delay_milliseconds() == 0 && spec().randomize_timer_delay_if_zero) {\n        set_delay(rand::next_u32(0, _interval_milliseconds));\n    }\n\n    return task::enqueue();\n}\n\nvoid timer_task::exec()\n{\n    if (dsn_likely(_cb != nullptr)) {\n        _cb();\n    }\n    // valid interval, we reset task state to READY\n    if (dsn_likely(_interval_milliseconds > 0)) {\n        dassert(set_retry(true),\n                \"timer task set retry failed, with state = %s\",\n                enum_to_string(state()));\n        set_delay(_interval_milliseconds);\n    }\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/task/task_code.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/utility/customizable_id.h>\n#include <dsn/tool-api/task_code.h>\n#include <dsn/tool-api/task_spec.h>\n\nnamespace dsn {\n\ntypedef dsn::utils::customized_id_mgr<dsn::task_code> task_code_mgr;\n\n/*static*/\nint task_code::max() { return task_code_mgr::instance().max_value(); }\n\n/*static*/\nbool task_code::is_exist(const char *name) { return task_code_mgr::instance().get_id(name) != -1; }\n\n/*static*/\ntask_code task_code::try_get(const char *name, task_code default_value)\n{\n    int code = task_code_mgr::instance().get_id(name);\n    if (code == -1)\n        return default_value;\n    return task_code(code);\n}\n\n/*static*/\ntask_code task_code::try_get(const std::string &name, task_code default_value)\n{\n    int 
code = task_code_mgr::instance().get_id(name);\n    if (code == -1)\n        return default_value;\n    return task_code(code);\n}\n\ntask_code::task_code(const char *name) : _internal_code(task_code_mgr::instance().register_id(name))\n{\n}\n\ntask_code::task_code(const char *name,\n                     dsn_task_type_t tt,\n                     dsn_task_priority_t pri,\n                     dsn::threadpool_code pool)\n    : task_code(name)\n{\n    task_spec::register_task_code(*this, tt, pri, pool);\n}\n\ntask_code::task_code(const char *name,\n                     dsn_task_type_t tt,\n                     dsn_task_priority_t pri,\n                     dsn::threadpool_code pool,\n                     bool is_storage_write,\n                     bool allow_batch,\n                     bool is_idempotent)\n    : task_code(name)\n{\n    task_spec::register_storage_task_code(\n        *this, tt, pri, pool, is_storage_write, allow_batch, is_idempotent);\n}\n\nconst char *task_code::to_string() const\n{\n    return task_code_mgr::instance().get_name(_internal_code);\n}\n}\n"
  },
  {
    "path": "src/runtime/task/task_engine.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/tool-api/command_manager.h>\n#include <fmt/format.h>\n\n#include \"task_engine.h\"\n\nusing namespace dsn::utils;\n\nnamespace dsn {\n\ntask_worker_pool::task_worker_pool(const threadpool_spec &opts, task_engine *owner)\n    : _spec(opts), _owner(owner), _node(owner->node()), _is_running(false)\n{\n}\n\nvoid task_worker_pool::create()\n{\n    if (_is_running)\n        return;\n\n    int qCount = _spec.partitioned ? 
_spec.worker_count : 1;\n    for (int i = 0; i < qCount; i++) {\n        auto q = factory_store<task_queue>::create(\n            _spec.queue_factory_name.c_str(), PROVIDER_TYPE_MAIN, this, i, nullptr);\n        for (auto it = _spec.queue_aspects.begin(); it != _spec.queue_aspects.end(); ++it) {\n            q = factory_store<task_queue>::create(it->c_str(), PROVIDER_TYPE_ASPECT, this, i, q);\n        }\n        _queues.push_back(q);\n    }\n\n    for (int i = 0; i < qCount; ++i) {\n        auto tsvc = factory_store<timer_service>::create(\n            service_engine::instance().spec().timer_factory_name.c_str(),\n            PROVIDER_TYPE_MAIN,\n            _node,\n            nullptr);\n        _per_queue_timer_svcs.push_back(tsvc);\n    }\n\n    for (int i = 0; i < _spec.worker_count; i++) {\n        auto q = _queues[qCount == 1 ? 0 : i];\n        task_worker *worker = factory_store<task_worker>::create(\n            _spec.worker_factory_name.c_str(), PROVIDER_TYPE_MAIN, this, q, i, nullptr);\n        for (auto it = _spec.worker_aspects.begin(); it != _spec.worker_aspects.end(); ++it) {\n            worker = factory_store<task_worker>::create(\n                it->c_str(), PROVIDER_TYPE_ASPECT, this, q, i, worker);\n        }\n        task_worker::on_create.execute(worker);\n\n        _workers.push_back(worker);\n    }\n}\n\nvoid task_worker_pool::start()\n{\n    if (_is_running)\n        return;\n\n    for (auto &tsvc : _per_queue_timer_svcs) {\n        tsvc->start();\n    }\n    for (auto &wk : _workers) {\n        wk->start();\n    }\n\n    ddebug_f(\n        \"[{}]: thread pool [{}] started, pool_code = {}, worker_count = {}, worker_share_core = \"\n        \"{}, partitioned = {}, ...\",\n        _node->full_name(),\n        _spec.name,\n        _spec.pool_code.to_string(),\n        _spec.worker_count,\n        _spec.worker_share_core ? \"true\" : \"false\",\n        _spec.partitioned ? 
\"true\" : \"false\");\n\n    _is_running = true;\n}\n\nvoid task_worker_pool::stop()\n{\n    if (!_is_running) {\n        return;\n    }\n\n    for (auto &tsvc : _per_queue_timer_svcs) {\n        tsvc->stop();\n    }\n    for (auto &wk : _workers) {\n        wk->stop();\n    }\n    _is_running = false;\n    ddebug_f(\"[{}]: thread pool {} stopped\", _node->full_name(), _spec.name);\n}\n\nvoid task_worker_pool::add_timer(task *t)\n{\n    dassert(t->delay_milliseconds() > 0,\n            \"task delayed should be dispatched to timer service first\");\n\n    unsigned int idx = (_spec.partitioned\n                            ? static_cast<unsigned int>(t->hash()) %\n                                  static_cast<unsigned int>(_per_queue_timer_svcs.size())\n                            : 0);\n    _per_queue_timer_svcs[idx]->add_timer(t);\n}\n\nvoid task_worker_pool::enqueue(task *t)\n{\n    dassert(t->spec().pool_code == spec().pool_code || t->spec().type == TASK_TYPE_RPC_RESPONSE,\n            \"Invalid thread pool used\");\n    dassert(t->delay_milliseconds() == 0,\n            \"task delayed should be dispatched to timer service first\");\n\n    dassert(_is_running,\n            \"worker pool %s must be started before enqueue task %s\",\n            spec().name.c_str(),\n            t->spec().name.c_str());\n    unsigned int idx =\n        (_spec.partitioned\n             ? 
static_cast<unsigned int>(t->hash()) % static_cast<unsigned int>(_queues.size())\n             : 0);\n    return _queues[idx]->enqueue_internal(t);\n}\n\nbool task_worker_pool::shared_same_worker_with_current_task(task *tsk) const\n{\n    task *current = task::get_current_task();\n    if (nullptr != current) {\n        if (current->spec().pool_code != tsk->code())\n            return false;\n        else if (_workers.size() == 1)\n            return true;\n        else if (_spec.partitioned) {\n            unsigned int sz = static_cast<unsigned int>(_workers.size());\n            return static_cast<unsigned int>(current->hash()) % sz ==\n                   static_cast<unsigned int>(tsk->hash()) % sz;\n        } else {\n            return false;\n        }\n    } else {\n        return false;\n    }\n}\n\nvoid task_worker_pool::get_runtime_info(const std::string &indent,\n                                        const std::vector<std::string> &args,\n                                        /*out*/ std::stringstream &ss)\n{\n    std::string indent2 = indent + \"\\t\";\n    ss << indent << \"contains \" << _workers.size() << \" threads with \" << _queues.size()\n       << \" queues\" << std::endl;\n\n    for (auto &q : _queues) {\n        if (q) {\n            ss << indent2 << q->get_name() << \" now has \" << q->count() << \" pending tasks\"\n               << std::endl;\n        }\n    }\n\n    for (auto &wk : _workers) {\n        if (wk) {\n            ss << indent2 << wk->index() << \" (TID = \" << wk->native_tid()\n               << \") attached with queue \" << wk->queue()->get_name() << std::endl;\n        }\n    }\n}\nvoid task_worker_pool::get_queue_info(/*out*/ std::stringstream &ss)\n{\n    ss << \"[\";\n    bool first_flag = 0;\n    for (auto &q : _queues) {\n        if (q) {\n            if (!first_flag)\n                first_flag = 1;\n            else\n                ss << \",\";\n            ss << \"\\t\\t{\\\"name\\\":\\\"\" << q->get_name() << 
\"\\\",\\n\\t\\t\\\"num\\\":\" << q->count() << \"}\\n\";\n        }\n    }\n    ss << \"]\\n\";\n}\n\ntask_engine::task_engine(service_node *node)\n{\n    _is_running = false;\n    _node = node;\n    register_cli_commands();\n}\n\nvoid task_engine::create(const std::list<threadpool_code> &pools)\n{\n    if (_is_running)\n        return;\n\n    // init pools\n    _pools.resize(threadpool_code::max() + 1, nullptr);\n    for (auto &p : pools) {\n        auto &s = service_engine::instance().spec().threadpool_specs[p];\n        auto workerPool = new task_worker_pool(s, this);\n        workerPool->create();\n        // TODO(Tang yanzhao): _pools[_pools.size()-1] is always be null, fix it.\n        _pools[p] = workerPool;\n    }\n}\n\nvoid task_engine::start()\n{\n    if (_is_running)\n        return;\n\n    for (auto &pl : _pools) {\n        if (pl)\n            pl->start();\n    }\n    _is_running = true;\n    ddebug_f(\"[{}]: task engine started\", _node->full_name());\n}\n\nvoid task_engine::stop()\n{\n    if (!_is_running) {\n        return;\n    }\n\n    for (auto &pl : _pools) {\n        if (pl)\n            pl->stop();\n    }\n    _is_running = false;\n    ddebug_f(\"[{}]: task engine stopped\", _node->full_name());\n}\n\nvolatile int *task_engine::get_task_queue_virtual_length_ptr(dsn::task_code code, int hash)\n{\n    auto pl = get_pool(task_spec::get(code)->pool_code);\n    auto idx = (pl->spec().partitioned ? 
hash % pl->spec().worker_count : 0);\n    return pl->queues()[idx]->get_virtual_length_ptr();\n}\n\nvoid task_engine::get_runtime_info(const std::string &indent,\n                                   const std::vector<std::string> &args,\n                                   /*out*/ std::stringstream &ss)\n{\n    std::string indent2 = indent + \"\\t\";\n    for (auto &p : _pools) {\n        if (p) {\n            ss << indent << p->spec().pool_code.to_string() << std::endl;\n            p->get_runtime_info(indent2, args, ss);\n        }\n    }\n}\n\nvoid task_engine::get_queue_info(/*out*/ std::stringstream &ss)\n{\n    bool first_flag = 0;\n    for (auto &p : _pools) {\n        if (p) {\n            if (!first_flag)\n                first_flag = 1;\n            else\n                ss << \",\";\n            ss << \"\\t{\\\"pool_name\\\":\\\"\" << p->spec().pool_code.to_string()\n               << \"\\\",\\n\\t\\\"pool_queue\\\":\\n\";\n            p->get_queue_info(ss);\n            ss << \"}\\n\";\n        }\n    }\n}\n\nvoid task_engine::register_cli_commands()\n{\n    static std::once_flag flag;\n    std::call_once(flag, [&]() {\n        _task_queue_max_length_cmd = dsn::command_manager::instance().register_command(\n            {\"task.queue_max_length\"},\n            \"task.queue_max_length <pool_code> [queue_max_length]\",\n            \"get/set the max task queue length of specific thread_pool, you can set INT_MAX, to \"\n            \"set a big enough value, but you can't cancel delay/reject dynamically\",\n            [this](const std::vector<std::string> &args) {\n                if (args.size() < 1) {\n                    return std::string(\"ERR: invalid arguments, task.queue_max_length <pool_code> \"\n                                       \"[queue_max_length]\");\n                }\n\n                for (auto &it : _pools) {\n                    if (!it) {\n                        continue;\n                    }\n                    if 
(it->_spec.pool_code.to_string() == args[0]) {\n                        // when args length is 1, return current value\n                        if (args.size() == 1) {\n                            return fmt::format(\"task queue {}, length {}\",\n                                               args[0],\n                                               it->_spec.queue_length_throttling_threshold);\n                        }\n                        if (args.size() == 2) {\n                            int queue_length = INT_MAX;\n                            if ((args[1] != \"INT_MAX\") &&\n                                (!dsn::buf2int32(args[1], queue_length))) {\n                                return fmt::format(\"queue_max_length must >= 0, or set `INT_MAX`\");\n                            }\n                            if (queue_length < 0) {\n                                queue_length = INT_MAX;\n                            }\n                            it->_spec.queue_length_throttling_threshold = queue_length;\n                            return fmt::format(\"task queue {}, length {}\",\n                                               args[0],\n                                               it->_spec.queue_length_throttling_threshold);\n                        }\n                    }\n                }\n                return std::string(\"ERR: thread_pool not found\");\n            });\n    });\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/task/task_engine.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include \"runtime/service_engine.h\"\n#include <dsn/tool-api/task_queue.h>\n#include <dsn/tool-api/task_worker.h>\n#include <dsn/tool-api/timer_service.h>\n#include <dsn/tool-api/command_manager.h>\n\nnamespace dsn {\n\nclass task_engine;\nclass task_worker_pool;\nclass task_worker;\n\n//\n// a task_worker_pool is a set of TaskWorkers share the same configs;\n// they may even share the same task_queue when partitioned == false\n//\nclass task_worker_pool\n{\npublic:\n    task_worker_pool(const threadpool_spec &opts, task_engine *owner);\n\n    
// service management\n    void create();\n    void start();\n    void stop();\n\n    // task processing\n    void enqueue(task *task);\n    void on_dequeue(int count);\n\n    // cached timer service access\n    void add_timer(task *task);\n\n    // inquiry\n    const threadpool_spec &spec() const { return _spec; }\n    bool shared_same_worker_with_current_task(task *task) const;\n    task_engine *engine() const { return _owner; }\n    service_node *node() const { return _node; }\n    void get_runtime_info(const std::string &indent,\n                          const std::vector<std::string> &args,\n                          /*out*/ std::stringstream &ss);\n    void get_queue_info(/*out*/ std::stringstream &ss);\n    std::vector<task_queue *> &queues() { return _queues; }\n    std::vector<task_worker *> &workers() { return _workers; }\n\nprivate:\n    friend class task_engine;\n\n    threadpool_spec _spec;\n    task_engine *_owner;\n    service_node *_node;\n\n    std::vector<task_worker *> _workers;\n    std::vector<task_queue *> _queues;\n\n    std::vector<timer_service *> _per_queue_timer_svcs;\n\n    bool _is_running;\n};\n\nclass task_engine\n{\npublic:\n    task_engine(service_node *node);\n    ~task_engine()\n    {\n        stop();\n        UNREGISTER_VALID_HANDLER(_task_queue_max_length_cmd);\n    }\n\n    //\n    // service management routines\n    //\n    void create(const std::list<dsn::threadpool_code> &pools);\n    void start();\n    void stop();\n\n    //\n    // task management routines\n    //\n    task_worker_pool *get_pool(int code) const { return _pools[code]; }\n    std::vector<task_worker_pool *> &pools() { return _pools; }\n\n    bool is_started() const { return _is_running; }\n\n    volatile int *get_task_queue_virtual_length_ptr(dsn::task_code code, int hash);\n\n    service_node *node() const { return _node; }\n    void get_runtime_info(const std::string &indent,\n                          const std::vector<std::string> &args,\n             
             /*out*/ std::stringstream &ss);\n    void get_queue_info(/*out*/ std::stringstream &ss);\n\nprivate:\n    void register_cli_commands();\n\n    std::vector<task_worker_pool *> _pools;\n    volatile bool _is_running;\n    service_node *_node;\n    dsn_handle_t _task_queue_max_length_cmd;\n};\n\n// -------------------- inline implementation ----------------------------\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/task/task_engine.sim.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/utility/rand.h>\n#include \"task_engine.sim.h\"\n#include \"runtime/scheduler.h\"\n\nnamespace dsn {\nnamespace tools {\n\nvoid sim_timer_service::add_timer(task *task) { scheduler::instance().add_task(task, nullptr); }\n\nsim_task_queue::sim_task_queue(task_worker_pool *pool, int index, task_queue *inner_provider)\n    : task_queue(pool, index, inner_provider)\n{\n}\n\nvoid sim_task_queue::enqueue(task *t)\n{\n    dassert(0 == t->delay_milliseconds(), \"delay time must be zero\");\n    if (_tasks.size() > 0) {\n        do {\n            int 
random_pos = rand::next_u32(0, 1000000);\n            auto pr = _tasks.insert(std::map<uint32_t, task *>::value_type(random_pos, t));\n            if (pr.second)\n                break;\n        } while (true);\n    } else {\n        int random_pos = rand::next_u32(0, 1000000);\n        _tasks.insert(std::map<uint32_t, task *>::value_type(random_pos, t));\n    }\n\n    // for scheduling\n    if (task::get_current_worker()) {\n        scheduler::instance().wait_schedule(true, true);\n    }\n}\n\n// we always return 1 or 0 task in simulator\ntask *sim_task_queue::dequeue(/*inout*/ int &batch_size)\n{\n    scheduler::instance().wait_schedule(false);\n\n    if (_tasks.size() > 0) {\n        auto t = _tasks.begin()->second;\n        _tasks.erase(_tasks.begin());\n        batch_size = 1;\n        return t;\n    } else {\n        batch_size = 0;\n        return nullptr;\n    }\n}\n\nvoid sim_semaphore_provider::signal(int count)\n{\n    _count += count;\n\n    while (!_wait_threads.empty() && _count > 0) {\n        --_count;\n\n        sim_worker_state *thread = _wait_threads.front();\n        _wait_threads.pop_front();\n        thread->is_continuation_ready = true;\n    }\n}\n\nbool sim_semaphore_provider::wait(int timeout_milliseconds)\n{\n    if (_count > 0) {\n        --_count;\n        scheduler::instance().wait_schedule(true, true);\n        return true;\n    } else if (0 == timeout_milliseconds) {\n        scheduler::instance().wait_schedule(true, true);\n        return false;\n    } else {\n        // wait success\n        if (static_cast<unsigned int>(timeout_milliseconds) == TIME_MS_MAX ||\n            rand::next_double01() <= 0.5) {\n            _wait_threads.push_back(scheduler::task_worker_ext::get(task::get_current_worker()));\n            scheduler::instance().wait_schedule(true, false);\n            return true;\n        }\n\n        // timeout\n        else {\n            scheduler::instance().wait_schedule(true, true);\n            return false;\n        
}\n    }\n}\n\nsim_lock_provider::sim_lock_provider(lock_provider *inner_provider)\n    : lock_provider(inner_provider), _sema(1, nullptr)\n{\n    _current_holder = -1;\n    _lock_depth = 0;\n}\n\nsim_lock_provider::~sim_lock_provider() {}\n\nvoid sim_lock_provider::lock()\n{\n    // ignore locks inside schedulers\n    if (scheduler::is_scheduling())\n        return;\n\n    int ctid = ::dsn::utils::get_current_tid();\n    if (ctid == _current_holder) {\n        ++_lock_depth;\n        return;\n    }\n\n    _sema.wait(TIME_MS_MAX);\n\n    dassert(-1 == _current_holder && _lock_depth == 0, \"must be unlocked state\");\n    _current_holder = ctid;\n    ++_lock_depth;\n}\n\nbool sim_lock_provider::try_lock()\n{\n    // ignore locks inside schedulers\n    if (scheduler::is_scheduling())\n        return true;\n\n    int ctid = ::dsn::utils::get_current_tid();\n    if (ctid == _current_holder) {\n        ++_lock_depth;\n        return true;\n    }\n\n    bool r = _sema.wait(0);\n    if (r) {\n        dassert(-1 == _current_holder && _lock_depth == 0, \"must be unlocked state\");\n        _current_holder = ctid;\n        ++_lock_depth;\n    }\n    return r;\n}\n\nvoid sim_lock_provider::unlock()\n{\n    // ignore locks inside schedulers\n    if (scheduler::is_scheduling())\n        return;\n\n    dassert(::dsn::utils::get_current_tid() == _current_holder,\n            \"lock must be locked must current holder\");\n\n    if (0 == --_lock_depth) {\n        _current_holder = -1;\n        _sema.signal(1);\n    } else {\n    }\n}\n\nsim_lock_nr_provider::sim_lock_nr_provider(lock_nr_provider *inner_provider)\n    : lock_nr_provider(inner_provider), _sema(1, nullptr)\n{\n    _current_holder = -1;\n    _lock_depth = 0;\n}\n\nsim_lock_nr_provider::~sim_lock_nr_provider() {}\n\nvoid sim_lock_nr_provider::lock()\n{\n    // ignore locks inside schedulers\n    if (scheduler::is_scheduling())\n        return;\n\n    int ctid = ::dsn::utils::get_current_tid();\n    dassert(ctid != 
_current_holder, \"non-recursive lock, error or use recursive locks instead\");\n\n    _sema.wait(TIME_MS_MAX);\n\n    dassert(-1 == _current_holder && _lock_depth == 0, \"must be unlocked state\");\n    _current_holder = ctid;\n    ++_lock_depth;\n}\n\nbool sim_lock_nr_provider::try_lock()\n{\n    // ignore locks inside schedulers\n    if (scheduler::is_scheduling())\n        return true;\n\n    int ctid = ::dsn::utils::get_current_tid();\n    dassert(ctid != _current_holder, \"non-recursive lock, error or use recursive locks instead\");\n\n    bool r = _sema.wait(0);\n    if (r) {\n        dassert(-1 == _current_holder && _lock_depth == 0, \"must be unlocked state\");\n        _current_holder = ctid;\n        ++_lock_depth;\n    }\n    return r;\n}\n\nvoid sim_lock_nr_provider::unlock()\n{\n    // ignore locks inside schedulers\n    if (scheduler::is_scheduling())\n        return;\n\n    dassert(::dsn::utils::get_current_tid() == _current_holder,\n            \"lock must be locked must current holder\");\n\n    if (0 == --_lock_depth) {\n        _current_holder = -1;\n        _sema.signal(1);\n    } else {\n        dassert(false, \"non-recursive lock, error or use recursive locks instead\");\n    }\n}\n}\n} // end namespace\n"
  },
  {
    "path": "src/runtime/task/task_engine.sim.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/utility/priority_queue.h>\n#include <dsn/tool-api/timer_service.h>\n#include <dsn/tool-api/task_queue.h>\n#include \"utils/zlock_provider.h\"\n\nnamespace dsn {\nnamespace tools {\n\nclass sim_timer_service : public timer_service\n{\npublic:\n    sim_timer_service(service_node *node, timer_service *inner_provider)\n        : timer_service(node, inner_provider)\n    {\n    }\n\n    // after milliseconds, the provider should call task->enqueue()\n    virtual void add_timer(task *task) override;\n\n    virtual void start() override 
{}\n\n    virtual void stop() override {}\n};\n\nclass sim_task_queue : public task_queue\n{\npublic:\n    sim_task_queue(task_worker_pool *pool, int index, task_queue *inner_provider);\n\n    virtual void enqueue(task *task) override;\n    virtual task *dequeue(/*inout*/ int &batch_size) override;\n\nprivate:\n    std::map<uint32_t, task *> _tasks;\n};\n\nstruct sim_worker_state;\nclass sim_semaphore_provider : public semaphore_provider\n{\npublic:\n    sim_semaphore_provider(int initial_count, semaphore_provider *inner_provider)\n        : semaphore_provider(initial_count, inner_provider), _count(initial_count)\n    {\n    }\n\npublic:\n    virtual void signal(int count);\n    virtual bool wait(int timeout_milliseconds);\n\nprivate:\n    int _count;\n    std::list<sim_worker_state *> _wait_threads;\n};\n\nclass sim_lock_provider : public lock_provider\n{\npublic:\n    sim_lock_provider(lock_provider *inner_provider);\n    virtual ~sim_lock_provider();\n\n    virtual void lock();\n    virtual bool try_lock();\n    virtual void unlock();\n\nprivate:\n    int _lock_depth;     // 0 for not locked;\n    int _current_holder; // -1 for invalid\n    sim_semaphore_provider _sema;\n};\n\nclass sim_lock_nr_provider : public lock_nr_provider\n{\npublic:\n    sim_lock_nr_provider(lock_nr_provider *inner_provider);\n    virtual ~sim_lock_nr_provider();\n\n    virtual void lock();\n    virtual bool try_lock();\n    virtual void unlock();\n\nprivate:\n    int _lock_depth;     // 0 for not locked;\n    int _current_holder; // -1 for invalid\n    sim_semaphore_provider _sema;\n};\n\n// degrade to lock_nr for simplicity\nclass sim_rwlock_nr_provider : public rwlock_nr_provider\n{\npublic:\n    sim_rwlock_nr_provider(rwlock_nr_provider *inner_provider)\n        : rwlock_nr_provider(inner_provider), _l(nullptr)\n    {\n    }\n\n    virtual ~sim_rwlock_nr_provider() {}\n\n    virtual void lock_read() { return _l.lock(); }\n    virtual void unlock_read() { return _l.unlock(); }\n    
virtual bool try_lock_read() { return _l.try_lock(); }\n\n    virtual void lock_write() { return _l.lock(); }\n    virtual void unlock_write() { return _l.unlock(); }\n    virtual bool try_lock_write() { return _l.try_lock(); }\n\nprivate:\n    sim_lock_nr_provider _l;\n};\n}\n} // end namespace\n"
  },
  {
    "path": "src/runtime/task/task_queue.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/tool-api/task_queue.h>\n#include \"task_engine.h\"\n#include <dsn/tool-api/network.h>\n#include <dsn/dist/fmt_logging.h>\n#include \"runtime/rpc/rpc_engine.h\"\n\nnamespace dsn {\n\ntask_queue::task_queue(task_worker_pool *pool, int index, task_queue *inner_provider)\n    : _pool(pool), _queue_length(0)\n{\n    char num[30];\n    sprintf(num, \"%u\", index);\n    _index = index;\n    _name = pool->spec().name + '.';\n    _name.append(num);\n    _queue_length_counter.init_global_counter(_pool->node()->full_name(),\n                                              \"engine\",\n                                              (_name + \".queue.length\").c_str(),\n                                              
COUNTER_TYPE_NUMBER,\n                                              \"task queue length\");\n    _delay_task_counter.init_global_counter(_pool->node()->full_name(),\n                                            \"engine\",\n                                            (_name + \".queue.delay_task\").c_str(),\n                                            COUNTER_TYPE_VOLATILE_NUMBER,\n                                            \"delay count of tasks before enqueue\");\n    _reject_task_counter.init_global_counter(_pool->node()->full_name(),\n                                             \"engine\",\n                                             (_name + \".queue.reject_task\").c_str(),\n                                             COUNTER_TYPE_VOLATILE_NUMBER,\n                                             \"reject count of tasks before enqueue\");\n    _virtual_queue_length = 0;\n    _spec = (threadpool_spec *)&pool->spec();\n}\n\ntask_queue::~task_queue() = default;\n\n// This function is used to throttle tasks before they enter the queue\n// `queue_length_throttling_threshold` is configured by task pool\n// `throttling_mode` is configured by the specific task\n// Because not all tasks in the queue can handle the `ERR_BUSY` exception\nvoid task_queue::enqueue_internal(task *task)\n{\n    auto &sp = task->spec();\n    auto throttle_mode = sp.rpc_request_throttling_mode;\n    if (throttle_mode != TM_NONE) {\n        int ac_value = 0;\n        if (_spec->enable_virtual_queue_throttling) {\n            ac_value = _virtual_queue_length;\n        } else {\n            ac_value = count();\n        }\n\n        if (throttle_mode == TM_DELAY) {\n            int delay_ms =\n                sp.rpc_request_delayer.delay(ac_value, _spec->queue_length_throttling_threshold);\n            if (delay_ms > 0) {\n                auto rtask = static_cast<rpc_request_task *>(task);\n                if (rtask->get_request()->io_session->delay_recv(delay_ms)) {\n                    
_delay_task_counter->increment();\n                }\n            }\n        } else {\n            dbg_dassert(TM_REJECT == throttle_mode, \"unknow mode %d\", (int)throttle_mode);\n\n            if (ac_value > _spec->queue_length_throttling_threshold) {\n                auto rtask = static_cast<rpc_request_task *>(task);\n                auto resp = rtask->get_request()->create_response();\n                task::get_current_rpc()->reply(resp, ERR_BUSY);\n                _reject_task_counter->increment();\n                task->release_ref(); // added in task::enqueue(pool)\n                return;\n            }\n        }\n    }\n\n    tls_dsn.last_worker_queue_size = increase_count();\n    enqueue(task);\n}\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/task/task_spec.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <array>\n\n#include <dsn/tool-api/task_spec.h>\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/tool-api/threadpool_spec.h>\n#include <dsn/utility/smart_pointers.h>\n\nnamespace dsn {\n\nconstexpr int TASK_SPEC_STORE_CAPACITY = 512;\n\nstd::set<dsn::task_code> &get_storage_rpc_req_codes()\n{\n    static std::set<dsn::task_code> s_storage_rpc_req_codes;\n    return s_storage_rpc_req_codes;\n}\n\n// A sequential storage maps task_code to task_spec.\nstatic std::array<std::unique_ptr<task_spec>, TASK_SPEC_STORE_CAPACITY> s_task_spec_store;\n\nvoid 
task_spec::register_task_code(task_code code,\n                                   dsn_task_type_t type,\n                                   dsn_task_priority_t pri,\n                                   dsn::threadpool_code pool)\n{\n    dassert(code < TASK_SPEC_STORE_CAPACITY, \"code = %d\", code);\n    if (!s_task_spec_store[code]) {\n        s_task_spec_store[code] = make_unique<task_spec>(code, code.to_string(), type, pri, pool);\n        auto &spec = s_task_spec_store[code];\n\n        if (type == TASK_TYPE_RPC_REQUEST) {\n            std::string ack_name = std::string(code.to_string()) + std::string(\"_ACK\");\n            // for a rpc request, we firstly register it's ack code to invalid threadpool,\n            // then the response code's definition will reassign a proper valid threadpool code.\n            // please refer to the DEFINE_TASK_CODE_RPC/DEFINE_STORAGE_RPC_CODE in task_code.h\n            // for more details.\n            dsn::task_code ack_code(\n                ack_name.c_str(), TASK_TYPE_RPC_RESPONSE, pri, THREAD_POOL_INVALID);\n            spec->rpc_paired_code = ack_code;\n            task_spec::get(ack_code.code())->rpc_paired_code = code;\n        }\n    } else {\n        auto spec = task_spec::get(code);\n        if (spec->type != type) {\n            dassert(\n                false,\n                \"task code %s registerd for %s, which does not match with previously registered %s\",\n                code.to_string(),\n                enum_to_string(type),\n                enum_to_string(spec->type));\n            return;\n        }\n\n        if (spec->priority != pri) {\n            dwarn(\"overwrite priority for task %s from %s to %s\",\n                  code.to_string(),\n                  enum_to_string(spec->priority),\n                  enum_to_string(pri));\n            spec->priority = pri;\n        }\n\n        if (spec->pool_code != pool) {\n            if (spec->pool_code != THREAD_POOL_INVALID) {\n                
dwarn(\"overwrite default thread pool for task %s from %s to %s\",\n                      code.to_string(),\n                      spec->pool_code.to_string(),\n                      pool.to_string());\n            }\n            spec->pool_code = pool;\n        }\n    }\n}\n\nvoid task_spec::register_storage_task_code(task_code code,\n                                           dsn_task_type_t type,\n                                           dsn_task_priority_t pri,\n                                           threadpool_code pool,\n                                           bool is_write_operation,\n                                           bool allow_batch,\n                                           bool is_idempotent)\n{\n    register_task_code(code, type, pri, pool);\n    task_spec *spec = task_spec::get(code);\n    spec->rpc_request_for_storage = true;\n    spec->rpc_request_is_write_operation = is_write_operation;\n    spec->rpc_request_is_write_allow_batch = allow_batch;\n    spec->rpc_request_is_write_idempotent = is_idempotent;\n    if (TASK_TYPE_RPC_REQUEST == type) {\n        get_storage_rpc_req_codes().insert(code);\n    }\n}\n\ntask_spec *task_spec::get(int code)\n{\n    return code < TASK_SPEC_STORE_CAPACITY ? 
s_task_spec_store[code].get() : nullptr;\n}\n\ntask_spec::task_spec(int code,\n                     const char *name,\n                     dsn_task_type_t type,\n                     dsn_task_priority_t pri,\n                     dsn::threadpool_code pool)\n    : code(code),\n      type(type),\n      name(name),\n      rpc_paired_code(TASK_CODE_INVALID),\n      rpc_request_for_storage(false),\n      rpc_request_is_write_operation(false),\n      rpc_request_is_write_allow_batch(false),\n      rpc_request_is_write_idempotent(false),\n      priority(pri),\n      pool_code(pool),\n      rpc_call_header_format(NET_HDR_DSN),\n      rpc_call_channel(RPC_CHANNEL_TCP),\n      rpc_message_crc_required(false),\n      on_task_create((std::string(name) + std::string(\".create\")).c_str()),\n      on_task_enqueue((std::string(name) + std::string(\".enqueue\")).c_str()),\n      on_task_begin((std::string(name) + std::string(\".begin\")).c_str()),\n      on_task_end((std::string(name) + std::string(\".end\")).c_str()),\n      on_task_cancelled((std::string(name) + std::string(\".cancelled\")).c_str()),\n\n      on_task_wait_pre((std::string(name) + std::string(\".wait.pre\")).c_str()),\n      on_task_wait_notified((std::string(name) + std::string(\".wait.notified\")).c_str()),\n      on_task_wait_post((std::string(name) + std::string(\".wait.post\")).c_str()),\n      on_task_cancel_post((std::string(name) + std::string(\".cancel.post\")).c_str()),\n\n      on_aio_call((std::string(name) + std::string(\".aio.call\")).c_str()),\n      on_aio_enqueue((std::string(name) + std::string(\".aio.enqueue\")).c_str()),\n\n      on_rpc_call((std::string(name) + std::string(\".rpc.call\")).c_str()),\n      on_rpc_request_enqueue((std::string(name) + std::string(\".rpc.request.enqueue\")).c_str()),\n      on_rpc_task_dropped((std::string(name) + std::string(\".dropped\")).c_str()),\n      on_rpc_reply((std::string(name) + std::string(\".rpc.reply\")).c_str()),\n      
on_rpc_response_enqueue((std::string(name) + std::string(\".rpc.response.enqueue\")).c_str()),\n      on_rpc_create_response((std::string(name) + std::string(\".rpc.response.create\")).c_str())\n{\n    dassert(strlen(name) < DSN_MAX_TASK_CODE_NAME_LENGTH,\n            \"task code name '%s' is too long: length must be smaller than \"\n            \"DSN_MAX_TASK_CODE_NAME_LENGTH (%u)\",\n            name,\n            DSN_MAX_TASK_CODE_NAME_LENGTH);\n\n    rpc_call_channel = RPC_CHANNEL_TCP;\n    rpc_timeout_milliseconds = 5 * 1000; // 5 seconds\n}\n\nbool task_spec::init()\n{\n    /*\n    [task..default]\n    is_trace = false\n    is_profile = false\n\n    [task.RPC_PREPARE]\n    pool_code = THREAD_POOL_REPLICATION\n    priority = TASK_PRIORITY_HIGH\n    is_trace = true\n    is_profile = true\n    */\n\n    task_spec default_spec(\n        0, \"placeholder\", TASK_TYPE_COMPUTE, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT);\n    if (!read_config(\"task..default\", default_spec))\n        return false;\n\n    for (int code = 0; code <= dsn::task_code::max(); code++) {\n        if (code == TASK_CODE_INVALID)\n            continue;\n\n        std::string section_name =\n            std::string(\"task.\") + std::string(dsn::task_code(code).to_string());\n        task_spec *spec = task_spec::get(code);\n        dassert(spec != nullptr, \"task_spec cannot be null\");\n\n        if (!read_config(section_name.c_str(), *spec, &default_spec))\n            return false;\n\n        if (code == TASK_CODE_EXEC_INLINED) {\n            spec->allow_inline = true;\n        }\n\n        dassert(spec->rpc_request_delays_milliseconds.size() == 0 ||\n                    spec->rpc_request_delays_milliseconds.size() == 6,\n                \"invalid length of rpc_request_delays_milliseconds, must be of length 6\");\n        if (spec->rpc_request_delays_milliseconds.size() > 0) {\n            spec->rpc_request_delayer.initialize(spec->rpc_request_delays_milliseconds);\n        }\n\n        if 
(spec->rpc_request_throttling_mode != TM_NONE) {\n            if (spec->type != TASK_TYPE_RPC_REQUEST) {\n                derror(\"%s: only rpc request type can have non TM_NONE throttling_mode\",\n                       spec->name.c_str());\n                return false;\n            }\n        }\n    }\n\n    ::dsn::command_manager::instance().register_command(\n        {\"task-code\"},\n        \"task-code - query task code containing any given keywords\",\n        \"task-code keyword1 keyword2 ...\",\n        [](const std::vector<std::string> &args) {\n            std::stringstream ss;\n\n            for (int code = 0; code <= dsn::task_code::max(); code++) {\n                if (code == TASK_CODE_INVALID)\n                    continue;\n\n                std::string codes = dsn::task_code(code).to_string();\n                if (args.size() == 0) {\n                    ss << \"    \" << codes << std::endl;\n                } else {\n                    for (auto &arg : args) {\n                        if (codes.find(arg.c_str()) != std::string::npos) {\n                            ss << \"    \" << codes << std::endl;\n                        }\n                    }\n                }\n            }\n            return ss.str();\n        });\n\n    return true;\n}\n\nbool threadpool_spec::init(/*out*/ std::vector<threadpool_spec> &specs)\n{\n    /*\n    [threadpool..default]\n    worker_count = 4\n    worker_priority = THREAD_xPRIORITY_NORMAL\n    partitioned = false\n    queue_aspects = xxx\n    worker_aspects = xxx\n\n    [threadpool.THREAD_POOL_REPLICATION]\n    name = Thr.replication\n    run = true\n    worker_count = 4\n    worker_priority = THREAD_xPRIORITY_NORMAL\n    partitioned = false\n    queue_aspects = xxx\n    worker_aspects = xxx\n    */\n\n    threadpool_spec default_spec(THREAD_POOL_INVALID);\n    if (false == read_config(\"threadpool..default\", default_spec, nullptr))\n        return false;\n\n    default_spec.name = \"\";\n    
specs.clear();\n    for (int code = 0; code <= threadpool_code::max(); code++) {\n        std::string code_name = std::string(threadpool_code(code).to_string());\n        std::string section_name = std::string(\"threadpool.\") + code_name;\n        threadpool_spec spec(default_spec);\n        if (false == read_config(section_name.c_str(), spec, &default_spec))\n            return false;\n\n        spec.pool_code = threadpool_code(code);\n\n        if (\"\" == spec.name)\n            spec.name = code_name;\n\n        if (false == spec.worker_share_core && 0 == spec.worker_affinity_mask) {\n            spec.worker_affinity_mask = (1 << std::thread::hardware_concurrency()) - 1;\n        }\n\n        specs.push_back(spec);\n    }\n\n    return true;\n}\n\n} // end namespace\n"
  },
  {
    "path": "src/runtime/task/task_tracker.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/tool-api/task_tracker.h>\n#include <dsn/tool-api/task.h>\n#include <dsn/tool_api.h>\n\nnamespace dsn {\n\ntask_tracker::task_tracker(int task_bucket_count) : _task_bucket_count(task_bucket_count)\n{\n    _outstanding_tasks_lock = new ::dsn::utils::ex_lock_nr_spin[_task_bucket_count];\n    _outstanding_tasks = new dlink[_task_bucket_count];\n}\n\ntask_tracker::~task_tracker()\n{\n    cancel_outstanding_tasks();\n\n    delete[] _outstanding_tasks;\n    delete[] _outstanding_tasks_lock;\n}\n\n// TODO:\n// hack for wait/cancel inside spin locks\nstruct 
tls_tracker_hack\n{\n    unsigned int magic;\n    bool is_simulator;\n\n    bool under_simulation()\n    {\n        if (magic != 0xdeadbeef) {\n            is_simulator = (dsn::tools::get_current_tool()->name() == \"simulator\");\n            magic = 0xdeadbeef;\n        }\n        return is_simulator;\n    }\n};\n\nstatic __thread tls_tracker_hack s_hack;\n\nvoid task_tracker::wait_outstanding_tasks()\n{\n    for (int i = 0; i < _task_bucket_count; i++) {\n        while (true) {\n            trackable_task::owner_delete_state prepare_state;\n            trackable_task *tcm;\n\n            {\n                utils::auto_lock<::dsn::utils::ex_lock_nr_spin> l(_outstanding_tasks_lock[i]);\n                auto n = _outstanding_tasks[i].next();\n                if (n != &_outstanding_tasks[i]) {\n                    tcm = CONTAINING_RECORD(n, trackable_task, _dl);\n\n                    // try to get the lock\n                    prepare_state = tcm->owner_delete_prepare();\n                } else\n                    break; // assuming nobody is putting tasks into it anymore\n            }\n\n            switch (prepare_state) {\n            // tracker get the lock\n            case trackable_task::OWNER_DELETE_NOT_LOCKED:\n                if (s_hack.under_simulation()) {\n                    task *tsk = tcm->_task;\n                    tsk->add_ref(); // released after delete commit\n                    tcm->owner_delete_commit();\n\n                    tsk->wait();        // wait outside the delete spin lock\n                    tsk->release_ref(); // added before delete commit\n                } else {\n                    tcm->_task->wait();\n                    tcm->owner_delete_commit();\n                }\n                break;\n            case trackable_task::OWNER_DELETE_LOCKED:\n            case trackable_task::OWNER_DELETE_FINISHED:\n                break;\n            }\n        }\n    }\n}\n\nvoid task_tracker::cancel_outstanding_tasks()\n{\n    for 
(int i = 0; i < _task_bucket_count; i++) {\n        while (true) {\n            trackable_task::owner_delete_state prepare_state;\n            trackable_task *tcm;\n\n            {\n                utils::auto_lock<::dsn::utils::ex_lock_nr_spin> l(_outstanding_tasks_lock[i]);\n                auto n = _outstanding_tasks[i].next();\n                if (n != &_outstanding_tasks[i]) {\n                    tcm = CONTAINING_RECORD(n, trackable_task, _dl);\n                    prepare_state = tcm->owner_delete_prepare();\n                } else\n                    break; // assuming nobody is putting tasks into it anymore\n            }\n\n            switch (prepare_state) {\n            case trackable_task::OWNER_DELETE_NOT_LOCKED:\n                if (s_hack.under_simulation()) {\n                    task *tsk = tcm->_task;\n                    tsk->add_ref(); // released after delete commit\n                    tcm->owner_delete_commit();\n\n                    tsk->cancel(true);  // cancel outside the delete spin lock\n                    tsk->release_ref(); // added before delete commit\n                } else {\n                    tcm->_task->cancel(true);\n                    tcm->owner_delete_commit();\n                }\n                break;\n            case trackable_task::OWNER_DELETE_LOCKED:\n            case trackable_task::OWNER_DELETE_FINISHED:\n                break;\n            }\n        }\n    }\n}\n\nint task_tracker::cancel_but_not_wait_outstanding_tasks()\n{\n    int not_finished = 0;\n    for (int i = 0; i < _task_bucket_count; i++) {\n        utils::auto_lock<::dsn::utils::ex_lock_nr_spin> l(_outstanding_tasks_lock[i]);\n        auto n = _outstanding_tasks[i].next();\n        if (n != &_outstanding_tasks[i]) {\n            trackable_task *tcm = CONTAINING_RECORD(n, trackable_task, _dl);\n            if (tcm->_task != task::get_current_task()) {\n                bool finished;\n                tcm->_task->cancel(false, &finished);\n          
      if (!finished)\n                    not_finished++;\n            }\n        }\n    }\n    return not_finished;\n}\n}\n"
  },
  {
    "path": "src/runtime/task/task_worker.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#if defined(__linux__)\n#include <sys/prctl.h>\n#endif // defined(__linux__)\n\n#include <sstream>\n#include <dsn/utility/process_utils.h>\n#include <dsn/utility/smart_pointers.h>\n\n#include \"task_engine.h\"\n\nnamespace dsn {\n\njoin_point<void, task_worker *> task_worker::on_start(\"task_worker::on_start\");\njoin_point<void, task_worker *> task_worker::on_create(\"task_worker::on_create\");\n\ntask_worker::task_worker(task_worker_pool *pool,\n                         task_queue *q,\n                         int index,\n                         task_worker *inner_provider)\n{\n    _owner_pool = pool;\n    _input_queue = q;\n    _index = index;\n    _native_tid = ::dsn::utils::INVALID_TID;\n\n    char name[256];\n    sprintf(name, 
\"%5s.%s.%u\", pool->node()->full_name(), pool->spec().name.c_str(), index);\n    _name = std::string(name);\n    _is_running = false;\n\n    _thread = nullptr;\n    _processed_task_count = 0;\n}\n\ntask_worker::~task_worker()\n{\n    if (!_is_running)\n        return;\n\n    // TODO(wutao1): use join, detach is not work with valgrind\n    _thread->detach();\n}\n\nvoid task_worker::start()\n{\n    if (_is_running)\n        return;\n\n    _is_running = true;\n\n    _thread = make_unique<std::thread>(std::bind(&task_worker::run_internal, this));\n\n    _started.wait();\n}\n\nvoid task_worker::stop()\n{\n    if (!_is_running)\n        return;\n\n    _is_running = false;\n\n    _thread->join();\n}\n\nvoid task_worker::set_name(const char *name)\n{\n#if defined(__linux__)\n    // http://0pointer.de/blog/projects/name-your-threads.html\n    // Set the name for the LWP (which gets truncated to 15 characters).\n    // Note that glibc also has a 'pthread_setname_np' api, but it may not be\n    // available everywhere and it's only benefit over using prctl directly is\n    // that it can set the name of threads other than the current thread.\n    int err = prctl(PR_SET_NAME, name);\n#else\n    int err = pthread_setname_np(name);\n#endif // defined(__linux__)\n    // We expect EPERM failures in sandboxed processes, just ignore those.\n    if (err < 0 && errno != EPERM) {\n        dwarn(\"Fail to set pthread name. 
err = %d\", err);\n    }\n}\n\nvoid task_worker::set_priority(worker_priority_t pri)\n{\n    static int prio_max = -20;\n    static int prio_min = 19;\n    static int prio_middle = ((prio_min + prio_max + 1) / 2);\n\n    static int g_thread_priority_map[] = {prio_min,\n                                          (prio_min + prio_middle) / 2,\n                                          prio_middle,\n                                          (prio_middle + prio_max) / 2,\n                                          prio_max};\n\n    static_assert(ARRAYSIZE(g_thread_priority_map) == THREAD_xPRIORITY_COUNT,\n                  \"ARRAYSIZE(g_thread_priority_map) != THREAD_xPRIORITY_COUNT\");\n\n    int prio = g_thread_priority_map[static_cast<int>(pri)];\n    bool succ = true;\n    if ((nice(prio) == -1) && (errno != 0)) {\n        succ = false;\n    }\n    if (!succ) {\n        dwarn(\"You may need priviledge to set thread priority. errno = %d\", errno);\n    }\n}\n\nvoid task_worker::set_affinity(uint64_t affinity)\n{\n#if defined(__linux__)\n    dassert(affinity > 0, \"affinity cannot be 0.\");\n\n    int nr_cpu = static_cast<int>(std::thread::hardware_concurrency());\n    if (nr_cpu < 64) {\n        dassert(affinity <= (((uint64_t)1 << nr_cpu) - 1),\n                \"There are %d cpus in total, while setting thread affinity to a nonexistent one.\",\n                nr_cpu);\n    }\n\n    int err = 0;\n    cpu_set_t cpuset;\n    int nr_bits = std::min(nr_cpu, static_cast<int>(sizeof(affinity) * 8));\n\n    CPU_ZERO(&cpuset);\n    for (int i = 0; i < nr_bits; i++) {\n        if ((affinity & ((uint64_t)1 << i)) != 0) {\n            CPU_SET(i, &cpuset);\n        }\n    }\n    err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);\n\n    if (err != 0) {\n        dwarn(\"Fail to set thread affinity. 
err = %d\", err);\n    }\n#endif // defined(__linux__)\n}\n\nvoid task_worker::run_internal()\n{\n    while (_thread == nullptr) {\n        std::this_thread::sleep_for(std::chrono::milliseconds(1));\n    }\n\n    task::set_tls_dsn_context(pool()->node(), this);\n\n    _native_tid = ::dsn::utils::get_current_tid();\n    set_name(name().c_str());\n    set_priority(pool_spec().worker_priority);\n\n    if (true == pool_spec().worker_share_core) {\n        if (pool_spec().worker_affinity_mask > 0) {\n            set_affinity(pool_spec().worker_affinity_mask);\n        }\n    } else {\n        uint64_t current_mask = pool_spec().worker_affinity_mask;\n        if (0 == current_mask) {\n            derror(\"mask for %s is set to 0x0, mostly due to that #core > 64, set to 64 now\",\n                   pool_spec().name.c_str());\n\n            current_mask = ~((uint64_t)0);\n        }\n        for (int i = 0; i < _index; ++i) {\n            current_mask &= (current_mask - 1);\n            if (0 == current_mask) {\n                current_mask = pool_spec().worker_affinity_mask;\n            }\n        }\n        current_mask -= (current_mask & (current_mask - 1));\n\n        set_affinity(current_mask);\n    }\n\n    _started.notify();\n\n    on_start.execute(this);\n\n    loop();\n}\n\nvoid task_worker::loop()\n{\n    task_queue *q = queue();\n    int best_batch_size = pool_spec().dequeue_batch_size;\n\n    while (_is_running) {\n        int batch_size = best_batch_size;\n        task *task = q->dequeue(batch_size), *next;\n\n        q->decrease_count(batch_size);\n\n#ifndef NDEBUG\n        int count = 0;\n#endif\n        while (task != nullptr) {\n            next = task->next;\n            task->next = nullptr;\n            task->exec_internal();\n            task = next;\n#ifndef NDEBUG\n            count++;\n#endif\n        }\n\n#ifndef NDEBUG\n        dassert(count == batch_size,\n                \"returned task count and batch size do not match: %d vs %d\",\n           
     count,\n                batch_size);\n#endif\n\n        _processed_task_count += batch_size;\n    }\n}\n\nconst threadpool_spec &task_worker::pool_spec() const { return pool()->spec(); }\n\n} // end namespace\n"
  },
  {
    "path": "src/runtime/test/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_runtime_tests)\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS gtest\n                 dsn_runtime\n                 dsn_aio\n                 )\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\n# Extra files that will be installed\nset(MY_BINPLACES \"${CMAKE_CURRENT_SOURCE_DIR}/config-test-corrupt-message.ini\"\n                 \"${CMAKE_CURRENT_SOURCE_DIR}/config-test.ini\"\n                 \"${CMAKE_CURRENT_SOURCE_DIR}/config-test-sim.ini\"\n                 \"${CMAKE_CURRENT_SOURCE_DIR}/command.txt\"\n                 \"${CMAKE_CURRENT_SOURCE_DIR}/run.sh\"\n                 \"${CMAKE_CURRENT_SOURCE_DIR}/clear.sh\"\n                 \"${CMAKE_CURRENT_SOURCE_DIR}/gtest.filter\"\n)\nadd_definitions(-Wno-dangling-else)\ndsn_add_test()\n"
  },
  {
    "path": "src/runtime/test/async_call.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Unit-test for clientlet.\n *\n * Revision history:\n *     Nov., 2015, @shengofsun (Weijie Sun), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/tool-api/thread_access_checker.h>\n#include <dsn/service_api_cpp.h>\n\n#include <gtest/gtest.h>\n#include <functional>\n#include <chrono>\n\n#include \"test_utils.h\"\n\nDEFINE_TASK_CODE(LPC_TEST_CLIENTLET, TASK_PRIORITY_COMMON, THREAD_POOL_TEST_SERVER)\nusing namespace dsn;\n\nint global_value;\nclass tracker_class\n{\npublic:\n    std::string str;\n    int number;\n    dsn::task_tracker _tracker;\n    dsn::thread_access_checker _checker;\n\npublic:\n    tracker_class() : str(\"before 
called\"), number(0), _tracker(1) { global_value = 0; }\n    void callback_function1()\n    {\n        _checker.only_one_thread_access();\n        str = \"after called\";\n        ++global_value;\n    }\n\n    void callback_function2()\n    {\n        _checker.only_one_thread_access();\n        number = 0;\n        for (int i = 0; i < 1000; ++i)\n            number += i;\n        ++global_value;\n    }\n\n    void callback_function3() { ++global_value; }\n};\n\nTEST(async_call, task_call)\n{\n    /* normal lpc*/\n    tracker_class *tc = new tracker_class();\n    task_ptr t =\n        tasking::enqueue(LPC_TEST_CLIENTLET, &tc->_tracker, [tc] { tc->callback_function1(); });\n    EXPECT_TRUE(t != nullptr);\n    t->wait();\n    EXPECT_TRUE(tc->str == \"after called\");\n    delete tc;\n\n    /* task tracking */\n    tc = new tracker_class();\n    std::vector<task_ptr> test_tasks;\n    t = tasking::enqueue(LPC_TEST_CLIENTLET,\n                         &tc->_tracker,\n                         [=] { tc->callback_function1(); },\n                         0,\n                         std::chrono::seconds(30));\n    test_tasks.push_back(t);\n    t = tasking::enqueue(LPC_TEST_CLIENTLET,\n                         &tc->_tracker,\n                         [tc] { tc->callback_function1(); },\n                         0,\n                         std::chrono::seconds(30));\n    test_tasks.push_back(t);\n    t = tasking::enqueue_timer(LPC_TEST_CLIENTLET,\n                               &tc->_tracker,\n                               [tc] { tc->callback_function1(); },\n                               std::chrono::seconds(20),\n                               0,\n                               std::chrono::seconds(30));\n    test_tasks.push_back(t);\n\n    delete tc;\n    for (unsigned int i = 0; i != test_tasks.size(); ++i)\n        EXPECT_FALSE(test_tasks[i]->cancel(true));\n}\n\nTEST(async_call, rpc_call)\n{\n    rpc_address addr(\"localhost\", 20101);\n    rpc_address 
addr2(\"localhost\", TEST_PORT_END);\n    rpc_address addr3(\"localhost\", 32767);\n\n    tracker_class *tc = new tracker_class();\n    rpc::call_one_way_typed(addr, RPC_TEST_STRING_COMMAND, std::string(\"expect_no_reply\"), 0);\n    std::vector<task_ptr> task_vec;\n    const char *command = \"echo hello world\";\n\n    std::shared_ptr<std::string> str_command(new std::string(command));\n    auto t = rpc::call(addr3,\n                       RPC_TEST_STRING_COMMAND,\n                       *str_command,\n                       &tc->_tracker,\n                       [str_command](error_code ec, std::string &&resp) {\n                           if (ERR_OK == ec)\n                               EXPECT_TRUE(str_command->substr(5) == resp);\n                       });\n    task_vec.push_back(t);\n    t = rpc::call(addr2,\n                  RPC_TEST_STRING_COMMAND,\n                  std::string(command),\n                  &tc->_tracker,\n                  [](error_code ec, std::string &&resp) { EXPECT_TRUE(ec == ERR_OK); });\n    task_vec.push_back(t);\n    for (int i = 0; i != task_vec.size(); ++i)\n        task_vec[i]->wait();\n\n    delete tc;\n}\n\nclass simple_task : public dsn::raw_task\n{\npublic:\n    simple_task(dsn::task_code code, const task_handler &h) : dsn::raw_task(code, h, 0, nullptr)\n    {\n        ddebug(\"simple task %p created\", this);\n        allocate_count++;\n    }\n    virtual ~simple_task() override\n    {\n        ddebug(\"simple task %p is deallocated\", this);\n        allocate_count--;\n    }\n    static std::atomic_int allocate_count;\n};\n\nclass simple_task_container : public dsn::ref_counter\n{\npublic:\n    dsn::task_ptr t;\n};\n\nclass simple_rpc_response_task : public dsn::rpc_response_task\n{\npublic:\n    simple_rpc_response_task(dsn::message_ex *m, const rpc_response_handler &h)\n        : dsn::rpc_response_task(m, h)\n    {\n        ddebug(\"simple rpc response task(%p) created\", this);\n        allocate_count++;\n    }\n    
virtual ~simple_rpc_response_task() override\n    {\n        ddebug(\"simple rpc repsonse task(%p) is dealloate\", this);\n        allocate_count--;\n    }\n    static std::atomic_int allocate_count;\n};\n\nstd::atomic_int simple_task::allocate_count(0);\nstd::atomic_int simple_rpc_response_task::allocate_count(0);\n\nDEFINE_TASK_CODE_RPC(TEST_CODE, TASK_PRIORITY_COMMON, THREAD_POOL_TEST_SERVER)\n\nbool spin_wait(const std::function<bool()> &pred, int wait_times)\n{\n    for (int i = 0; i != wait_times; ++i) {\n        if (pred())\n            return true;\n        std::this_thread::sleep_for(std::chrono::milliseconds(100));\n    }\n    return pred();\n}\nTEST(async_call, task_destructor)\n{\n    {\n        task_ptr t(new simple_task(LPC_TEST_CLIENTLET, nullptr));\n        t->enqueue();\n        t->wait();\n    }\n    ASSERT_TRUE(spin_wait([&]() { return simple_task::allocate_count.load() == 0; }, 10));\n\n    dsn::ref_ptr<dsn::message_ex> req = message_ex::create_request(TEST_CODE);\n    {\n        dsn::rpc_response_task_ptr t(new simple_rpc_response_task(req.get(), nullptr));\n        t->enqueue(dsn::ERR_OK, nullptr);\n        t->wait();\n    }\n    ASSERT_TRUE(\n        spin_wait([&]() { return simple_rpc_response_task::allocate_count.load() == 0; }, 10));\n\n    {\n        dsn::rpc_response_task_ptr t(new simple_rpc_response_task(req.get(), nullptr));\n        t->replace_callback([t](dsn::error_code, dsn::message_ex *, dsn::message_ex *) {\n            // ref_ptr out of callback + ref_ptr in callback + ref_added_in_enqueue\n            ASSERT_EQ(3, t->get_count());\n        });\n\n        t->enqueue(dsn::ERR_OK, nullptr);\n        t->wait();\n    }\n    ASSERT_TRUE(\n        spin_wait([&]() { return simple_rpc_response_task::allocate_count.load() == 0; }, 10));\n\n    {\n        dsn::ref_ptr<simple_task_container> c(new simple_task_container());\n        c->t = new simple_task(LPC_TEST_CLIENTLET, [c]() { ddebug(\"cycle link reference test\"); });\n\n        
c->t->enqueue();\n        c->t->wait();\n    }\n    ASSERT_TRUE(spin_wait([&]() { return simple_task::allocate_count.load() == 0; }, 10));\n\n    {\n        dsn::ref_ptr<simple_task_container> c(new simple_task_container());\n        c->t = new simple_task(LPC_TEST_CLIENTLET, [c]() { ddebug(\"cycle link reference test\"); });\n\n        ASSERT_TRUE(c->t->cancel(false));\n    }\n    ASSERT_TRUE(spin_wait([&]() { return simple_task::allocate_count.load() == 0; }, 10));\n}\n"
  },
  {
    "path": "src/runtime/test/clear.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nrm -rf core* log.* nfs_test_dir* *.tmp data\n\n"
  },
  {
    "path": "src/runtime/test/client_negotiation_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"runtime/security/negotiation_utils.h\"\n#include \"runtime/security/client_negotiation.h\"\n#include \"runtime/rpc/network.sim.h\"\n\n#include <gtest/gtest.h>\n#include <dsn/utility/flags.h>\n#include <dsn/utility/fail_point.h>\n\nnamespace dsn {\nnamespace security {\nclass client_negotiation_test : public testing::Test\n{\npublic:\n    client_negotiation_test()\n    {\n        std::unique_ptr<tools::sim_network_provider> sim_net(\n            new tools::sim_network_provider(nullptr, nullptr));\n        _sim_session = sim_net->create_client_session(rpc_address(\"localhost\", 10086));\n        _client_negotiation = make_unique<client_negotiation>(_sim_session);\n    }\n\n    void on_recv_mechanism(const negotiation_response &resp)\n    {\n        _client_negotiation->on_recv_mechanisms(resp);\n    }\n\n    void handle_response(error_code err, const negotiation_response &resp)\n    {\n        _client_negotiation->handle_response(err, std::move(resp));\n    }\n\n    void on_mechanism_selected(const negotiation_response &resp)\n    {\n        _client_negotiation->on_mechanism_selected(resp);\n    }\n\n    void on_challenge(const 
negotiation_response &resp) { _client_negotiation->on_challenge(resp); }\n\n    const std::string &get_selected_mechanism() { return _client_negotiation->_selected_mechanism; }\n\n    negotiation_status::type get_negotiation_status() { return _client_negotiation->_status; }\n\n    // _sim_session is used for holding the sim_rpc_session which is created in ctor,\n    // in case it is released. Because negotiation keeps only a raw pointer.\n    rpc_session_ptr _sim_session;\n    std::unique_ptr<client_negotiation> _client_negotiation;\n};\n\nTEST_F(client_negotiation_test, on_recv_mechanisms)\n{\n    struct\n    {\n        negotiation_status::type resp_status;\n        std::string resp_msg;\n        std::string selected_mechanism;\n    } tests[] = {{negotiation_status::type::SASL_SELECT_MECHANISMS, \"GSSAPI\", \"\"},\n                 {negotiation_status::type::SASL_LIST_MECHANISMS_RESP, \"TEST1\", \"\"},\n                 {negotiation_status::type::SASL_LIST_MECHANISMS_RESP, \"TEST1, TEST2\", \"\"},\n                 {negotiation_status::type::SASL_LIST_MECHANISMS_RESP, \"TEST1, GSSAPI\", \"GSSAPI\"},\n                 {negotiation_status::type::SASL_LIST_MECHANISMS_RESP, \"GSSAPI\", \"GSSAPI\"}};\n\n    RPC_MOCKING(negotiation_rpc)\n    {\n        for (const auto &test : tests) {\n            negotiation_response resp;\n            resp.status = test.resp_status;\n            resp.msg = blob::create_from_bytes(test.resp_msg.data(), test.resp_msg.length());\n            on_recv_mechanism(resp);\n\n            ASSERT_EQ(get_selected_mechanism(), test.selected_mechanism);\n        }\n    }\n}\n\nTEST_F(client_negotiation_test, handle_response)\n{\n    struct\n    {\n        error_code resp_err;\n        negotiation_status::type resp_status;\n        negotiation_status::type neg_status;\n    } tests[] = {\n        {ERR_TIMEOUT,\n         negotiation_status::type::SASL_SELECT_MECHANISMS,\n         negotiation_status::type::SASL_AUTH_FAIL},\n        {ERR_OK, 
negotiation_status::type::SASL_AUTH_DISABLE, negotiation_status::type::SASL_SUCC}};\n\n    for (const auto &test : tests) {\n        negotiation_response resp;\n        resp.status = test.resp_status;\n        handle_response(test.resp_err, resp);\n\n        ASSERT_EQ(get_negotiation_status(), test.neg_status);\n    }\n}\n\nTEST_F(client_negotiation_test, on_mechanism_selected)\n{\n    struct\n    {\n        std::string sasl_init_result;\n        std::string sasl_start_result;\n        negotiation_status::type resp_status;\n        negotiation_status::type neg_status;\n    } tests[] = {{\"ERR_OK\",\n                  \"ERR_OK\",\n                  negotiation_status::type::SASL_SELECT_MECHANISMS_RESP,\n                  negotiation_status::type::SASL_INITIATE},\n                 {\"ERR_OK\",\n                  \"ERR_SASL_INCOMPLETE\",\n                  negotiation_status::type::SASL_SELECT_MECHANISMS_RESP,\n                  negotiation_status::type::SASL_INITIATE},\n                 {\"ERR_OK\",\n                  \"ERR_TIMEOUT\",\n                  negotiation_status::type::SASL_SELECT_MECHANISMS_RESP,\n                  negotiation_status::type::SASL_AUTH_FAIL},\n                 {\"ERR_TIMEOUT\",\n                  \"ERR_OK\",\n                  negotiation_status::type::SASL_SELECT_MECHANISMS_RESP,\n                  negotiation_status::type::SASL_AUTH_FAIL},\n                 {\"ERR_OK\",\n                  \"ERR_OK\",\n                  negotiation_status::type::SASL_SELECT_MECHANISMS,\n                  negotiation_status::type::SASL_AUTH_FAIL}};\n\n    RPC_MOCKING(negotiation_rpc)\n    {\n        for (const auto &test : tests) {\n            fail::setup();\n            fail::cfg(\"sasl_client_wrapper_init\", \"return(\" + test.sasl_init_result + \")\");\n            fail::cfg(\"sasl_client_wrapper_start\", \"return(\" + test.sasl_start_result + \")\");\n\n            negotiation_response resp;\n            resp.status = test.resp_status;\n            
on_mechanism_selected(resp);\n            ASSERT_EQ(get_negotiation_status(), test.neg_status);\n\n            fail::teardown();\n        }\n    }\n}\n\nTEST_F(client_negotiation_test, on_challenge)\n{\n    struct\n    {\n        std::string sasl_step_result;\n        negotiation_status::type resp_status;\n        negotiation_status::type neg_status;\n    } tests[] = {\n        {\"ERR_OK\",\n         negotiation_status::type::SASL_CHALLENGE,\n         negotiation_status::type::SASL_CHALLENGE_RESP},\n        {\"ERR_SASL_INCOMPLETE\",\n         negotiation_status::type::SASL_CHALLENGE,\n         negotiation_status::type::SASL_CHALLENGE_RESP},\n        {\"ERR_TIMEOUT\",\n         negotiation_status::type::SASL_CHALLENGE,\n         negotiation_status::type::SASL_AUTH_FAIL},\n        {\"ERR_OK\", negotiation_status::type::SASL_SUCC, negotiation_status::type::SASL_SUCC}};\n\n    RPC_MOCKING(negotiation_rpc)\n    {\n        for (const auto &test : tests) {\n            fail::setup();\n            fail::cfg(\"sasl_client_wrapper_step\", \"return(\" + test.sasl_step_result + \")\");\n\n            negotiation_response resp;\n            resp.status = test.resp_status;\n            on_challenge(resp);\n            ASSERT_EQ(get_negotiation_status(), test.neg_status);\n\n            fail::teardown();\n        }\n    }\n}\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/test/command.txt",
    "content": "\nhelp\nhelp engine\nhelp unexist-cmd\nengine\ntask-code\nconfig-dump config-dump.ini\ntest-cmd this is test argument\nunexist-cmd arg1 arg2\n\n"
  },
  {
    "path": "src/runtime/test/config-test-corrupt-message.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\nnetwork.client.RPC_CHANNEL_TCP = dsn::tools::asio_network_provider, 65536\nnetwork.client.RPC_CHANNEL_UDP = dsn::tools::asio_udp_provider, 65536\nnetwork.server.0.RPC_CHANNEL_TCP = dsn::tools::asio_network_provider, 65536\nnetwork.server.0.RPC_CHANNEL_UDP = dsn::tools::asio_udp_provider, 65536\n\n[apps.client]\ntype = test\narguments = localhost 20101\nrun = true\nports = 20001\ncount = 1\ndelay_seconds = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER, THREAD_POOL_FOR_TEST_1, THREAD_POOL_FOR_TEST_2\n\n[apps.server]\ntype = test\narguments =\nports = 20101,20102\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER\nnetwork.client.RPC_CHANNEL_TCP = 
dsn::tools::asio_network_provider,65536\nnetwork.server.20101.RPC_CHANNEL_TCP = dsn::tools::asio_network_provider,65536\nnetwork.server.20102.RPC_CHANNEL_TCP = dsn::tools::asio_network_provider,65536\nnetwork.server.20103.RPC_CHANNEL_TCP = dsn::tools::asio_network_provider,65536\n\n[core]\n;tool = simulator\ntool = nativerun\n\ntoollets = fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n\n\n\n[tools.simple_logger]\nfast_flush = true\nshort_header = false\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 0\n\n[network]\n; how many network threads for network library (used by asio)\nio_service_worker_count = 2\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 1000\n\n[task.RPC_TEST_HASH1]\nis_trace = true\nrpc_message_crc_required = true\nrpc_request_drop_ratio = 0\nrpc_timeout_milliseconds = 1000\nrpc_request_data_corrupted_ratio = 1\nrpc_message_data_corrupted_type = header\n\n[task.RPC_TEST_HASH2]\nis_trace = true\nrpc_message_crc_required = true\nrpc_request_drop_ratio = 0\nrpc_timeout_milliseconds = 1000\nrpc_request_data_corrupted_ratio = 1\nrpc_message_data_corrupted_type = body\n\n[task.RPC_TEST_HASH3_ACK]\nis_trace = true\nrpc_message_crc_required = true\nrpc_response_drop_ratio = 0\nrpc_timeout_milliseconds = 1000\nrpc_response_data_corrupted_ratio = 1\nrpc_message_data_corrupted_type = header\n\n[task.RPC_TEST_HASH4_ACK]\nis_trace = true\nrpc_message_crc_required = true\nrpc_response_drop_ratio = 0\nrpc_timeout_milliseconds = 1000\nrpc_response_data_corrupted_ratio = 1\nrpc_message_data_corrupted_type = body\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nis_profile = false\nallow_inline = false\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\nis_profile = false\n\n[task.RPC_TEST_UDP]\nrpc_call_channel = 
RPC_CHANNEL_UDP\nrpc_message_crc_required = true\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 2\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_NORMAL\n\n[threadpool.THREAD_POOL_TEST_SERVER]\npartitioned = false\n"
  },
  {
    "path": "src/runtime/test/config-test-sim.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n[apps.client]\ntype = test\narguments = localhost 20101\nrun = true\nports =\ncount = 1\ndelay_seconds = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER, THREAD_POOL_FOR_TEST_1, THREAD_POOL_FOR_TEST_2\n\n[apps.server]\ntype = test\narguments =\nports = 20101,20102\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER\n\n[apps.server_group]\ntype = test\narguments =\nports = 20201\nrun = true\ncount = 3\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER\n\n[core]\ntool 
= simulator\n;tool = nativerun\n\ntoollets = tracer, profiler\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n\n\n[tools.simple_logger]\nfast_flush = true\nshort_header = false\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 0\n\n[network]\n; how many network threads for network library (used by asio)\nio_service_worker_count = 2\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 1000\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nis_profile = false\nallow_inline = false\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\nis_profile = false\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 2\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_NORMAL\n\n[threadpool.THREAD_POOL_TEST_SERVER]\npartitioned = false\n\n[threadpool.THREAD_POOL_FOR_TEST_1]\nworker_count = 2\npartitioned = false\n\n[threadpool.THREAD_POOL_FOR_TEST_2]\nworker_count = 2\npartitioned = true\n\n[core.test]\ncount = 1\nrun = true\n\n[components.simple_perf_counter]\ncounter_computation_interval_seconds = 1\n\n[components.simple_perf_counter_v2_atomic]\ncounter_computation_interval_seconds = 1\n\n[components.simple_perf_counter_v2_fast]\ncounter_computation_interval_seconds = 1\n\n[uri-resolver.http://localhost:8080]\nfactory = partition_resolver_simple\narguments = 127.0.0.1:8080\n\n"
  },
  {
    "path": "src/runtime/test/config-test.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\nnetwork.client.RPC_CHANNEL_TCP = dsn::tools::asio_network_provider, 65536\nnetwork.client.RPC_CHANNEL_UDP = dsn::tools::asio_udp_provider, 65536\nnetwork.server.0.RPC_CHANNEL_TCP = dsn::tools::asio_network_provider, 65536\nnetwork.server.0.RPC_CHANNEL_UDP = dsn::tools::asio_udp_provider, 65536\n\n[apps.client]\ntype = test\narguments = localhost 20101\nrun = true\nports = 20001\ncount = 1\ndelay_seconds = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER, THREAD_POOL_FOR_TEST_1, THREAD_POOL_FOR_TEST_2\n\n[apps.server]\ntype = test\narguments =\nports = 20101,20102\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER\nnetwork.client.RPC_CHANNEL_TCP = 
dsn::tools::asio_network_provider,65536\nnetwork.server.20101.RPC_CHANNEL_TCP = dsn::tools::asio_network_provider,65536\nnetwork.server.20102.RPC_CHANNEL_TCP = dsn::tools::asio_network_provider,65536\nnetwork.server.20103.RPC_CHANNEL_TCP = dsn::tools::asio_network_provider,65536\n\n[apps.server_group]\ntype = test\narguments =\nports = 20201\nrun = true\ncount = 3\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER\n\n[apps.server_not_run]\ntype = test\narguments =\nports = 20301\nrun = false\ncount = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER\n\n[core]\n;tool = simulator\ntool = nativerun\n\ntoollets = tracer, profiler\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n\n\n\n[tools.simple_logger]\nfast_flush = true\nshort_header = false\nstderr_start_level = LOG_LEVEL_FATAL\n\n[tools.simulator]\nrandom_seed = 0\n\n[network]\n; how many network threads for network library (used by asio)\nio_service_worker_count = 2\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 1000\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nis_profile = false\nallow_inline = false\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\nis_profile = false\n\n[task.RPC_TEST_UDP]\nrpc_call_channel = RPC_CHANNEL_UDP\nrpc_message_crc_required = true\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 2\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_NORMAL\n\n[threadpool.THREAD_POOL_TEST_SERVER]\npartitioned = false\n\n[threadpool.THREAD_POOL_FOR_TEST_1]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_HIGHEST\nworker_share_core = false\nworker_affinity_mask = 1\npartitioned = false\n\n[threadpool.THREAD_POOL_FOR_TEST_2]\nworker_count = 2\nworker_priority = THREAD_xPRIORITY_NORMAL\nworker_share_core = 
true\nworker_affinity_mask = 1\npartitioned = true\n\n[components.simple_perf_counter]\ncounter_computation_interval_seconds = 1\n\n[components.simple_perf_counter_v2_atomic]\ncounter_computation_interval_seconds = 1\n\n[components.simple_perf_counter_v2_fast]\ncounter_computation_interval_seconds = 1\n\n[core.test]\ncount = 1\nrun = true\n\n[uri-resolver.http://localhost:8080]\nfactory = partition_resolver_simple\narguments = 127.0.0.1:8080\n"
  },
  {
    "path": "src/runtime/test/corrupt_message.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#include <vector>\n#include <string>\n\n#include <gtest/gtest.h>\n#include <dsn/service_api_cpp.h>\n#include <dsn/tool-api/async_calls.h>\n\n#include <boost/lexical_cast.hpp>\n#include <iostream>\n\n#include \"test_utils.h\"\n\n// this only works with the fault injector\nTEST(core, corrupt_message)\n{\n    int req = 0;\n    ::dsn::rpc_address server(\"localhost\", 20101);\n\n    auto result = ::dsn::rpc::call_wait<std::string>(\n        server, RPC_TEST_HASH1, req, std::chrono::milliseconds(0), 1);\n    ASSERT_EQ(result.first, ERR_TIMEOUT);\n\n    result = ::dsn::rpc::call_wait<std::string>(\n        server, RPC_TEST_HASH2, req, std::chrono::milliseconds(0), 1);\n    ASSERT_EQ(result.first, ERR_TIMEOUT);\n\n    result = 
::dsn::rpc::call_wait<std::string>(\n        server, RPC_TEST_HASH3, req, std::chrono::milliseconds(0), 1);\n    ASSERT_EQ(result.first, ERR_TIMEOUT);\n\n    result = ::dsn::rpc::call_wait<std::string>(\n        server, RPC_TEST_HASH4, req, std::chrono::milliseconds(0), 1);\n    ASSERT_EQ(result.first, ERR_TIMEOUT);\n}\n"
  },
  {
    "path": "src/runtime/test/gtest.filter",
    "content": "config-test.ini -core.corrupt_message:core.aio*:core.operation_failed:tools_hpc.*\nconfig-test-sim.ini -core.corrupt_message:core.aio*:core.operation_failed:tools_hpc.*:tools_simulator.*:task_test.signal_finished_task\nconfig-test-sim.ini tools_simulator.*\n"
  },
  {
    "path": "src/runtime/test/lpc.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <gtest/gtest.h>\n#include <dsn/service_api_cpp.h>\n#include \"test_utils.h\"\n\nDEFINE_TASK_CODE(LPC_TEST_HASH, TASK_PRIORITY_COMMON, THREAD_POOL_TEST_SERVER)\n\nvoid on_lpc_test(void *p)\n{\n    std::string &result = *(std::string *)p;\n    result = ::dsn::task::get_current_worker()->name();\n}\n\nTEST(core, lpc)\n{\n    std::string result = \"heheh\";\n    dsn::task_ptr t(new raw_task(LPC_TEST_HASH, std::bind(&on_lpc_test, (void *)&result), 1));\n    t->enqueue();\n    t->wait();\n    EXPECT_TRUE(result.substr(0, result.length() - 2) == 
\"client.THREAD_POOL_TEST_SERVER\");\n}\n"
  },
  {
    "path": "src/runtime/test/main.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <iostream>\n#include \"gtest/gtest.h\"\n#include \"test_utils.h\"\n\nint g_test_count = 0;\nint g_test_ret = 0;\n\nGTEST_API_ int main(int argc, char **argv)\n{\n    testing::InitGoogleTest(&argc, argv);\n\n    // register all possible services\n    dsn::service_app::register_factory<test_client>(\"test\");\n\n    // specify what services and tools will run in config file, then run\n    dsn_run(argc, argv, false);\n\n    // run in-rDSN tests\n    while (g_test_count == 0) {\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n    }\n\n    if 
(g_test_ret != 0) {\n#ifndef ENABLE_GCOV\n        dsn_exit(g_test_ret);\n#endif\n        return g_test_ret;\n    }\n\n    if (strcmp(\"simulator\", dsn_config_get_value_string(\"core\", \"tool\", \"simulator\", \"\")) != 0) {\n        // run out-rDSN tests in other threads\n        std::cout << \"=========================================================== \" << std::endl;\n        std::cout << \"================== run in non-rDSN threads ================ \" << std::endl;\n        std::cout << \"=========================================================== \" << std::endl;\n        std::thread t([]() {\n            dsn_mimic_app(\"client\", 1);\n            exec_tests();\n        });\n        t.join();\n        if (g_test_ret != 0) {\n#ifndef ENABLE_GCOV\n            dsn_exit(g_test_ret);\n#endif\n            return g_test_ret;\n        }\n    }\n\n// exit without any destruction\n#ifndef ENABLE_GCOV\n    dsn_exit(0);\n#endif\n    return 0;\n}\n"
  },
  {
    "path": "src/runtime/test/message_reader_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n\n#include <dsn/tool-api/message_parser.h>\n\nnamespace dsn {\n\nclass message_reader_test : public testing::Test\n{\npublic:\n    void test_init()\n    {\n        message_reader reader(4096);\n        ASSERT_EQ(reader._buffer_block_size, 4096);\n        ASSERT_EQ(reader._buffer_occupied, 0);\n        ASSERT_EQ(reader._buffer.length(), 0);\n    }\n\n    void test_read_buffer()\n    {\n        message_reader reader(4096);\n\n        const char *p1 = reader.read_buffer_ptr(10);\n        ASSERT_EQ(reader._buffer_occupied, 0);\n        ASSERT_EQ(reader._buffer.length(), 4096);\n        reader.mark_read(10);\n        ASSERT_EQ(reader._buffer_occupied, 10);\n\n        const char *p2 = reader.read_buffer_ptr(10);\n        ASSERT_EQ(reader._buffer_occupied, 10);\n        ASSERT_EQ(reader._buffer.length(), 4096);\n        reader.mark_read(10);\n        ASSERT_EQ(reader._buffer_occupied, 20);\n        ASSERT_EQ(p2 - p1, 10); // p1, p2 reside on the same allocated memory buffer.\n\n        reader.read_buffer_ptr(4076);\n        ASSERT_EQ(reader._buffer_occupied, 20);\n        ASSERT_EQ(reader._buffer.length(), 4096);\n        
reader.mark_read(4076);\n        ASSERT_EQ(reader._buffer_occupied, 4096);\n\n        // buffer capacity extends\n        p1 = reader.read_buffer_ptr(1);\n        ASSERT_EQ(reader._buffer_occupied, 4096);\n        ASSERT_EQ(reader._buffer.length(), 4097);\n        reader.mark_read(1);\n        ASSERT_EQ(reader._buffer_occupied, 4097);\n\n        // if buffer is not consumed in time,\n        // each read will cause one data copy\n        p2 = reader.read_buffer_ptr(3);\n        reader.mark_read(3);\n        ASSERT_EQ(reader._buffer.length(), 4100);\n        ASSERT_EQ(reader._buffer_occupied, 4100);\n        ASSERT_NE(p2 - p1, 3);\n    }\n\n    void test_read_data()\n    {\n        message_reader reader(4096);\n\n        std::string data = std::string(\"THFT\") + std::string(44, '\\0'); // 48 bytes\n        data[7] = data[9] = '\\1';\n\n        char *buf = reader.read_buffer_ptr(data.length());\n        memcpy(buf, data.data(), data.size());\n        reader.mark_read(data.length());\n        ASSERT_EQ(reader.buffer().size(), data.length());\n        ASSERT_EQ(reader.buffer().to_string(), data);\n    }\n\n    void test_consume_buffer()\n    {\n        message_reader reader(5000);\n\n        reader.read_buffer_ptr(1000);\n        reader.mark_read(1000);\n        ASSERT_EQ(reader._buffer_occupied, 1000);\n        ASSERT_EQ(reader._buffer.length(), 5000);\n        ASSERT_EQ(reader.buffer().size(), 1000);\n\n        reader.consume_buffer(500);\n        ASSERT_EQ(reader._buffer.length(), 4500);\n        ASSERT_EQ(reader._buffer_occupied, 500);\n    }\n};\n\nTEST_F(message_reader_test, init) { test_init(); }\n\nTEST_F(message_reader_test, read_buffer) { test_read_buffer(); }\n\nTEST_F(message_reader_test, read_data) { test_read_data(); }\n\nTEST_F(message_reader_test, consume_buffer) { test_consume_buffer(); }\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/test/message_utils_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/cpp/message_utils.h>\n#include <dsn/dist/replication/replication.codes.h>\n#include <dsn/cpp/rpc_holder.h>\n#include <gtest/gtest.h>\n\nnamespace dsn {\n\nDEFINE_TASK_CODE_RPC(RPC_CODE_FOR_TEST, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\n\ntypedef rpc_holder<configuration_query_by_index_request, configuration_query_by_index_response>\n    t_rpc;\n\nTEST(message_utils, msg_blob_convertion)\n{\n    std::string data = \"hello\";\n\n    blob b(data.c_str(), 0, data.size());\n    message_ptr m = from_blob_to_received_msg(RPC_CODE_FOR_TEST, std::move(b));\n\n    ASSERT_EQ(m->header->body_length, data.size());\n    ASSERT_EQ(b.to_string(), move_message_to_blob(m.get()).to_string());\n}\n\nTEST(message_utils, 
thrift_msg_convertion)\n{\n    configuration_query_by_index_request request;\n    request.app_name = \"haha\";\n\n    message_ptr msg =\n        from_thrift_request_to_received_message(request, RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX);\n\n    t_rpc rpc(msg.get());\n    ASSERT_EQ(rpc.request().app_name, \"haha\");\n}\n\nTEST(message_utils, complex_convertion)\n{\n    configuration_query_by_index_request request;\n    request.app_name = \"haha\";\n\n    message_ptr msg =\n        from_thrift_request_to_received_message(request, RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX);\n    blob b = move_message_to_blob(msg.get());\n    msg = from_blob_to_received_msg(RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX, std::move(b));\n\n    t_rpc rpc(msg.get());\n    ASSERT_EQ(rpc.request().app_name, \"haha\");\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/test/meta_access_controller_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include <dsn/utility/flags.h>\n#include <runtime/rpc/network.sim.h>\n#include <dsn/dist/replication.h>\n#include \"runtime/security/access_controller.h\"\n\nnamespace dsn {\nnamespace security {\nDSN_DECLARE_bool(enable_acl);\n\nclass meta_access_controller_test : public testing::Test\n{\npublic:\n    meta_access_controller_test() { _meta_access_controller = create_meta_access_controller(); }\n\n    void set_super_user(const std::string &super_user)\n    {\n        _meta_access_controller->_super_users.insert(super_user);\n    }\n\n    bool pre_check(const std::string &user_name)\n    {\n        return _meta_access_controller->pre_check(user_name);\n    }\n\n    bool allowed(dsn::message_ex *msg) { return _meta_access_controller->allowed(msg); }\n\n    std::unique_ptr<access_controller> _meta_access_controller;\n};\n\nTEST_F(meta_access_controller_test, pre_check)\n{\n    const std::string SUPER_USER_NAME = \"super_user\";\n    struct\n    {\n        bool enable_acl;\n        std::string user_name;\n        bool result;\n    } tests[] = {{true, \"not_super_user\", false},\n                 {false, 
\"not_super_user\", true},\n                 {true, SUPER_USER_NAME, true}};\n\n    bool origin_enable_acl = FLAGS_enable_acl;\n    set_super_user(SUPER_USER_NAME);\n\n    for (const auto &test : tests) {\n        FLAGS_enable_acl = test.enable_acl;\n        ASSERT_EQ(pre_check(test.user_name), test.result);\n    }\n\n    FLAGS_enable_acl = origin_enable_acl;\n}\n\nTEST_F(meta_access_controller_test, allowed)\n{\n    struct\n    {\n        task_code rpc_code;\n        bool result;\n    } tests[] = {{RPC_CM_LIST_APPS, true},\n                 {RPC_CM_LIST_NODES, true},\n                 {RPC_CM_CLUSTER_INFO, true},\n                 {RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX, true},\n                 {RPC_CM_START_RECOVERY, false}};\n\n    bool origin_enable_acl = FLAGS_enable_acl;\n    FLAGS_enable_acl = true;\n\n    std::unique_ptr<tools::sim_network_provider> sim_net(\n        new tools::sim_network_provider(nullptr, nullptr));\n    auto sim_session = sim_net->create_client_session(rpc_address(\"localhost\", 10086));\n    for (const auto &test : tests) {\n        dsn::message_ptr msg = message_ex::create_request(test.rpc_code);\n        msg->io_session = sim_session;\n\n        ASSERT_EQ(allowed(msg), test.result);\n    }\n\n    FLAGS_enable_acl = origin_enable_acl;\n}\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/test/negotiation_manager_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"runtime/security/negotiation_manager.h\"\n#include \"runtime/security/negotiation_utils.h\"\n#include \"runtime/rpc/network.sim.h\"\n\n#include <gtest/gtest.h>\n#include <dsn/utility/flags.h>\n#include <dsn/dist/failure_detector/fd.code.definition.h>\n#include <http/http_server_impl.h>\n#include \"nfs/nfs_code_definition.h\"\n\nnamespace dsn {\nnamespace security {\nDSN_DECLARE_bool(enable_auth);\nDSN_DECLARE_bool(mandatory_auth);\n\nclass negotiation_manager_test : public testing::Test\n{\npublic:\n    negotiation_rpc create_fake_rpc()\n    {\n        std::unique_ptr<tools::sim_network_provider> sim_net(\n            new tools::sim_network_provider(nullptr, nullptr));\n        auto sim_session =\n            sim_net->create_server_session(rpc_address(\"localhost\", 10086), rpc_session_ptr());\n        auto rpc = negotiation_rpc(make_unique<negotiation_request>(), RPC_NEGOTIATION);\n        rpc.dsn_request()->io_session = sim_session;\n        return rpc;\n    }\n\n    rpc_session_ptr create_fake_session(bool is_client)\n    {\n        std::unique_ptr<tools::sim_network_provider> sim_net(\n            new 
tools::sim_network_provider(nullptr, nullptr));\n        if (is_client) {\n            return sim_net->create_client_session(rpc_address(\"localhost\", 10086));\n        } else {\n            return sim_net->create_server_session(rpc_address(\"localhost\", 10086),\n                                                  rpc_session_ptr());\n        }\n    }\n\n    void on_negotiation_request(negotiation_rpc rpc)\n    {\n        negotiation_manager::instance().on_negotiation_request(rpc);\n    }\n\n    bool on_rpc_recv_msg(message_ex *msg)\n    {\n        return negotiation_manager::instance().on_rpc_recv_msg(msg);\n    }\n\n    bool on_rpc_send_msg(message_ex *msg)\n    {\n        return negotiation_manager::instance().on_rpc_send_msg(msg);\n    }\n};\n\nTEST_F(negotiation_manager_test, disable_auth)\n{\n    RPC_MOCKING(negotiation_rpc)\n    {\n        FLAGS_enable_auth = false;\n        auto rpc = create_fake_rpc();\n        on_negotiation_request(rpc);\n\n        ASSERT_EQ(rpc.response().status, negotiation_status::type::SASL_AUTH_DISABLE);\n    }\n}\n\nTEST_F(negotiation_manager_test, on_rpc_recv_msg)\n{\n    struct\n    {\n        task_code rpc_code;\n        bool negotiation_succeed;\n        bool mandatory_auth;\n        bool is_client;\n        bool return_value;\n    } tests[] = {{RPC_NEGOTIATION, false, true, false, true},\n                 {RPC_NEGOTIATION, false, true, true, true},\n                 {RPC_NEGOTIATION_ACK, false, true, false, true},\n                 {RPC_NEGOTIATION_ACK, false, true, true, true},\n                 {fd::RPC_FD_FAILURE_DETECTOR_PING, false, true, false, true},\n                 {fd::RPC_FD_FAILURE_DETECTOR_PING, false, true, true, true},\n                 {fd::RPC_FD_FAILURE_DETECTOR_PING_ACK, false, true, false, true},\n                 {fd::RPC_FD_FAILURE_DETECTOR_PING_ACK, false, true, true, true},\n                 {RPC_HTTP_SERVICE, false, true, false, true},\n                 {RPC_HTTP_SERVICE, false, true, true, true},\n   
              {RPC_HTTP_SERVICE_ACK, false, true, false, true},\n                 {RPC_HTTP_SERVICE_ACK, false, true, true, true},\n                 {service::RPC_NFS_COPY, true, true, false, true},\n                 {service::RPC_NFS_COPY, true, true, true, true},\n                 {service::RPC_NFS_COPY, false, false, false, true},\n                 {service::RPC_NFS_COPY, false, false, true, false},\n                 {service::RPC_NFS_COPY, false, true, true, false},\n                 {service::RPC_NFS_COPY, false, true, false, false}};\n\n    for (const auto &test : tests) {\n        FLAGS_mandatory_auth = test.mandatory_auth;\n        message_ptr msg = dsn::message_ex::create_request(test.rpc_code, 0, 0);\n        auto sim_session = create_fake_session(test.is_client);\n        msg->io_session = sim_session;\n        if (test.negotiation_succeed) {\n            sim_session->set_negotiation_succeed();\n        }\n\n        ASSERT_EQ(test.return_value, on_rpc_recv_msg(msg));\n    }\n}\n\nTEST_F(negotiation_manager_test, on_rpc_send_msg)\n{\n    struct\n    {\n        task_code rpc_code;\n        bool negotiation_succeed;\n        bool mandatory_auth;\n        bool is_client;\n        bool return_value;\n    } tests[] = {{RPC_NEGOTIATION, false, true, false, true},\n                 {RPC_NEGOTIATION, false, true, true, true},\n                 {RPC_NEGOTIATION_ACK, false, true, false, true},\n                 {RPC_NEGOTIATION_ACK, false, true, true, true},\n                 {fd::RPC_FD_FAILURE_DETECTOR_PING, false, true, false, true},\n                 {fd::RPC_FD_FAILURE_DETECTOR_PING, false, true, true, true},\n                 {fd::RPC_FD_FAILURE_DETECTOR_PING_ACK, false, true, false, true},\n                 {fd::RPC_FD_FAILURE_DETECTOR_PING_ACK, false, true, true, true},\n                 {RPC_HTTP_SERVICE, false, true, false, true},\n                 {RPC_HTTP_SERVICE, false, true, true, true},\n                 {RPC_HTTP_SERVICE_ACK, false, true, false, 
true},\n                 {RPC_HTTP_SERVICE_ACK, false, true, true, true},\n                 {service::RPC_NFS_COPY, true, true, false, true},\n                 {service::RPC_NFS_COPY, true, true, true, true},\n                 {service::RPC_NFS_COPY, false, false, false, true},\n                 {service::RPC_NFS_COPY, false, false, true, false},\n                 {service::RPC_NFS_COPY, false, true, true, false},\n                 {service::RPC_NFS_COPY, false, true, false, false}};\n\n    for (const auto &test : tests) {\n        FLAGS_mandatory_auth = test.mandatory_auth;\n        message_ptr msg = dsn::message_ex::create_request(test.rpc_code, 0, 0);\n        auto sim_session = create_fake_session(test.is_client);\n        msg->io_session = sim_session;\n        if (test.negotiation_succeed) {\n            sim_session->set_negotiation_succeed();\n        }\n\n        ASSERT_EQ(test.return_value, on_rpc_send_msg(msg));\n    }\n}\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/test/netprovider.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Unit-test for net provider.\n *\n * Revision history:\n *     Nov., 2015, @shengofsun (Weijie Sun), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <memory>\n#include <thread>\n\n#include <gtest/gtest.h>\n\n#include <dsn/service_api_cpp.h>\n\n#include <dsn/tool-api/task.h>\n#include <dsn/tool-api/task_spec.h>\n\n#include \"runtime/rpc/asio_net_provider.h\"\n#include \"runtime/rpc/network.sim.h\"\n#include \"runtime/rpc/rpc_engine.h\"\n#include \"runtime/service_engine.h\"\n#include \"test_utils.h\"\n\nusing namespace dsn;\nusing namespace dsn::tools;\n\nclass asio_network_provider_test : public asio_network_provider\n{\npublic:\n    asio_network_provider_test(rpc_engine *srv, 
network *inner_provider)\n        : asio_network_provider(srv, inner_provider)\n    {\n    }\n\npublic:\n    void change_test_cfg_conn_threshold_per_ip(uint32_t n)\n    {\n        ddebug(\n            \"change _cfg_conn_threshold_per_ip %u -> %u for test\", _cfg_conn_threshold_per_ip, n);\n        _cfg_conn_threshold_per_ip = n;\n    }\n};\n\nstatic int TEST_PORT = 20401;\nDEFINE_TASK_CODE_RPC(RPC_TEST_NETPROVIDER, TASK_PRIORITY_COMMON, THREAD_POOL_TEST_SERVER)\n\nvolatile int wait_flag = 0;\nvoid response_handler(dsn::error_code ec,\n                      dsn::message_ex *req,\n                      dsn::message_ex *resp,\n                      void *request_buf)\n{\n    if (ERR_OK == ec) {\n        std::string response_string;\n        char *request_str = (char *)(request_buf);\n        ::dsn::unmarshall(resp, response_string);\n        ASSERT_TRUE(strcmp(response_string.c_str(), request_str) == 0);\n    } else {\n        ddebug(\"error msg: %s\", ec.to_string());\n    }\n    wait_flag = 1;\n}\n\nvoid reject_response_handler(dsn::error_code ec)\n{\n    wait_flag = 1;\n    ASSERT_TRUE(ERR_TIMEOUT == ec);\n}\n\nvoid rpc_server_response(dsn::message_ex *request)\n{\n    std::string str_command;\n    ::dsn::unmarshall(request, str_command);\n    dsn::message_ex *response = request->create_response();\n    ::dsn::marshall(response, str_command);\n    dsn_rpc_reply(response);\n}\n\nvoid wait_response()\n{\n    while (wait_flag == 0)\n        std::this_thread::sleep_for(std::chrono::milliseconds(5));\n}\n\nvoid rpc_client_session_send(rpc_session_ptr client_session, bool reject = false)\n{\n    message_ex *msg = message_ex::create_request(RPC_TEST_NETPROVIDER, 0, 0);\n    std::unique_ptr<char[]> buf(new char[128]);\n    memset(buf.get(), 0, 128);\n    strcpy(buf.get(), \"hello world\");\n    ::dsn::marshall(msg, std::string(buf.get()));\n\n    wait_flag = 0;\n    if (!reject) {\n        rpc_response_task *t = new rpc_response_task(msg,\n                                  
                   std::bind(&response_handler,\n                                                               std::placeholders::_1,\n                                                               std::placeholders::_2,\n                                                               std::placeholders::_3,\n                                                               buf.get()),\n                                                     0);\n        client_session->net().engine()->matcher()->on_call(msg, t);\n    } else {\n        rpc_response_task *t = new rpc_response_task(\n            msg, std::bind(&reject_response_handler, std::placeholders::_1), 0);\n        client_session->net().engine()->matcher()->on_call(msg, t);\n    }\n    client_session->send_message(msg);\n    wait_response();\n}\n\nTEST(tools_common, asio_net_provider)\n{\n    if (dsn::service_engine::instance().spec().semaphore_factory_name ==\n        \"dsn::tools::sim_semaphore_provider\")\n        return;\n\n    ASSERT_TRUE(dsn_rpc_register_handler(\n        RPC_TEST_NETPROVIDER, \"rpc.test.netprovider\", rpc_server_response));\n\n    std::unique_ptr<asio_network_provider> asio_network(\n        new asio_network_provider(task::get_current_rpc(), nullptr));\n\n    error_code start_result;\n    start_result = asio_network->start(RPC_CHANNEL_TCP, TEST_PORT, true);\n    ASSERT_TRUE(start_result == ERR_OK);\n\n    // the same asio network handle, start only client is ok\n    start_result = asio_network->start(RPC_CHANNEL_TCP, TEST_PORT, true);\n    ASSERT_TRUE(start_result == ERR_OK);\n\n    rpc_address network_addr = asio_network->address();\n    ASSERT_TRUE(network_addr.port() == TEST_PORT);\n\n    std::unique_ptr<asio_network_provider> asio_network2(\n        new asio_network_provider(task::get_current_rpc(), nullptr));\n    start_result = asio_network2->start(RPC_CHANNEL_TCP, TEST_PORT, true);\n    ASSERT_TRUE(start_result == ERR_OK);\n\n    start_result = asio_network2->start(RPC_CHANNEL_TCP, 
TEST_PORT, false);\n    ASSERT_TRUE(start_result == ERR_OK);\n    ddebug(\"result: %s\", start_result.to_string());\n\n    start_result = asio_network2->start(RPC_CHANNEL_TCP, TEST_PORT, false);\n    ASSERT_TRUE(start_result == ERR_SERVICE_ALREADY_RUNNING);\n    ddebug(\"result: %s\", start_result.to_string());\n\n    rpc_session_ptr client_session =\n        asio_network->create_client_session(rpc_address(\"localhost\", TEST_PORT));\n    client_session->connect();\n\n    rpc_client_session_send(client_session);\n\n    ASSERT_TRUE(dsn_rpc_unregiser_handler(RPC_TEST_NETPROVIDER));\n\n    TEST_PORT++;\n}\n\nTEST(tools_common, asio_udp_provider)\n{\n    if (dsn::service_engine::instance().spec().semaphore_factory_name ==\n        \"dsn::tools::sim_semaphore_provider\")\n        return;\n\n    ASSERT_TRUE(dsn_rpc_register_handler(\n        RPC_TEST_NETPROVIDER, \"rpc.test.netprovider\", rpc_server_response));\n\n    std::unique_ptr<asio_udp_provider> client(\n        new asio_udp_provider(task::get_current_rpc(), nullptr));\n\n    error_code start_result;\n    start_result = client->start(RPC_CHANNEL_UDP, 0, true);\n    ASSERT_TRUE(start_result == ERR_OK);\n\n    start_result = client->start(RPC_CHANNEL_UDP, TEST_PORT, false);\n    ASSERT_TRUE(start_result == ERR_OK);\n\n    message_ex *msg = message_ex::create_request(RPC_TEST_NETPROVIDER, 0, 0);\n    std::unique_ptr<char[]> buf(new char[128]);\n    memset(buf.get(), 0, 128);\n    strcpy(buf.get(), \"hello world\");\n    ::dsn::marshall(msg, std::string(buf.get()));\n\n    wait_flag = 0;\n    rpc_response_task *t = new rpc_response_task(msg,\n                                                 std::bind(&response_handler,\n                                                           std::placeholders::_1,\n                                                           std::placeholders::_2,\n                                                           std::placeholders::_3,\n                                                       
    buf.get()),\n                                                 0);\n\n    client->engine()->matcher()->on_call(msg, t);\n    client->send_message(msg);\n\n    wait_response();\n\n    ASSERT_TRUE(dsn_rpc_unregiser_handler(RPC_TEST_NETPROVIDER));\n    TEST_PORT++;\n}\n\nTEST(tools_common, sim_net_provider)\n{\n    if (dsn::service_engine::instance().spec().semaphore_factory_name ==\n        \"dsn::tools::sim_semaphore_provider\")\n        return;\n\n    ASSERT_TRUE(dsn_rpc_register_handler(\n        RPC_TEST_NETPROVIDER, \"rpc.test.netprovider\", rpc_server_response));\n\n    std::unique_ptr<sim_network_provider> sim_net(\n        new sim_network_provider(task::get_current_rpc(), nullptr));\n\n    error_code ans;\n    ans = sim_net->start(RPC_CHANNEL_TCP, TEST_PORT, false);\n    ASSERT_TRUE(ans == ERR_OK);\n\n    ans = sim_net->start(RPC_CHANNEL_TCP, TEST_PORT, false);\n    ASSERT_TRUE(ans == ERR_ADDRESS_ALREADY_USED);\n\n    rpc_session_ptr client_session =\n        sim_net->create_client_session(rpc_address(\"localhost\", TEST_PORT));\n    client_session->connect();\n\n    rpc_client_session_send(client_session);\n\n    ASSERT_TRUE(dsn_rpc_unregiser_handler(RPC_TEST_NETPROVIDER));\n\n    TEST_PORT++;\n}\n\nTEST(tools_common, asio_network_provider_connection_threshold)\n{\n    if (dsn::service_engine::instance().spec().semaphore_factory_name ==\n        \"dsn::tools::sim_semaphore_provider\")\n        return;\n\n    ASSERT_TRUE(dsn_rpc_register_handler(\n        RPC_TEST_NETPROVIDER, \"rpc.test.netprovider\", rpc_server_response));\n\n    std::unique_ptr<asio_network_provider_test> asio_network(\n        new asio_network_provider_test(task::get_current_rpc(), nullptr));\n\n    error_code start_result;\n    start_result = asio_network->start(RPC_CHANNEL_TCP, TEST_PORT, false);\n    ASSERT_TRUE(start_result == ERR_OK);\n\n    auto CONN_THRESHOLD = 3;\n    asio_network->change_test_cfg_conn_threshold_per_ip(CONN_THRESHOLD);\n\n    // not exceed threshold\n    for 
(int count = 0; count < CONN_THRESHOLD + 2; count++) {\n        ddebug(\"client # %d\", count);\n        rpc_session_ptr client_session =\n            asio_network->create_client_session(rpc_address(\"localhost\", TEST_PORT));\n        client_session->connect();\n\n        rpc_client_session_send(client_session);\n\n        client_session->close();\n        std::this_thread::sleep_for(std::chrono::milliseconds(5));\n    }\n\n    // exceed threshold\n    bool reject = false;\n    for (int count = 0; count < CONN_THRESHOLD + 2; count++) {\n        ddebug(\"client # %d\", count);\n        rpc_session_ptr client_session =\n            asio_network->create_client_session(rpc_address(\"localhost\", TEST_PORT));\n        client_session->connect();\n\n        if (count >= CONN_THRESHOLD)\n            reject = true;\n        rpc_client_session_send(client_session, reject);\n    }\n\n    ASSERT_TRUE(dsn_rpc_unregiser_handler(RPC_TEST_NETPROVIDER));\n\n    TEST_PORT++;\n}\n"
  },
  {
    "path": "src/runtime/test/pipeline_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <gtest/gtest.h>\n#include <dsn/cpp/pipeline.h>\n#include <dsn/dist/replication.h>\n\nnamespace dsn {\n\nTEST(pipeline_test, pause)\n{\n    struct mock_when : pipeline::when<>\n    {\n        void run() override { repeat(1_s); }\n    };\n\n    task_tracker tracker;\n\n    {\n        pipeline::base base;\n        ASSERT_TRUE(base.paused());\n\n        base.pause();\n        ASSERT_TRUE(base.paused());\n\n        mock_when s1;\n        base.thread_pool(LPC_MUTATION_LOG_PENDING_TIMER).task_tracker(&tracker);\n        base.from(s1);\n\n        {\n            base.run_pipeline();\n            ASSERT_FALSE(base.paused());\n\n            base.pause();\n            ASSERT_TRUE(base.paused());\n\n            base.wait_all();\n        
}\n    }\n}\n\nTEST(pipeline_test, link_pipe)\n{\n    task_tracker tracker;\n\n    struct mock_when : pipeline::when<>\n    {\n        void run() override { repeat(1_s); }\n    };\n\n    struct stage2 : pipeline::when<>, pipeline::result<>\n    {\n        void run() override { step_down_next_stage(); }\n    };\n\n    {\n        pipeline::base base1;\n        mock_when s1;\n        base1.thread_pool(LPC_MUTATION_LOG_PENDING_TIMER).task_tracker(&tracker);\n        base1.from(s1);\n\n        // base2 executes s2, then executes s1 in another pipeline.\n        pipeline::base base2;\n        stage2 s2;\n        base2.thread_pool(LPC_REPLICA_SERVER_DELAY_START).task_tracker(&tracker);\n        base2.from(s2).link(s1);\n\n        base2.run_pipeline();\n\n        base1.pause();\n        base2.pause();\n\n        base2.wait_all();\n    }\n}\n\nTEST(pipeline_test, verify_link_and_fork)\n{\n    struct mock_stage : pipeline::when<>, pipeline::result<>\n    {\n        void run() override { step_down_next_stage(); }\n    };\n    task_tracker tracker;\n    {\n        pipeline::base base;\n        base.thread_pool(LPC_MUTATION_LOG_PENDING_TIMER).task_tracker(&tracker).thread_hash(1);\n        mock_stage s1;\n        mock_stage s2;\n        base.from(s1).link(s2);\n        ASSERT_EQ(s1.__conf.tracker, &tracker);\n        ASSERT_EQ(s1.__conf.thread_hash, 1);\n        ASSERT_EQ(s1.__conf.thread_pool_code, LPC_MUTATION_LOG_PENDING_TIMER);\n        ASSERT_EQ(s1.__pipeline, &base);\n        ASSERT_EQ(s2.__conf.tracker, &tracker);\n        ASSERT_EQ(s2.__conf.thread_hash, 1);\n        ASSERT_EQ(s2.__conf.thread_pool_code, LPC_MUTATION_LOG_PENDING_TIMER);\n        ASSERT_EQ(s2.__pipeline, &base);\n        mock_stage s3;\n        base.fork(s3, LPC_REPLICA_SERVER_DELAY_START, 2).link(s2);\n        ASSERT_EQ(s3.__conf.thread_pool_code, LPC_REPLICA_SERVER_DELAY_START);\n        ASSERT_EQ(s3.__conf.tracker, &tracker);\n        ASSERT_EQ(s3.__conf.thread_hash, 2);\n        
ASSERT_EQ(s3.__pipeline, &base);\n        ASSERT_EQ(s2.__conf.tracker, &tracker);\n        ASSERT_EQ(s2.__conf.thread_hash, 1);\n        ASSERT_EQ(s2.__conf.thread_pool_code, LPC_MUTATION_LOG_PENDING_TIMER);\n        ASSERT_EQ(s2.__pipeline, &base);\n    }\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/test/replica_access_controller_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include <dsn/utility/flags.h>\n#include <dsn/dist/replication.h>\n#include \"runtime/security/replica_access_controller.h\"\n#include \"runtime/rpc/network.sim.h\"\n\nnamespace dsn {\nnamespace security {\nDSN_DECLARE_bool(enable_acl);\n\nclass replica_access_controller_test : public testing::Test\n{\npublic:\n    replica_access_controller_test()\n    {\n        _replica_access_controller = make_unique<replica_access_controller>(\"test\");\n    }\n\n    bool allowed(dsn::message_ex *msg) { return _replica_access_controller->allowed(msg); }\n\n    void set_replica_users(std::unordered_set<std::string> &&replica_users)\n    {\n        _replica_access_controller->_users.swap(replica_users);\n    }\n\n    std::unique_ptr<replica_access_controller> _replica_access_controller;\n};\n\nTEST_F(replica_access_controller_test, allowed)\n{\n    struct\n    {\n        std::unordered_set<std::string> replica_users;\n        std::string client_user;\n        bool result;\n    } tests[] = {{{\"replica_user1\", \"replica_user2\"}, \"replica_user1\", true},\n                 {{\"replica_user1\", \"replica_user2\"}, 
\"not_replica_user\", false},\n                 {{}, \"user_name\", true}};\n\n    bool origin_enable_acl = FLAGS_enable_acl;\n    FLAGS_enable_acl = true;\n\n    std::unique_ptr<tools::sim_network_provider> sim_net(\n        new tools::sim_network_provider(nullptr, nullptr));\n    auto sim_session = sim_net->create_client_session(rpc_address(\"localhost\", 10086));\n    dsn::message_ptr msg = message_ex::create_request(RPC_CM_LIST_APPS);\n    msg->io_session = sim_session;\n\n    for (auto &test : tests) {\n        set_replica_users(std::move(test.replica_users));\n        sim_session->set_client_username(test.client_user);\n\n        ASSERT_EQ(allowed(msg), test.result);\n    }\n\n    FLAGS_enable_acl = origin_enable_acl;\n}\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/test/rpc.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <vector>\n#include <string>\n#include <queue>\n\n#include <boost/lexical_cast.hpp>\n#include <gtest/gtest.h>\n\n#include <dsn/service_api_cpp.h>\n\n#include <dsn/utility/priority_queue.h>\n#include <dsn/tool-api/group_address.h>\n#include <dsn/tool-api/async_calls.h>\n\n#include \"test_utils.h\"\n\ntypedef std::function<void(error_code, dsn::message_ex *, dsn::message_ex *)> rpc_reply_handler;\n\nstatic dsn::rpc_address build_group()\n{\n    ::dsn::rpc_address server_group;\n    server_group.assign_group(\"server_group.test\");\n    dsn::rpc_group_address *g = server_group.group_address();\n    for (uint16_t p = TEST_PORT_BEGIN; p <= TEST_PORT_END; ++p) {\n        g->add(dsn::rpc_address(\"localhost\", p));\n    }\n\n    
g->set_leader(dsn::rpc_address(\"localhost\", TEST_PORT_BEGIN));\n    return server_group;\n}\n\nstatic ::dsn::rpc_address dsn_address_from_string(const std::string &str)\n{\n    size_t pos = str.find(\":\");\n    if (pos != std::string::npos) {\n        std::string host = str.substr(0, pos);\n        uint16_t port = boost::lexical_cast<uint16_t>(str.substr(pos + 1));\n        return ::dsn::rpc_address(host.c_str(), port);\n    } else {\n        // invalid address\n        return ::dsn::rpc_address();\n    }\n}\n\nTEST(core, rpc)\n{\n    int req = 0;\n    ::dsn::rpc_address server(\"localhost\", 20101);\n\n    auto result = ::dsn::rpc::call_wait<std::string>(\n        server, RPC_TEST_HASH, req, std::chrono::milliseconds(0), 1);\n    EXPECT_TRUE(result.first == ERR_OK);\n\n    EXPECT_TRUE(result.second.substr(0, result.second.length() - 2) ==\n                \"server.THREAD_POOL_TEST_SERVER\");\n}\n\nTEST(core, group_address_talk_to_others)\n{\n    ::dsn::rpc_address addr = build_group();\n\n    auto typed_callback = [addr](error_code err_code, const std::string &result) {\n        EXPECT_EQ(ERR_OK, err_code);\n        dsn::rpc_address addr_got;\n        ddebug(\"talk to others callback, result: %s\", result.c_str());\n        EXPECT_TRUE(addr_got.from_string_ipv4(result.c_str()));\n        EXPECT_EQ(TEST_PORT_END, addr_got.port());\n    };\n    ::dsn::task_ptr resp = ::dsn::rpc::call(addr,\n                                            RPC_TEST_STRING_COMMAND,\n                                            std::string(\"expect_talk_to_others\"),\n                                            nullptr,\n                                            typed_callback);\n    resp->wait();\n}\n\nTEST(core, group_address_change_leader)\n{\n    ::dsn::rpc_address addr = build_group();\n\n    error_code rpc_err;\n    auto typed_callback = [addr, &rpc_err](error_code err_code, const std::string &result) -> void {\n        rpc_err = err_code;\n        if (ERR_OK == err_code) {\n      
      ::dsn::rpc_address addr_got;\n            ddebug(\"talk to others callback, result: %s\", result.c_str());\n            EXPECT_TRUE(addr_got.from_string_ipv4(result.c_str()));\n            EXPECT_EQ(TEST_PORT_END, addr_got.port());\n        }\n    };\n\n    ::dsn::task_ptr resp_task;\n\n    // not update leader on forwarding\n    addr.group_address()->set_update_leader_automatically(false);\n    addr.group_address()->set_leader(dsn::rpc_address(\"localhost\", TEST_PORT_BEGIN));\n    resp_task = ::dsn::rpc::call(addr,\n                                 RPC_TEST_STRING_COMMAND,\n                                 std::string(\"expect_talk_to_others\"),\n                                 nullptr,\n                                 typed_callback);\n    resp_task->wait();\n    if (rpc_err == ERR_OK) {\n        EXPECT_EQ(dsn::rpc_address(\"localhost\", TEST_PORT_BEGIN),\n                  dsn::rpc_address(addr.group_address()->leader()));\n    }\n\n    // update leader on forwarding\n    addr.group_address()->set_update_leader_automatically(true);\n    addr.group_address()->set_leader(dsn::rpc_address(\"localhost\", TEST_PORT_BEGIN));\n    resp_task = dsn::rpc::call(addr,\n                               RPC_TEST_STRING_COMMAND,\n                               std::string(\"expect_talk_to_others\"),\n                               nullptr,\n                               typed_callback);\n    resp_task->wait();\n    ddebug(\"addr.leader=%s\", addr.group_address()->leader().to_string());\n    if (rpc_err == ERR_OK) {\n        EXPECT_EQ(TEST_PORT_END, addr.group_address()->leader().port());\n    }\n}\n\ntypedef ::dsn::utils::priority_queue<::dsn::task_ptr, 1> task_resp_queue;\nstatic void rpc_group_callback(error_code err,\n                               dsn::message_ex *req,\n                               dsn::message_ex *resp,\n                               task_resp_queue *q,\n                               rpc_reply_handler action_on_succeed,\n                       
        rpc_reply_handler action_on_failure)\n{\n    if (ERR_OK == err) {\n        action_on_succeed(err, req, resp);\n    } else {\n        action_on_failure(err, req, resp);\n\n        dsn::rpc_address group_addr = ((dsn::message_ex *)req)->server_address;\n        group_addr.group_address()->leader_forward();\n\n        auto req_again = req->copy(false, false);\n        auto call_again = ::dsn::rpc::call(\n            group_addr,\n            req_again,\n            nullptr,\n            [=](error_code err, dsn::message_ex *request, dsn::message_ex *response) {\n                rpc_group_callback(err,\n                                   request,\n                                   response,\n                                   q,\n                                   std::move(action_on_succeed),\n                                   std::move(action_on_failure));\n            });\n        q->enqueue(call_again, 0);\n    }\n}\n\nstatic void send_message(::dsn::rpc_address addr,\n                         const std::string &command,\n                         int repeat_times,\n                         rpc_reply_handler action_on_succeed,\n                         rpc_reply_handler action_on_failure)\n{\n    task_resp_queue q(\"response.queue\");\n    for (int i = 0; i != repeat_times; ++i) {\n        dsn::message_ptr request = dsn::message_ex::create_request(RPC_TEST_STRING_COMMAND);\n        ::dsn::marshall(request.get(), command);\n        dsn::task_ptr resp_task = ::dsn::rpc::call(\n            addr,\n            request.get(),\n            nullptr,\n            [&](error_code err, dsn::message_ex *request, dsn::message_ex *response) {\n                rpc_group_callback(\n                    err, request, response, &q, action_on_succeed, action_on_failure);\n            });\n        q.enqueue(resp_task, 0);\n    }\n    while (q.count() != 0) {\n        task_ptr p = q.dequeue();\n        p->wait();\n    }\n}\n\nTEST(core, group_address_no_response_2)\n{\n    
::dsn::rpc_address addr = build_group();\n    rpc_reply_handler action_on_succeed =\n        [](error_code err, dsn::message_ex *, dsn::message_ex *resp) {\n            EXPECT_TRUE(err == ERR_OK);\n            std::string result;\n            ::dsn::unmarshall(resp, result);\n            ::dsn::rpc_address a = dsn_address_from_string(result);\n            EXPECT_TRUE(a.port() == TEST_PORT_END);\n        };\n\n    rpc_reply_handler action_on_failure =\n        [](error_code err, dsn::message_ex *req, dsn::message_ex *) {\n            if (err == ERR_TIMEOUT) {\n                EXPECT_TRUE(((dsn::message_ex *)req)->to_address.port() != TEST_PORT_END);\n            }\n        };\n\n    send_message(addr, std::string(\"expect_no_reply\"), 1, action_on_succeed, action_on_failure);\n}\n\nTEST(core, send_to_invalid_address)\n{\n    ::dsn::rpc_address group = build_group();\n    /* here we assume 10.255.254.253:32766 is not assigned */\n    group.group_address()->set_leader(dsn::rpc_address(\"10.255.254.253\", 32766));\n\n    rpc_reply_handler action_on_succeed =\n        [](error_code err, dsn::message_ex *, dsn::message_ex *resp) {\n            EXPECT_TRUE(err == ERR_OK);\n            std::string hehe_str;\n            ::dsn::unmarshall(resp, hehe_str);\n            EXPECT_TRUE(hehe_str == \"hehehe\");\n        };\n    rpc_reply_handler action_on_failure = [](error_code err, dsn::message_ex *, dsn::message_ex *) {\n        EXPECT_TRUE(err != ERR_OK);\n    };\n\n    send_message(group, std::string(\"echo hehehe\"), 1, action_on_succeed, action_on_failure);\n}\n"
  },
  {
    "path": "src/runtime/test/rpc_holder_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/cpp/rpc_holder.h>\n#include <dsn/cpp/serialization_helper/dsn.layer2_types.h>\n#include <dsn/cpp/message_utils.h>\n\n#include <gtest/gtest.h>\n\nusing namespace dsn;\n\ntypedef rpc_holder<configuration_query_by_index_request, configuration_query_by_index_response>\n    t_rpc;\n\nDEFINE_TASK_CODE_RPC(RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX,\n                     TASK_PRIORITY_COMMON,\n                     THREAD_POOL_DEFAULT)\n\nTEST(rpc_holder, type_traits)\n{\n    ASSERT_FALSE(is_rpc_holder<bool>::value);\n    ASSERT_TRUE(is_rpc_holder<t_rpc>::value);\n}\n\nTEST(rpc_holder, construct)\n{\n    {\n        t_rpc rpc;\n        ASSERT_FALSE(rpc.is_initialized());\n    }\n\n    {\n        auto request = 
make_unique<configuration_query_by_index_request>();\n        t_rpc rpc(std::move(request), RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX);\n        ASSERT_TRUE(rpc.is_initialized());\n    }\n\n    {\n        configuration_query_by_index_request request;\n        request.app_name = \"test\";\n        dsn::message_ex *msg =\n            dsn::message_ex::create_request(RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX);\n        dsn::marshall(msg, request);\n        dsn::message_ex *msg2 = msg->copy(true, true);\n\n        t_rpc rpc(msg2);\n        ASSERT_TRUE(rpc.is_initialized());\n        ASSERT_EQ(rpc.request().app_name, \"test\");\n    }\n\n    {\n        auto request = make_unique<configuration_query_by_index_request>();\n        t_rpc rpc(std::move(request), RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX);\n        ASSERT_EQ(rpc.error(), ERR_OK);\n        ASSERT_TRUE(rpc.is_initialized());\n\n        rpc.error() = ERR_BUSY;\n        ASSERT_EQ(rpc.error(), ERR_BUSY);\n\n        rpc.error() = ERR_ADDRESS_ALREADY_USED;\n        ASSERT_EQ(rpc.error(), ERR_ADDRESS_ALREADY_USED);\n    }\n}\n\nTEST(rpc_holder, mock_rpc_call)\n{\n    RPC_MOCKING(t_rpc)\n    {\n        auto &mail_box = t_rpc::mail_box();\n\n        for (int i = 0; i < 10; i++) {\n            auto request = make_unique<configuration_query_by_index_request>();\n            t_rpc rpc(std::move(request), RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX);\n            rpc.call(rpc_address(\"127.0.0.1\", 12321), nullptr, [](error_code) {});\n        }\n\n        ASSERT_EQ(mail_box.size(), 10);\n    }\n\n    // test in error cases\n    RPC_MOCKING(t_rpc)\n    {\n        auto &mail_box = t_rpc::mail_box();\n\n        for (int i = 0; i < 10; i++) {\n            auto request = make_unique<configuration_query_by_index_request>();\n            t_rpc rpc(std::move(request), RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX);\n            rpc.error() = ERR_BUSY;\n            rpc.call(rpc_address(\"127.0.0.1\", 12321), nullptr, [](error_code) {});\n        }\n\n  
      ASSERT_EQ(mail_box.size(), 10);\n\n        for (const auto &iter : mail_box) {\n            ASSERT_EQ(iter.error(), ERR_BUSY);\n        }\n    }\n\n    // instances of rpc mocking are independent\n    RPC_MOCKING(t_rpc)\n    {\n        auto &mail_box = t_rpc::mail_box();\n        ASSERT_EQ(mail_box.size(), 0);\n\n        for (int i = 0; i < 10; i++) {\n            auto request = make_unique<configuration_query_by_index_request>();\n            t_rpc rpc(std::move(request), RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX);\n            rpc.call(rpc_address(\"127.0.0.1\", 12321), nullptr, [](error_code) {});\n        }\n\n        ASSERT_EQ(mail_box.size(), 10);\n    }\n}\n\nTEST(rpc_holder, mock_rpc_reply)\n{\n    RPC_MOCKING(t_rpc)\n    {\n        auto &mail_box = t_rpc::mail_box();\n\n        for (int i = 0; i < 10; i++) {\n            configuration_query_by_index_request request;\n            request.app_name = \"haha\";\n            auto msg = from_thrift_request_to_received_message(\n                request, RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX);\n            auto rpc = t_rpc::auto_reply(msg);\n\n            // destruct rpc and automatically reply via mail_box\n        }\n\n        ASSERT_EQ(mail_box.size(), 10);\n    }\n}\n\nTEST(rpc_holder, mock_rpc_forward)\n{\n    RPC_MOCKING(t_rpc)\n    {\n        auto &mail_box = t_rpc::mail_box();\n        auto &forward_mail_box = t_rpc::forward_mail_box();\n        rpc_address forward_addr(\"127.0.0.1\", 10086);\n\n        for (int i = 0; i < 10; i++) {\n            configuration_query_by_index_request request;\n            auto msg = from_thrift_request_to_received_message(\n                request, RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX);\n            auto rpc = t_rpc::auto_reply(msg);\n            rpc.forward(forward_addr);\n\n            // destruct rpc and automatically reply via mail_box\n        }\n\n        ASSERT_EQ(mail_box.size(), 0);\n        ASSERT_EQ(forward_mail_box.size(), 10);\n        for (auto rpc : 
forward_mail_box) {\n            ASSERT_EQ(rpc.remote_address(), forward_addr);\n        }\n    }\n}\n"
  },
  {
    "path": "src/runtime/test/rpc_message.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Unit-test for rpc_message.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"runtime/message_utils.cpp\"\n#include <dsn/utility/crc.h>\n#include <dsn/tool-api/rpc_message.h>\n#include <gtest/gtest.h>\n\nusing namespace ::dsn;\n\nDEFINE_TASK_CODE_RPC(RPC_CODE_FOR_TEST, TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT)\n\nTEST(core, message_ex)\n{\n    msg_context_t ctx0, ctx1;\n    ctx0.context = 0;\n    ctx0.u.is_request = true;\n    ctx0.u.serialize_format = DSF_THRIFT_BINARY;\n    ctx0.u.is_forward_supported = true;\n\n    ctx1.context = 0;\n    ctx1.u.is_request = false;\n    ctx1.u.serialize_format = 
DSF_THRIFT_BINARY;\n    ctx1.u.is_forward_supported = true;\n\n    { // create_request\n        uint64_t next_id = message_ex::new_id() + 1;\n        message_ex *m = message_ex::create_request(RPC_CODE_FOR_TEST, 100, 1, 2);\n        ASSERT_EQ(0, m->get_count());\n\n        message_header &h = *m->header;\n        ASSERT_EQ(0, h.hdr_version);\n        ASSERT_EQ(sizeof(message_header), h.hdr_length);\n        ASSERT_EQ(CRC_INVALID, h.hdr_crc32);\n        ASSERT_EQ(0, h.body_length);\n        ASSERT_EQ(CRC_INVALID, h.body_crc32);\n        ASSERT_EQ(next_id, h.id);\n        ASSERT_EQ(0, h.trace_id); ///////////////////\n        ASSERT_STREQ(dsn::task_code(RPC_CODE_FOR_TEST).to_string(), h.rpc_name);\n        ASSERT_EQ(0, h.gpid.value());\n        ASSERT_EQ(ctx0.context, h.context.context);\n        ASSERT_EQ(100, h.client.timeout_ms);\n        ASSERT_EQ(1, h.client.thread_hash);\n        ASSERT_EQ(2, h.client.partition_hash);\n        ASSERT_EQ(0, h.from_address.port());\n\n        ASSERT_EQ(1u, m->buffers.size());\n        ASSERT_EQ((int)RPC_CODE_FOR_TEST, m->local_rpc_code);\n\n        m->add_ref();\n        ASSERT_EQ(1, m->get_count());\n        m->release_ref();\n    }\n\n    { // create_response\n        message_ex *request = message_ex::create_request(RPC_CODE_FOR_TEST, 0, 0);\n        request->header->from_address = rpc_address(\"127.0.0.1\", 8080);\n        request->to_address = rpc_address(\"127.0.0.1\", 9090);\n        request->header->trace_id = 123456;\n\n        message_ex *response = request->create_response();\n\n        message_header &h = *response->header;\n        ASSERT_EQ(0, h.hdr_version);\n        ASSERT_EQ(sizeof(message_header), h.hdr_length);\n        ASSERT_EQ(CRC_INVALID, h.hdr_crc32);\n        ASSERT_EQ(0, h.body_length);\n        ASSERT_EQ(CRC_INVALID, h.body_crc32);\n        ASSERT_EQ(request->header->id, h.id);\n        ASSERT_EQ(request->header->trace_id, h.trace_id); ///////////////////\n        
ASSERT_STREQ(dsn::task_code(RPC_CODE_FOR_TEST_ACK).to_string(), h.rpc_name);\n        ASSERT_EQ(0, h.gpid.value());\n        ASSERT_EQ(ctx1.context, h.context.context);\n        ASSERT_EQ(0, h.server.error_code.local_code);\n\n        ASSERT_EQ(1u, response->buffers.size());\n        ASSERT_EQ((int)RPC_CODE_FOR_TEST_ACK, response->local_rpc_code);\n        ASSERT_EQ(request->header->from_address, response->to_address);\n        ASSERT_EQ(request->to_address, response->header->from_address);\n\n        response->add_ref();\n        response->release_ref();\n\n        request->add_ref();\n        request->release_ref();\n    }\n\n    { // write\n        message_ex *request = message_ex::create_request(RPC_CODE_FOR_TEST, 100, 1);\n        const char *data = \"adaoihfeuifgggggisdosghkbvjhzxvdafdiofgeof\";\n        size_t data_size = strlen(data);\n\n        void *ptr;\n        size_t sz;\n\n        request->write_next(&ptr, &sz, data_size);\n        memcpy(ptr, data, data_size);\n        request->write_commit(data_size);\n        ASSERT_EQ(2u, request->buffers.size());\n        ASSERT_EQ(ptr, request->rw_ptr(0));\n        ASSERT_EQ((void *)((char *)ptr + 10), request->rw_ptr(10));\n        ASSERT_EQ(nullptr, request->rw_ptr(data_size));\n\n        request->write_next(&ptr, &sz, data_size);\n        memcpy(ptr, data, data_size);\n        request->write_commit(data_size);\n        ASSERT_EQ(3u, request->buffers.size());\n        ASSERT_EQ(ptr, request->rw_ptr(data_size));\n        ASSERT_EQ((void *)((char *)ptr + 10), request->rw_ptr(data_size + 10));\n        ASSERT_EQ(nullptr, request->rw_ptr(data_size + data_size));\n\n        request->add_ref();\n        request->release_ref();\n    }\n\n    { // read\n        message_ex *request = message_ex::create_request(RPC_CODE_FOR_TEST, 100, 1);\n        const char *data = \"adaoihfeuifgggggisdosghkbvjhzxvdafdiofgeof\";\n        size_t data_size = strlen(data);\n\n        void *ptr;\n        size_t sz;\n\n        
request->write_next(&ptr, &sz, data_size);\n        memcpy(ptr, data, data_size);\n        request->write_commit(data_size);\n\n        ASSERT_EQ(2u, request->buffers.size());\n\n        message_ex *receive = message_ex::create_received_request(\n            request->local_rpc_code,\n            (dsn_msg_serialize_format)request->header->context.u.serialize_format,\n            (void *)request->buffers[1].data(),\n            request->buffers[1].size(),\n            request->header->client.thread_hash,\n            request->header->client.partition_hash);\n        ASSERT_EQ(2u, receive->buffers.size());\n\n        ASSERT_STREQ(dsn::task_code(RPC_CODE_FOR_TEST).to_string(), receive->header->rpc_name);\n\n        ASSERT_TRUE(receive->read_next(&ptr, &sz));\n        ASSERT_EQ(data_size, sz);\n        ASSERT_EQ(std::string(data), std::string((const char *)ptr, sz));\n        receive->read_commit(sz);\n\n        ASSERT_FALSE(receive->read_next(&ptr, &sz));\n\n        receive->add_ref();\n        receive->release_ref();\n\n        request->add_ref();\n        request->release_ref();\n    }\n}\n\nTEST(rpc_message, restore_read)\n{\n    using namespace dsn;\n    configuration_query_by_index_request request, result;\n    message_ptr msg = from_thrift_request_to_received_message(request, RPC_CODE_FOR_TEST);\n    for (int i = 0; i < 10; i++) {\n        unmarshall(msg, result);\n        msg->restore_read();\n    }\n}\n\nTEST(rpc_message, create_receive_message_with_standalone_header)\n{\n    auto data = blob::create_from_bytes(\"10086\");\n\n    message_ptr msg = message_ex::create_receive_message_with_standalone_header(data);\n    ASSERT_EQ(msg->buffers.size(), 2);\n    ASSERT_EQ(0, strcmp(msg->buffers[1].data(), data.data()));\n    ASSERT_EQ(msg->header->body_length, data.length());\n}\n\nTEST(rpc_message, copy_message_no_reply)\n{\n    auto data = blob::create_from_bytes(\"10086\");\n    message_ptr old_msg = 
message_ex::create_receive_message_with_standalone_header(data);\n    old_msg->local_rpc_code = RPC_CODE_FOR_TEST;\n\n    auto msg = message_ex::copy_message_no_reply(*old_msg);\n    ASSERT_EQ(msg->buffers.size(), old_msg->buffers.size());\n    ASSERT_EQ(0, strcmp(msg->buffers[1].data(), old_msg->buffers[1].data()));\n    ASSERT_EQ(msg->header->body_length, old_msg->header->body_length);\n    ASSERT_EQ(msg->local_rpc_code, old_msg->local_rpc_code);\n\n    // add_ref was called in message_ex::copy_message_no_reply for msg\n    // so we only need to call release_ref here.\n    msg->release_ref();\n}\n"
  },
  {
    "path": "src/runtime/test/run.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nif [ -z \"${REPORT_DIR}\" ]; then\n    REPORT_DIR=\".\"\nfi\n\nwhile read -r -a line; do\n    test_case=${line[0]}\n    gtest_filter=${line[1]}\n    output_xml=\"${REPORT_DIR}/dsn_runtime_tests_${test_case/.ini/.xml}\"\n    echo \"============ run dsn_runtime_tests ${test_case} with gtest_filter ${gtest_filter} ============\"\n    ./clear.sh\n    GTEST_OUTPUT=\"xml:${output_xml}\" GTEST_FILTER=${gtest_filter} ./dsn_runtime_tests ${test_case} < command.txt\n\n    if [ $? -ne 0 ]; then\n        echo \"run dsn_runtime_tests $test_case failed\"\n        echo \"---- ls ----\"\n        ls -l\n        if find . -name log.1.txt; then\n            echo \"---- tail -n 100 log.1.txt ----\"\n            tail -n 100 `find . 
-name log.1.txt`\n        fi\n        if [ -f core ]; then\n            echo \"---- gdb ./dsn_runtime_tests core ----\"\n            gdb ./dsn_runtime_tests core -ex \"thread apply all bt\" -ex \"set pagination 0\" -batch\n        fi\n        exit 1\n    fi\n    echo \"============ done dsn_runtime_tests ${test_case} with gtest_filter ${gtest_filter} ============\"\ndone <gtest.filter\n\necho \"============ done dsn_runtime_tests ============\"\n\n"
  },
  {
    "path": "src/runtime/test/server_negotiation_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"runtime/security/server_negotiation.h\"\n#include \"runtime/security/negotiation_utils.h\"\n#include \"runtime/rpc/network.sim.h\"\n\n#include <gtest/gtest.h>\n#include <dsn/utility/fail_point.h>\n\nnamespace dsn {\nnamespace security {\nclass server_negotiation_test : public testing::Test\n{\npublic:\n    server_negotiation_test()\n    {\n        std::unique_ptr<tools::sim_network_provider> sim_net(\n            new tools::sim_network_provider(nullptr, nullptr));\n        _sim_session =\n            sim_net->create_server_session(rpc_address(\"localhost\", 10086), rpc_session_ptr());\n        _srv_negotiation = make_unique<server_negotiation>(_sim_session);\n    }\n\n    negotiation_rpc create_negotiation_rpc(negotiation_status::type status, const std::string &msg)\n    {\n        auto request = make_unique<negotiation_request>();\n        request->status = status;\n        request->msg = dsn::blob::create_from_bytes(msg.data(), msg.length());\n        return negotiation_rpc(std::move(request), RPC_NEGOTIATION);\n    }\n\n    void on_list_mechanisms(negotiation_rpc rpc) { _srv_negotiation->on_list_mechanisms(rpc); }\n\n    void 
on_select_mechanism(negotiation_rpc rpc) { _srv_negotiation->on_select_mechanism(rpc); }\n\n    void on_initiate(negotiation_rpc rpc) { _srv_negotiation->on_initiate(rpc); }\n\n    void on_challenge_resp(negotiation_rpc rpc) { _srv_negotiation->on_challenge_resp(rpc); }\n\n    negotiation_status::type get_negotiation_status() { return _srv_negotiation->_status; }\n\n    // _sim_session is used for holding the sim_rpc_session which is created in ctor,\n    // in case it is released. Because negotiation keeps only a raw pointer.\n    rpc_session_ptr _sim_session;\n    std::unique_ptr<server_negotiation> _srv_negotiation;\n};\n\nTEST_F(server_negotiation_test, on_list_mechanisms)\n{\n    struct\n    {\n        negotiation_status::type req_status;\n        negotiation_status::type resp_status;\n        std::string resp_msg;\n        negotiation_status::type nego_status;\n    } tests[] = {{negotiation_status::type::SASL_LIST_MECHANISMS,\n                  negotiation_status::type::SASL_LIST_MECHANISMS_RESP,\n                  \"GSSAPI\",\n                  negotiation_status::type::SASL_LIST_MECHANISMS_RESP},\n                 {negotiation_status::type::SASL_SELECT_MECHANISMS,\n                  negotiation_status::type::INVALID,\n                  \"\",\n                  negotiation_status::type::SASL_AUTH_FAIL}};\n\n    RPC_MOCKING(negotiation_rpc)\n    {\n        for (const auto &test : tests) {\n            auto rpc = create_negotiation_rpc(test.req_status, \"\");\n            on_list_mechanisms(rpc);\n\n            ASSERT_EQ(rpc.response().status, test.resp_status);\n            ASSERT_EQ(rpc.response().msg.to_string(), test.resp_msg);\n            ASSERT_EQ(get_negotiation_status(), test.nego_status);\n        }\n    }\n}\n\nTEST_F(server_negotiation_test, on_select_mechanism)\n{\n    struct\n    {\n        std::string sasl_init_result;\n        negotiation_status::type req_status;\n        std::string req_msg;\n        negotiation_status::type resp_status;\n     
   negotiation_status::type nego_status;\n    } tests[] = {{\n                     \"ERR_OK\",\n                     negotiation_status::type::SASL_SELECT_MECHANISMS,\n                     \"GSSAPI\",\n                     negotiation_status::type::SASL_SELECT_MECHANISMS_RESP,\n                     negotiation_status::type::SASL_SELECT_MECHANISMS_RESP,\n                 },\n                 {\"ERR_OK\",\n                  negotiation_status::type::SASL_SELECT_MECHANISMS,\n                  \"TEST\",\n                  negotiation_status::type::INVALID,\n                  negotiation_status::type::SASL_AUTH_FAIL},\n                 {\"ERR_TIMEOUT\",\n                  negotiation_status::type::SASL_SELECT_MECHANISMS,\n                  \"GSSAPI\",\n                  negotiation_status::type::INVALID,\n                  negotiation_status::type::SASL_AUTH_FAIL},\n                 {\"ERR_OK\",\n                  negotiation_status::type::SASL_INITIATE,\n                  \"GSSAPI\",\n                  negotiation_status::type::INVALID,\n                  negotiation_status::type::SASL_AUTH_FAIL}};\n\n    RPC_MOCKING(negotiation_rpc)\n    {\n        for (const auto &test : tests) {\n            fail::setup();\n            fail::cfg(\"sasl_server_wrapper_init\", \"return(\" + test.sasl_init_result + \")\");\n\n            auto rpc = create_negotiation_rpc(test.req_status, test.req_msg);\n            on_select_mechanism(rpc);\n            ASSERT_EQ(rpc.response().status, test.resp_status);\n            ASSERT_EQ(get_negotiation_status(), test.nego_status);\n\n            fail::teardown();\n        }\n    }\n}\n\nTEST_F(server_negotiation_test, on_initiate)\n{\n    struct\n    {\n        std::string sasl_start_result;\n        std::string sasl_retrieve_username_result;\n        negotiation_status::type req_status;\n        negotiation_status::type resp_status;\n        negotiation_status::type nego_status;\n    } tests[] = {\n        {\"ERR_TIMEOUT\",\n         
\"ERR_OK\",\n         negotiation_status::type::SASL_INITIATE,\n         negotiation_status::type::INVALID,\n         negotiation_status::type::SASL_AUTH_FAIL},\n        {\"ERR_OK\",\n         \"ERR_OK\",\n         negotiation_status::type::SASL_SELECT_MECHANISMS,\n         negotiation_status::type::INVALID,\n         negotiation_status::type::SASL_AUTH_FAIL},\n        {\"ERR_OK\",\n         \"ERR_TIMEOUT\",\n         negotiation_status::type::SASL_INITIATE,\n         negotiation_status::type::INVALID,\n         negotiation_status::type::SASL_AUTH_FAIL},\n        {\"ERR_SASL_INCOMPLETE\",\n         \"ERR_OK\",\n         negotiation_status::type::SASL_INITIATE,\n         negotiation_status::type::SASL_CHALLENGE,\n         negotiation_status::type::SASL_CHALLENGE},\n        {\"ERR_OK\",\n         \"ERR_OK\",\n         negotiation_status::type::SASL_INITIATE,\n         negotiation_status::type::SASL_SUCC,\n         negotiation_status::type::SASL_SUCC},\n    };\n\n    RPC_MOCKING(negotiation_rpc)\n    {\n        for (const auto &test : tests) {\n            fail::setup();\n            fail::cfg(\"sasl_server_wrapper_start\", \"return(\" + test.sasl_start_result + \")\");\n            fail::cfg(\"sasl_wrapper_retrieve_username\",\n                      \"return(\" + test.sasl_retrieve_username_result + \")\");\n\n            auto rpc = create_negotiation_rpc(test.req_status, \"\");\n            on_initiate(rpc);\n            ASSERT_EQ(rpc.response().status, test.resp_status);\n            ASSERT_EQ(get_negotiation_status(), test.nego_status);\n\n            fail::teardown();\n        }\n    }\n}\n\nTEST_F(server_negotiation_test, on_challenge_resp)\n{\n    struct\n    {\n        std::string sasl_step_result;\n        std::string sasl_retrieve_username_result;\n        negotiation_status::type req_status;\n        negotiation_status::type resp_status;\n        negotiation_status::type nego_status;\n    } tests[] = {{\"ERR_TIMEOUT\",\n                  \"ERR_OK\",\n       
           negotiation_status::type::SASL_CHALLENGE_RESP,\n                  negotiation_status::type::INVALID,\n                  negotiation_status::type::SASL_AUTH_FAIL},\n                 {\"ERR_OK\",\n                  \"ERR_OK\",\n                  negotiation_status::type::SASL_SELECT_MECHANISMS,\n                  negotiation_status::type::INVALID,\n                  negotiation_status::type::SASL_AUTH_FAIL},\n                 {\"ERR_OK\",\n                  \"ERR_TIMEOUT\",\n                  negotiation_status::type::SASL_CHALLENGE_RESP,\n                  negotiation_status::type::INVALID,\n                  negotiation_status::type::SASL_AUTH_FAIL},\n                 {\"ERR_SASL_INCOMPLETE\",\n                  \"ERR_OK\",\n                  negotiation_status::type::SASL_CHALLENGE_RESP,\n                  negotiation_status::type::SASL_CHALLENGE,\n                  negotiation_status::type::SASL_CHALLENGE},\n                 {\"ERR_OK\",\n                  \"ERR_OK\",\n                  negotiation_status::type::SASL_CHALLENGE_RESP,\n                  negotiation_status::type::SASL_SUCC,\n                  negotiation_status::type::SASL_SUCC}};\n\n    RPC_MOCKING(negotiation_rpc)\n    {\n        for (const auto &test : tests) {\n            fail::setup();\n            fail::cfg(\"sasl_server_wrapper_step\", \"return(\" + test.sasl_step_result + \")\");\n            fail::cfg(\"sasl_wrapper_retrieve_username\",\n                      \"return(\" + test.sasl_retrieve_username_result + \")\");\n\n            auto rpc = create_negotiation_rpc(test.req_status, \"\");\n            on_challenge_resp(rpc);\n            ASSERT_EQ(rpc.response().status, test.resp_status);\n            ASSERT_EQ(get_negotiation_status(), test.nego_status);\n\n            fail::teardown();\n        }\n    }\n}\n} // namespace security\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/test/service_api_c.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Unit-test for c service api.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/service_api_c.h>\n#include <dsn/tool_api.h>\n#include <dsn/tool-api/file_io.h>\n#include <dsn/tool-api/auto_codes.h>\n#include <dsn/tool-api/zlocks.h>\n#include <dsn/utility/utils.h>\n#include <dsn/utility/filesystem.h>\n#include <gtest/gtest.h>\n#include <thread>\n#include <dsn/utility/rand.h>\n#include \"runtime/service_engine.h\"\n\nusing namespace dsn;\n\nTEST(core, dsn_error)\n{\n    ASSERT_EQ(ERR_OK, dsn::error_code(\"ERR_OK\"));\n    ASSERT_STREQ(\"ERR_OK\", 
ERR_OK.to_string());\n}\n\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_FOR_TEST)\nTEST(core, dsn_threadpool_code)\n{\n    ASSERT_FALSE(dsn::threadpool_code::is_exist(\"THREAD_POOL_NOT_EXIST\"));\n    ASSERT_STREQ(\"THREAD_POOL_DEFAULT\", THREAD_POOL_DEFAULT.to_string());\n    ASSERT_EQ(THREAD_POOL_DEFAULT, dsn::threadpool_code(\"THREAD_POOL_DEFAULT\"));\n    ASSERT_LE(THREAD_POOL_DEFAULT, dsn::threadpool_code::max());\n\n    ASSERT_STREQ(\"THREAD_POOL_FOR_TEST\", THREAD_POOL_FOR_TEST.to_string());\n    ASSERT_EQ(THREAD_POOL_FOR_TEST, dsn::threadpool_code(\"THREAD_POOL_FOR_TEST\"));\n    ASSERT_LE(THREAD_POOL_FOR_TEST, dsn::threadpool_code::max());\n\n    ASSERT_LT(0, dsn::utils::get_current_tid());\n}\n\nDEFINE_TASK_CODE(TASK_CODE_COMPUTE_FOR_TEST, TASK_PRIORITY_HIGH, THREAD_POOL_DEFAULT)\nDEFINE_TASK_CODE_AIO(TASK_CODE_AIO_FOR_TEST, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\nDEFINE_TASK_CODE_RPC(TASK_CODE_RPC_FOR_TEST, TASK_PRIORITY_LOW, THREAD_POOL_DEFAULT)\nTEST(core, dsn_task_code)\n{\n    dsn_task_type_t type;\n    dsn_task_priority_t pri;\n    dsn::threadpool_code pool;\n\n    ASSERT_EQ(TASK_CODE_INVALID, dsn::task_code::try_get(\"TASK_CODE_NOT_EXIST\", TASK_CODE_INVALID));\n\n    ASSERT_STREQ(\"TASK_TYPE_COMPUTE\", enum_to_string(TASK_TYPE_COMPUTE));\n\n    ASSERT_STREQ(\"TASK_PRIORITY_HIGH\", enum_to_string(TASK_PRIORITY_HIGH));\n\n    ASSERT_STREQ(\"TASK_CODE_COMPUTE_FOR_TEST\",\n                 dsn::task_code(TASK_CODE_COMPUTE_FOR_TEST).to_string());\n    ASSERT_EQ(TASK_CODE_COMPUTE_FOR_TEST,\n              dsn::task_code::try_get(\"TASK_CODE_COMPUTE_FOR_TEST\", TASK_CODE_INVALID));\n    ASSERT_LE(TASK_CODE_COMPUTE_FOR_TEST, dsn::task_code::max());\n    dsn::task_spec *spec = dsn::task_spec::get(TASK_CODE_COMPUTE_FOR_TEST.code());\n    ASSERT_EQ(TASK_TYPE_COMPUTE, spec->type);\n    ASSERT_EQ(TASK_PRIORITY_HIGH, spec->priority);\n    ASSERT_EQ(THREAD_POOL_DEFAULT, spec->pool_code);\n\n    ASSERT_STREQ(\"TASK_CODE_AIO_FOR_TEST\", 
dsn::task_code(TASK_CODE_AIO_FOR_TEST).to_string());\n    ASSERT_EQ(TASK_CODE_AIO_FOR_TEST,\n              dsn::task_code::try_get(\"TASK_CODE_AIO_FOR_TEST\", TASK_CODE_INVALID));\n    ASSERT_LE(TASK_CODE_AIO_FOR_TEST, dsn::task_code::max());\n    spec = dsn::task_spec::get(TASK_CODE_AIO_FOR_TEST.code());\n    ASSERT_EQ(TASK_TYPE_AIO, spec->type);\n    ASSERT_EQ(TASK_PRIORITY_COMMON, spec->priority);\n    ASSERT_EQ(THREAD_POOL_DEFAULT, spec->pool_code);\n\n    ASSERT_STREQ(\"TASK_CODE_RPC_FOR_TEST\", dsn::task_code(TASK_CODE_RPC_FOR_TEST).to_string());\n    ASSERT_EQ(TASK_CODE_RPC_FOR_TEST,\n              dsn::task_code::try_get(\"TASK_CODE_RPC_FOR_TEST\", TASK_CODE_INVALID));\n    ASSERT_LE(TASK_CODE_RPC_FOR_TEST, dsn::task_code::max());\n    spec = dsn::task_spec::get(TASK_CODE_RPC_FOR_TEST.code());\n    ASSERT_EQ(TASK_TYPE_RPC_REQUEST, spec->type);\n    ASSERT_EQ(TASK_PRIORITY_LOW, spec->priority);\n    ASSERT_EQ(THREAD_POOL_DEFAULT, spec->pool_code);\n\n    ASSERT_STREQ(\"TASK_CODE_RPC_FOR_TEST_ACK\",\n                 dsn::task_code(TASK_CODE_RPC_FOR_TEST_ACK).to_string());\n    ASSERT_EQ(TASK_CODE_RPC_FOR_TEST_ACK,\n              dsn::task_code::try_get(\"TASK_CODE_RPC_FOR_TEST_ACK\", TASK_CODE_INVALID));\n    ASSERT_LE(TASK_CODE_RPC_FOR_TEST_ACK, dsn::task_code::max());\n    spec = dsn::task_spec::get(TASK_CODE_RPC_FOR_TEST_ACK.code());\n    ASSERT_EQ(TASK_TYPE_RPC_RESPONSE, spec->type);\n    ASSERT_EQ(TASK_PRIORITY_LOW, spec->priority);\n    ASSERT_EQ(THREAD_POOL_DEFAULT, spec->pool_code);\n\n    spec = dsn::task_spec::get(TASK_CODE_COMPUTE_FOR_TEST.code());\n    spec->pool_code = THREAD_POOL_FOR_TEST;\n    spec->priority = TASK_PRIORITY_COMMON;\n    ASSERT_EQ(TASK_TYPE_COMPUTE, spec->type);\n    ASSERT_EQ(TASK_PRIORITY_COMMON, spec->priority);\n    ASSERT_EQ(THREAD_POOL_FOR_TEST, spec->pool_code);\n\n    spec->pool_code = THREAD_POOL_DEFAULT;\n    spec->priority = TASK_PRIORITY_HIGH;\n}\n\nTEST(core, dsn_config)\n{\n    
ASSERT_TRUE(dsn_config_get_value_bool(\"apps.client\", \"run\", false, \"client run\"));\n    ASSERT_EQ(1u, dsn_config_get_value_uint64(\"apps.client\", \"count\", 100, \"client count\"));\n    ASSERT_EQ(1.0, dsn_config_get_value_double(\"apps.client\", \"count\", 100.0, \"client count\"));\n    ASSERT_EQ(1.0, dsn_config_get_value_double(\"apps.client\", \"count\", 100.0, \"client count\"));\n\n    std::vector<const char *> buffers;\n    dsn_config_get_all_keys(\"core.test\", buffers);\n    ASSERT_EQ(2, buffers.size());\n    ASSERT_STREQ(\"count\", buffers[0]);\n    ASSERT_STREQ(\"run\", buffers[1]);\n}\n\nTEST(core, dsn_exlock)\n{\n    if (dsn::service_engine::instance().spec().semaphore_factory_name ==\n        \"dsn::tools::sim_semaphore_provider\")\n        return;\n    {\n        dsn::zlock l(false);\n        ASSERT_TRUE(l.try_lock());\n        l.unlock();\n        l.lock();\n        l.unlock();\n    }\n    {\n        dsn::zlock l(true);\n        ASSERT_TRUE(l.try_lock());\n        ASSERT_TRUE(l.try_lock());\n        l.unlock();\n        l.unlock();\n        l.lock();\n        l.lock();\n        l.unlock();\n        l.unlock();\n    }\n}\n\nTEST(core, dsn_rwlock)\n{\n    if (dsn::service_engine::instance().spec().semaphore_factory_name ==\n        \"dsn::tools::sim_semaphore_provider\")\n        return;\n    dsn::zrwlock_nr l;\n    l.lock_read();\n    l.unlock_read();\n    l.lock_write();\n    l.unlock_write();\n}\n\nTEST(core, dsn_semaphore)\n{\n    if (dsn::service_engine::instance().spec().semaphore_factory_name ==\n        \"dsn::tools::sim_semaphore_provider\")\n        return;\n    dsn::zsemaphore s(2);\n    s.wait();\n    ASSERT_TRUE(s.wait(10));\n    ASSERT_FALSE(s.wait(10));\n    s.signal(1);\n    s.wait();\n}\n\nTEST(core, dsn_env)\n{\n    if (dsn::service_engine::instance().spec().tool == \"simulator\")\n        return;\n    uint64_t now1 = dsn_now_ns();\n    std::this_thread::sleep_for(std::chrono::milliseconds(1));\n    uint64_t now2 = 
dsn_now_ns();\n    ASSERT_LE(now1 + 1000000, now2);\n    uint64_t r = rand::next_u64(100, 200);\n    ASSERT_LE(100, r);\n    ASSERT_GE(200, r);\n}\n\nTEST(core, dsn_system)\n{\n    ASSERT_TRUE(tools::is_engine_ready());\n    tools::tool_app *tool = tools::get_current_tool();\n    ASSERT_EQ(tool->name(), dsn_config_get_value_string(\"core\", \"tool\", \"\", \"\"));\n\n    int app_count = 5;\n    int type_count = 1;\n    if (tool->get_service_spec().enable_default_app_mimic) {\n        app_count++;\n        type_count++;\n    }\n\n    {\n        std::vector<service_app *> apps;\n        service_app::get_all_service_apps(&apps);\n        ASSERT_EQ(app_count, apps.size());\n        std::map<std::string, int> type_to_count;\n        for (int i = 0; i < apps.size(); ++i) {\n            type_to_count[apps[i]->info().type] += 1;\n        }\n\n        ASSERT_EQ(type_count, static_cast<int>(type_to_count.size()));\n        ASSERT_EQ(5, type_to_count[\"test\"]);\n    }\n}\n"
  },
  {
    "path": "src/runtime/test/sim_lock.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Unit-test for sim lock.\n *\n * Revision history:\n *     Nov., 2015, @xiaotz (Xiaotong Zhang), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/service_api_c.h>\n#include <dsn/tool_api.h>\n#include <dsn/tool-api/task.h>\n#include <dsn/tool-api/auto_codes.h>\n#include <dsn/utility/utils.h>\n#include <dsn/utility/synchronize.h>\n#include <gtest/gtest.h>\n#include <thread>\n#include \"runtime/service_engine.h\"\n#include \"runtime/task/task_engine.sim.h\"\n#include \"runtime/scheduler.h\"\n\nTEST(tools_simulator, dsn_semaphore)\n{\n    if (dsn::task::get_current_worker() == nullptr)\n        return;\n    if (dsn::service_engine::instance().spec().semaphore_factory_name !=\n   
     \"dsn::tools::sim_semaphore_provider\")\n        return;\n    dsn::zsemaphore s(2);\n    s.wait();\n    ASSERT_TRUE(s.wait(10));\n    ASSERT_FALSE(s.wait(0));\n    s.signal(1);\n    s.wait();\n}\n\nTEST(tools_simulator, dsn_lock_nr)\n{\n    if (dsn::task::get_current_worker() == nullptr)\n        return;\n    if (dsn::service_engine::instance().spec().lock_nr_factory_name !=\n        \"dsn::tools::sim_lock_nr_provider\")\n        return;\n\n    dsn::tools::sim_lock_nr_provider *s = new dsn::tools::sim_lock_nr_provider(nullptr);\n    s->lock();\n    s->unlock();\n    EXPECT_TRUE(s->try_lock());\n    s->unlock();\n    delete s;\n}\n\nTEST(tools_simulator, dsn_lock)\n{\n    if (dsn::task::get_current_worker() == nullptr)\n        return;\n    if (dsn::service_engine::instance().spec().lock_factory_name != \"dsn::tools::sim_lock_provider\")\n        return;\n\n    dsn::tools::sim_lock_provider *s = new dsn::tools::sim_lock_provider(nullptr);\n    s->lock();\n    EXPECT_TRUE(s->try_lock());\n    s->unlock();\n    s->unlock();\n    delete s;\n}\n\nnamespace dsn {\nnamespace test {\ntypedef std::function<void()> system_callback;\n}\n}\nTEST(tools_simulator, scheduler)\n{\n    if (dsn::task::get_current_worker() == nullptr)\n        return;\n    if (dsn::service_engine::instance().spec().tool != \"simulator\")\n        return;\n\n    dsn::tools::sim_worker_state *s =\n        dsn::tools::scheduler::task_worker_ext::get(dsn::task::get_current_worker());\n    dsn::utils::notify_event *evt = new dsn::utils::notify_event();\n    dsn::test::system_callback callback = [evt, s](void) {\n        evt->notify();\n        s->is_continuation_ready = true;\n        return;\n    };\n    dsn::tools::scheduler::instance().add_system_event(100, callback);\n    dsn::tools::scheduler::instance().wait_schedule(true, false);\n    evt->wait();\n}\n"
  },
  {
    "path": "src/runtime/test/task_engine.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Unit-test for task engine.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"runtime/task/task_engine.h\"\n#include \"test_utils.h\"\n#include <dsn/tool_api.h>\n#include <gtest/gtest.h>\n#include <sstream>\n\nusing namespace ::dsn;\n\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_FOR_TEST_1)\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_FOR_TEST_2)\n\nTEST(core, task_engine)\n{\n    if (dsn::service_engine::instance().spec().tool == \"simulator\")\n        return;\n    service_node *node = task::get_current_node2();\n    ASSERT_NE(nullptr, node);\n    ASSERT_STREQ(\"client\", node->full_name());\n\n    task_engine *engine = 
node->computation();\n    ASSERT_NE(nullptr, engine);\n\n    ASSERT_TRUE(engine->is_started());\n    std::vector<std::string> args;\n    std::stringstream oss;\n    engine->get_runtime_info(\"  \", args, oss);\n    printf(\"%s\\n\", oss.str().c_str());\n\n    std::vector<task_worker_pool *> &pools = engine->pools();\n    for (size_t i = 0; i < pools.size(); ++i) {\n        if (i == THREAD_POOL_DEFAULT || i == THREAD_POOL_TEST_SERVER ||\n            i == THREAD_POOL_FOR_TEST_1 || i == THREAD_POOL_FOR_TEST_2) {\n            ASSERT_NE(nullptr, pools[i]);\n        }\n    }\n\n    task_worker_pool *pool1 = engine->get_pool(THREAD_POOL_FOR_TEST_1);\n    ASSERT_NE(nullptr, pool1);\n    ASSERT_EQ(pools[THREAD_POOL_FOR_TEST_1], pool1);\n    const threadpool_spec &spec1 = pool1->spec();\n    ASSERT_EQ(\"THREAD_POOL_FOR_TEST_1\", spec1.name);\n    ASSERT_EQ(engine, pool1->engine());\n    ASSERT_EQ(task::get_current_node2(), pool1->node());\n    std::vector<task_queue *> queues1 = pool1->queues();\n    ASSERT_EQ(1u, queues1.size());\n    std::vector<task_worker *> workers1 = pool1->workers();\n    ASSERT_EQ(2u, workers1.size());\n\n    task_worker_pool *pool2 = engine->get_pool(THREAD_POOL_FOR_TEST_2);\n    ASSERT_NE(nullptr, pool2);\n    ASSERT_EQ(pools[THREAD_POOL_FOR_TEST_2], pool2);\n    const threadpool_spec &spec2 = pool2->spec();\n    ASSERT_EQ(\"THREAD_POOL_FOR_TEST_2\", spec2.name);\n    ASSERT_EQ(engine, pool2->engine());\n    ASSERT_EQ(task::get_current_node2(), pool2->node());\n    std::vector<task_queue *> queues2 = pool2->queues();\n    ASSERT_EQ(2u, queues2.size());\n    std::vector<task_worker *> workers2 = pool2->workers();\n    ASSERT_EQ(2u, workers2.size());\n}\n"
  },
  {
    "path": "src/runtime/test/task_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/tool-api/task.h>\n#include <dsn/tool-api/task_code.h>\n#include <gtest/gtest.h>\n#include <dsn/tool-api/file_io.h>\n\nnamespace dsn {\n\nDEFINE_TASK_CODE_AIO(LPC_TASK_TEST, TASK_PRIORITY_HIGH, THREAD_POOL_DEFAULT)\n\nclass task_test : public ::testing::Test\n{\npublic:\n    static void test_init()\n    {\n        aio_task t1(LPC_TASK_TEST, nullptr);\n        ASSERT_TRUE(t1._is_null);\n        ASSERT_EQ(t1._wait_event.load(), nullptr);\n        ASSERT_EQ(t1.next, nullptr);\n        ASSERT_EQ(t1._state, task_state::TASK_STATE_READY);\n        ASSERT_FALSE(t1._wait_for_cancel);\n\n        // TODO(wutao1): raw_task and rpc_request_task is not safe for\n        //               null callback.\n    }\n\n    static void test_null_task()\n    {\n        aio_task_ptr t1 = new aio_task(LPC_TASK_TEST, nullptr);\n\n        // empty task will executed at once\n        t1->enqueue(ERR_OK, 100);\n        ASSERT_EQ(t1->_state, task_state::TASK_STATE_FINISHED);\n\n        // never wait for an empty task\n        ASSERT_TRUE(t1->wait(10000));\n        ASSERT_EQ(t1->_state, task_state::TASK_STATE_FINISHED);\n        
ASSERT_TRUE(t1->_wait_event.load() == nullptr);\n        ASSERT_TRUE(t1->_is_null);\n    }\n\n    static void test_signal_finished_task()\n    {\n        disk_file *fp = file::open(\"config-test.ini\", O_RDONLY | O_BINARY, 0);\n\n        // this aio task is enqueued into the read-queue of disk_engine\n        char buffer[128];\n        // in the simulator environment this task will be executed immediately,\n        // so we excluded config-test-sim.ini for this test.\n        auto t = file::read(fp, buffer, 128, 0, LPC_TASK_TEST, nullptr, nullptr);\n\n        t->wait(10000);\n        ASSERT_EQ(t->_state, task_state::TASK_STATE_FINISHED);\n\n        // signaling a finished task won't cause a failure\n        t->signal_waiters(); // signal_waiters may return false\n        t->signal_waiters();\n    }\n};\n\nTEST_F(task_test, init) { test_init(); }\n\nTEST_F(task_test, null_task) { test_null_task(); }\n\nTEST_F(task_test, signal_finished_task) { test_signal_finished_task(); }\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/test/test_utils.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/service_api_cpp.h>\n#include <dsn/tool-api/task.h>\n#include <dsn/tool-api/task_worker.h>\n#include <gtest/gtest.h>\n#include <iostream>\n\nusing namespace ::dsn;\n\n#ifndef TEST_PORT_BEGIN\n#define TEST_PORT_BEGIN 20201\n#define TEST_PORT_END 20203\n#endif\n\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_TEST_SERVER)\nDEFINE_TASK_CODE_RPC(RPC_TEST_HASH, TASK_PRIORITY_COMMON, THREAD_POOL_TEST_SERVER)\n\nDEFINE_TASK_CODE_RPC(RPC_TEST_HASH1, TASK_PRIORITY_COMMON, THREAD_POOL_TEST_SERVER)\nDEFINE_TASK_CODE_RPC(RPC_TEST_HASH2, 
TASK_PRIORITY_COMMON, THREAD_POOL_TEST_SERVER)\nDEFINE_TASK_CODE_RPC(RPC_TEST_HASH3, TASK_PRIORITY_COMMON, THREAD_POOL_TEST_SERVER)\nDEFINE_TASK_CODE_RPC(RPC_TEST_HASH4, TASK_PRIORITY_COMMON, THREAD_POOL_TEST_SERVER)\n\nDEFINE_TASK_CODE_RPC(RPC_TEST_STRING_COMMAND, TASK_PRIORITY_COMMON, THREAD_POOL_TEST_SERVER)\n\nextern int g_test_count;\nextern int g_test_ret;\n\ninline void exec_tests()\n{\n    g_test_ret = RUN_ALL_TESTS();\n    g_test_count++;\n}\n\nclass test_client : public ::dsn::serverlet<test_client>, public ::dsn::service_app\n{\npublic:\n    test_client(const service_app_info *info)\n        : ::dsn::serverlet<test_client>(\"test-server\"), ::dsn::service_app(info)\n    {\n    }\n\n    void on_rpc_test(const int &test_id, ::dsn::rpc_replier<std::string> &replier)\n    {\n        std::string r = ::dsn::task::get_current_worker()->name();\n        replier(r);\n    }\n\n    void on_rpc_string_test(dsn::message_ex *message)\n    {\n        std::string command;\n        ::dsn::unmarshall(message, command);\n\n        if (command == \"expect_talk_to_others\") {\n            dsn::rpc_address next_addr = dsn::service_app::primary_address();\n            if (next_addr.port() != TEST_PORT_END) {\n                next_addr.assign_ipv4(next_addr.ip(), next_addr.port() + 1);\n                ddebug(\"test_client_server, talk_to_others: %s\", next_addr.to_string());\n                dsn_rpc_forward(message, next_addr);\n            } else {\n                ddebug(\"test_client_server, talk_to_me: %s\", next_addr.to_string());\n                reply(message, next_addr.to_std_string());\n            }\n        } else if (command == \"expect_no_reply\") {\n            if (dsn::service_app::primary_address().port() == TEST_PORT_END) {\n                ddebug(\"test_client_server, talk_with_reply: %s\",\n                       dsn::service_app::primary_address().to_std_string().c_str());\n                reply(message, 
dsn::service_app::primary_address().to_std_string());\n            }\n        } else if (command.substr(0, 5) == \"echo \") {\n            reply(message, command.substr(5));\n        } else {\n            derror(\"unknown command\");\n        }\n    }\n\n    ::dsn::error_code start(const std::vector<std::string> &args)\n    {\n        // server\n        if (args.size() == 1) {\n            register_async_rpc_handler(RPC_TEST_HASH, \"rpc.test.hash\", &test_client::on_rpc_test);\n            // used for corrupted message test\n            register_async_rpc_handler(RPC_TEST_HASH1, \"rpc.test.hash1\", &test_client::on_rpc_test);\n            register_async_rpc_handler(RPC_TEST_HASH2, \"rpc.test.hash2\", &test_client::on_rpc_test);\n            register_async_rpc_handler(RPC_TEST_HASH3, \"rpc.test.hash3\", &test_client::on_rpc_test);\n            register_async_rpc_handler(RPC_TEST_HASH4, \"rpc.test.hash4\", &test_client::on_rpc_test);\n\n            register_rpc_handler(RPC_TEST_STRING_COMMAND,\n                                 \"rpc.test.string.command\",\n                                 &test_client::on_rpc_string_test);\n        }\n\n        // client\n        else {\n            std::cout << \"=========================================================== \"\n                      << std::endl;\n            std::cout << \"================== run in rDSN threads ==================== \"\n                      << std::endl;\n            std::cout << \"=========================================================== \"\n                      << std::endl;\n            exec_tests();\n        }\n\n        return ::dsn::ERR_OK;\n    }\n\n    ::dsn::error_code stop(bool cleanup = false) { return ERR_OK; }\n};\n"
  },
  {
    "path": "src/runtime/test/thrift_message_parser_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include <dsn/utility/crc.h>\n#include <dsn/utility/endians.h>\n#include <dsn/cpp/serialization_helper/thrift_helper.h>\n#include <dsn/cpp/serialization_helper/dsn.layer2_types.h>\n\n#include \"runtime/rpc/thrift_message_parser.h\"\n\nnamespace dsn {\n\nDEFINE_TASK_CODE_RPC(RPC_TEST_THRIFT_MESSAGE_PARSER, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)\n\nclass thrift_message_parser_test : public testing::Test\n{\npublic:\n    void\n    mock_reader_read_data(message_reader &reader, const std::string &data, int message_count = 1)\n    {\n        char *buf = reader.read_buffer_ptr(data.length() * message_count);\n        for (int i = 0; i < message_count; i++) {\n            memcpy(buf + i * data.length(), data.c_str(), data.length());\n            reader.mark_read(data.length());\n        }\n    }\n\n    void test_get_message_on_receive_v0_data(message_reader &reader,\n                                             apache::thrift::protocol::TMessageType messageType,\n                                             bool is_request,\n                                             int message_count = 1)\n    {\n        /// write rpc 
message\n        size_t body_length = 0;\n        message_ptr msg =\n            message_ex::create_request(RPC_TEST_THRIFT_MESSAGE_PARSER, 1000, 64, 5000000000);\n        rpc_write_stream stream(msg);\n        binary_writer_transport binary_transport(stream);\n        boost::shared_ptr<binary_writer_transport> trans_ptr(&binary_transport,\n                                                             [](binary_writer_transport *) {});\n        ::apache::thrift::protocol::TBinaryProtocol oprot(trans_ptr);\n        body_length += oprot.writeMessageBegin(\"RPC_TEST_THRIFT_MESSAGE_PARSER\", messageType, 999);\n        body_length += oprot.writeMessageEnd();\n        stream.commit_buffer();\n\n        thrift_message_parser parser;\n        std::string data;\n        int read_next = 0;\n        data = std::string(\"THFT\") + std::string(44 + body_length, '\\0'); // header+body_length\n        data_output out(&data[4], 44);\n        out.write_u32(0);           // hdr_version\n        out.write_u32(48);          // hdr_length\n        out.write_u32(0);           // hdr_crc32\n        out.write_u32(body_length); // body_length\n        out.write_u32(0);           // body_crc32\n        out.write_u32(1);           // app_id\n        out.write_u32(28);          // partition_index\n        out.write_u32(1000);        // client_timeout\n        out.write_u32(64);          // client_thread_hash\n        out.write_u64(5000000000);  // client_partition_hash\n        ASSERT_EQ(stream.get_buffer().size(), body_length);\n        memcpy(&data[48], stream.get_buffer().data(), stream.get_buffer().size());\n\n        mock_reader_read_data(reader, data, message_count);\n\n        for (int i = 0; i < message_count; i++) {\n            msg = parser.get_message_on_receive(&reader, read_next);\n\n            if (is_request) {\n                ASSERT_NE(msg, nullptr);\n                ASSERT_EQ(msg->hdr_format, NET_HDR_THRIFT);\n\n                ASSERT_EQ(msg->header->body_length, 
body_length);\n                ASSERT_EQ(msg->header->gpid, gpid(1, 28));\n                ASSERT_EQ(msg->header->hdr_type, THRIFT_HDR_SIG);\n                ASSERT_EQ(msg->header->hdr_length, sizeof(message_header));\n                ASSERT_EQ(msg->header->hdr_crc32, CRC_INVALID);\n                ASSERT_EQ(msg->header->body_crc32, CRC_INVALID);\n                ASSERT_EQ(msg->header->id, 999);\n\n                ASSERT_EQ(msg->header->client.timeout_ms, 1000);\n                ASSERT_EQ(msg->header->client.thread_hash, 64);\n                ASSERT_EQ(msg->header->client.partition_hash, 5000000000);\n\n                ASSERT_EQ(msg->header->context.u.is_request, true);\n                ASSERT_EQ(msg->header->context.u.serialize_format, DSF_THRIFT_BINARY);\n\n                // v0 Thrift network format doesn't support message context.\n                ASSERT_EQ(msg->header->context.u.is_backup_request, false);\n                ASSERT_EQ(msg->header->context.u.is_forwarded, false);\n                ASSERT_EQ(msg->header->context.u.is_forward_supported, false);\n\n                ASSERT_EQ(msg->buffers[1].size(), body_length);\n\n                // must be reset\n                ASSERT_EQ(parser._header_version, -1);\n                ASSERT_EQ(parser._v1_specific_vars->_meta_parsed, false);\n                ASSERT_EQ(parser._v1_specific_vars->_meta_length, 0);\n                ASSERT_EQ(parser._v1_specific_vars->_body_length, 0);\n            } else {\n                ASSERT_EQ(msg, nullptr);\n                ASSERT_EQ(read_next, -1);\n            }\n        }\n    }\n\n    void test_get_message_on_receive_v1_data(message_reader &reader,\n                                             apache::thrift::protocol::TMessageType messageType,\n                                             bool is_request,\n                                             bool is_backup_request,\n                                             int message_count = 1)\n    {\n        /// write rpc 
message\n        size_t body_length = 0;\n        message_ptr msg =\n            message_ex::create_request(RPC_TEST_THRIFT_MESSAGE_PARSER, 1000, 64, 5000000000);\n        rpc_write_stream body_stream(msg);\n        {\n            binary_writer_transport transport(body_stream);\n            boost::shared_ptr<binary_writer_transport> trans_ptr(&transport,\n                                                                 [](binary_writer_transport *) {});\n            ::apache::thrift::protocol::TBinaryProtocol oprot(trans_ptr);\n            body_length +=\n                oprot.writeMessageBegin(\"RPC_TEST_THRIFT_MESSAGE_PARSER\", messageType, 999);\n            body_length += oprot.writeMessageEnd();\n            body_stream.commit_buffer();\n            ASSERT_EQ(body_stream.get_buffer().size(), body_length);\n        }\n\n        // write rpc meta\n        size_t meta_length = 0;\n        thrift_request_meta_v1 meta;\n        meta.__set_is_backup_request(is_backup_request);\n        meta.__set_app_id(1);\n        meta.__set_partition_index(28);\n        meta.__set_client_timeout(1000);\n        meta.__set_client_partition_hash(5000000000);\n\n        binary_writer meta_writer(1024);\n        ::dsn::binary_writer_transport trans(meta_writer);\n        boost::shared_ptr<::dsn::binary_writer_transport> transport(\n            &trans, [](::dsn::binary_writer_transport *) {});\n        ::apache::thrift::protocol::TBinaryProtocol proto(transport);\n        meta.write(&proto);\n\n        meta_length = meta_writer.get_buffer().size();\n\n        thrift_message_parser parser;\n        std::string data;\n        int read_next = 0;\n        data = std::string(\"THFT\") + std::string(12 + meta_length + body_length, '\\0');\n        data_output out(&data[4], 12);\n        out.write_u32(1);\n        out.write_u32(meta_length);\n        out.write_u32(body_length);\n\n        memcpy(&data[16], meta_writer.get_buffer().data(), meta_writer.get_buffer().size());\n        
memcpy(&data[16 + meta_length],\n               body_stream.get_buffer().data(),\n               body_stream.get_buffer().size());\n        ASSERT_EQ(16 + meta_length + body_length, data.size());\n        mock_reader_read_data(reader, data, message_count);\n        ASSERT_EQ(reader.buffer().size(), data.size() * message_count);\n\n        for (int i = 0; i != message_count; ++i) {\n            msg = parser.get_message_on_receive(&reader, read_next);\n\n            if (is_request) {\n                ASSERT_NE(msg, nullptr);\n                ASSERT_EQ(msg->hdr_format, NET_HDR_THRIFT);\n\n                ASSERT_EQ(msg->header->body_length, body_length);\n                ASSERT_EQ(msg->header->gpid, gpid(1, 28));\n                ASSERT_EQ(msg->header->hdr_type, THRIFT_HDR_SIG);\n                ASSERT_EQ(msg->header->hdr_length, sizeof(message_header));\n                ASSERT_EQ(msg->header->hdr_crc32, CRC_INVALID);\n                ASSERT_EQ(msg->header->body_crc32, CRC_INVALID);\n                ASSERT_EQ(msg->header->id, 999);\n\n                ASSERT_EQ(msg->header->client.timeout_ms, 1000);\n                ASSERT_EQ(msg->header->client.thread_hash, 7947);\n                ASSERT_EQ(msg->header->client.partition_hash, 5000000000);\n\n                ASSERT_EQ(msg->header->context.u.is_request, true);\n                ASSERT_EQ(msg->header->context.u.serialize_format, DSF_THRIFT_BINARY);\n                ASSERT_EQ(msg->header->context.u.is_backup_request, is_backup_request);\n                ASSERT_EQ(msg->header->context.u.is_forwarded, false);\n                ASSERT_EQ(msg->header->context.u.is_forward_supported, false);\n\n                // must be reset\n                ASSERT_EQ(parser._header_version, -1);\n                ASSERT_EQ(parser._v1_specific_vars->_meta_parsed, false);\n                ASSERT_EQ(msg->buffers[1].size(), body_length);\n            } else {\n                ASSERT_EQ(msg, nullptr);\n                ASSERT_EQ(read_next, -1);\n     
       }\n        }\n    }\n};\n\nTEST_F(thrift_message_parser_test, get_message_on_receive_incomplete_second_field)\n{\n    for (int i = 0; i < 4; i++) {\n        thrift_message_parser parser;\n\n        std::string data;\n        int read_next = 0;\n        message_reader reader(64);\n        data = std::string(\"THFT\") + std::string(i, ' ');\n        mock_reader_read_data(reader, data);\n        ASSERT_EQ(reader._buffer_occupied, 4 + i);\n        ASSERT_EQ(reader.buffer().size(), 4 + i);\n\n        message_ex *msg = parser.get_message_on_receive(&reader, read_next);\n        ASSERT_EQ(msg, nullptr);\n        ASSERT_EQ(read_next, 4 - i);\n        ASSERT_EQ(parser._header_version, -1);\n        ASSERT_EQ(parser._v1_specific_vars->_meta_parsed, false);\n        ASSERT_EQ(parser._v1_specific_vars->_meta_length, 0);\n\n        // not consumed\n        ASSERT_EQ(reader._buffer_occupied, data.length());\n        ASSERT_EQ(reader.buffer().size(), data.length());\n    }\n}\n\nTEST_F(thrift_message_parser_test, get_message_on_receive_incomplete_v0_hdr_len)\n{\n    for (int i = 4; i < 44; i++) {\n        thrift_message_parser parser;\n\n        std::string data;\n        int read_next = 0;\n        message_reader reader(64);\n        data = std::string(\"THFT\") + std::string(i, ' ');\n\n        data_output out(&data[4], 8);\n        out.write_u32(0);\n        out.write_u32(48);\n\n        mock_reader_read_data(reader, data);\n        ASSERT_EQ(reader.buffer().size(), data.length());\n\n        message_ex *msg = parser.get_message_on_receive(&reader, read_next);\n        ASSERT_EQ(msg, nullptr);\n        ASSERT_EQ(read_next, 48 - data.length()); // read remaining fields\n        ASSERT_EQ(parser._header_version, -1);\n    }\n}\n\nTEST_F(thrift_message_parser_test, get_message_on_receive_invalid_v0_hdr_length)\n{\n    for (int i = 0; i < 48; i++) {\n        thrift_message_parser parser;\n\n        std::string data;\n        int read_next = 0;\n        message_reader 
reader(64);\n        data = std::string(\"THFT\") + std::string(44, '\\0'); // full 48 bytes\n\n        // hdr_version = 0\n        data_output out(&data[4], 8);\n        out.write_u32(0);\n        // hdr_length = i\n        out.write_u32(i);\n\n        mock_reader_read_data(reader, data);\n        message_ex *msg = parser.get_message_on_receive(&reader, read_next);\n        ASSERT_EQ(msg, nullptr);\n        ASSERT_EQ(read_next, -1);\n        ASSERT_EQ(parser._header_version, -1);\n    }\n}\n\nTEST_F(thrift_message_parser_test, get_message_on_receive_valid_v0_hdr)\n{\n    thrift_message_parser parser;\n    std::string data;\n    int read_next = 0;\n    message_reader reader(64);\n    data = std::string(\"THFT\") + std::string(44, '\\0'); // full 48 bytes\n    data_output out(&data[4], 44);\n    out.write_u32(0);          // hdr_version\n    out.write_u32(48);         // hdr_length\n    out.write_u32(0);          // hdr_crc32\n    out.write_u32(100);        // body_length\n    out.write_u32(0);          // body_crc32\n    out.write_u32(1);          // app_id\n    out.write_u32(28);         // partition_index\n    out.write_u32(1000);       // client_timeout\n    out.write_u32(64);         // client_thread_hash\n    out.write_u64(5000000000); // client_partition_hash\n\n    mock_reader_read_data(reader, data);\n\n    message_ex *msg = parser.get_message_on_receive(&reader, read_next);\n    ASSERT_EQ(msg, nullptr);\n    ASSERT_EQ(read_next, 100); // required to read more\n    ASSERT_EQ(parser._header_version, 0);\n    ASSERT_EQ(reader.buffer().size(), 0);\n    ASSERT_EQ(parser._meta_v0->hdr_crc32, 0);\n    ASSERT_EQ(parser._meta_v0->body_length, 100);\n    ASSERT_EQ(parser._meta_v0->body_crc32, 0);\n    ASSERT_EQ(parser._meta_v0->app_id, 1);\n    ASSERT_EQ(parser._meta_v0->partition_index, 28);\n    ASSERT_EQ(parser._meta_v0->client_timeout, 1000);\n    ASSERT_EQ(parser._meta_v0->client_thread_hash, 64);\n    ASSERT_EQ(parser._meta_v0->client_partition_hash, 
5000000000);\n}\n\nTEST_F(thrift_message_parser_test, get_message_on_receive_valid_v0_data)\n{\n    message_reader reader(64);\n\n    ASSERT_NO_FATAL_FAILURE(\n        test_get_message_on_receive_v0_data(reader, apache::thrift::protocol::T_CALL, true));\n    ASSERT_NO_FATAL_FAILURE(\n        test_get_message_on_receive_v0_data(reader, apache::thrift::protocol::T_ONEWAY, true));\n}\n\nTEST_F(thrift_message_parser_test, get_message_on_receive_v0_not_request)\n{\n    message_reader reader(64);\n\n    // ensure server won't corrupt when it receives a non-request.\n    ASSERT_NO_FATAL_FAILURE(\n        test_get_message_on_receive_v0_data(reader, apache::thrift::protocol::T_REPLY, false));\n    // bad message should be consumed and discarded\n    ASSERT_EQ(reader.buffer().size(), 0);\n    ASSERT_NO_FATAL_FAILURE(test_get_message_on_receive_v0_data(\n        reader, apache::thrift::protocol::TMessageType(65), false));\n    ASSERT_EQ(reader.buffer().size(), 0);\n}\n\nTEST_F(thrift_message_parser_test, get_message_on_receive_incomplete_v1_hdr)\n{\n    for (int i = 4; i < 12; i++) {\n        thrift_message_parser parser;\n\n        std::string data;\n        int read_next = 0;\n        message_reader reader(64);\n        data = std::string(\"THFT\") + std::string(i, ' ');\n\n        data_output out(&data[4], 8);\n        out.write_u32(1);\n\n        mock_reader_read_data(reader, data);\n        ASSERT_EQ(reader.buffer().size(), data.length());\n\n        message_ex *msg = parser.get_message_on_receive(&reader, read_next);\n        ASSERT_EQ(msg, nullptr);\n        ASSERT_EQ(read_next, 16 - data.length()); // read remaining fields\n        ASSERT_EQ(parser._header_version, -1);\n        ASSERT_EQ(parser._v1_specific_vars->_meta_length, 0);\n        ASSERT_EQ(parser._v1_specific_vars->_body_length, 0);\n    }\n}\n\nTEST_F(thrift_message_parser_test, get_message_on_receive_valid_v1_hdr)\n{\n    thrift_message_parser parser;\n    std::string data;\n    int read_next = 0;\n    
message_reader reader(64);\n    data = std::string(\"THFT\") + std::string(12, '\\0'); // full 12 bytes\n    data_output out(&data[4], 12);\n    out.write_u32(1);   // header_version\n    out.write_u32(100); // meta_length\n    out.write_u32(200); // body_length\n\n    mock_reader_read_data(reader, data);\n    ASSERT_EQ(reader.buffer().size(), 16);\n\n    message_ex *msg = parser.get_message_on_receive(&reader, read_next);\n    ASSERT_EQ(msg, nullptr);\n    ASSERT_EQ(read_next, 100); // required to read more\n    ASSERT_EQ(parser._header_version, 1);\n    ASSERT_EQ(parser._v1_specific_vars->_meta_length, 100);\n    ASSERT_EQ(parser._v1_specific_vars->_body_length, 200);\n    ASSERT_EQ(parser._v1_specific_vars->_meta_parsed, false);\n    ASSERT_EQ(reader.buffer().size(), 0);\n}\n\nTEST_F(thrift_message_parser_test, get_message_on_receive_v1_data)\n{\n    message_reader reader(64);\n    ASSERT_NO_FATAL_FAILURE(\n        test_get_message_on_receive_v1_data(reader, apache::thrift::protocol::T_CALL, true, true));\n    ASSERT_NO_FATAL_FAILURE(\n        test_get_message_on_receive_v1_data(reader, apache::thrift::protocol::T_CALL, true, false));\n    ASSERT_NO_FATAL_FAILURE(test_get_message_on_receive_v1_data(\n        reader, apache::thrift::protocol::T_ONEWAY, true, true));\n    ASSERT_NO_FATAL_FAILURE(test_get_message_on_receive_v1_data(\n        reader, apache::thrift::protocol::T_ONEWAY, true, false));\n\n    ASSERT_NO_FATAL_FAILURE(test_get_message_on_receive_v1_data(\n        reader, apache::thrift::protocol::TMessageType(65), false, false));\n    reader.truncate_read();\n}\n\nTEST_F(thrift_message_parser_test, get_message_on_large_writes_pileup_v0)\n{\n    message_reader reader(4096);\n    ASSERT_NO_FATAL_FAILURE(\n        test_get_message_on_receive_v0_data(reader, apache::thrift::protocol::T_CALL, true, 10));\n}\n\nTEST_F(thrift_message_parser_test, get_message_on_large_writes_pileup_v1)\n{\n    message_reader reader(4096);\n    
ASSERT_NO_FATAL_FAILURE(test_get_message_on_receive_v1_data(\n        reader, apache::thrift::protocol::T_CALL, true, true, 10));\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/threadpool_code.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/tool-api/threadpool_code.h>\n#include <dsn/utility/customizable_id.h>\n\nnamespace dsn {\n\n/*static*/\nint threadpool_code::max()\n{\n    return dsn::utils::customized_id_mgr<dsn::threadpool_code>::instance().max_value();\n}\n/*static*/\nbool threadpool_code::is_exist(const char *name)\n{\n    return dsn::utils::customized_id_mgr<dsn::threadpool_code>::instance().get_id(name) != -1;\n}\n\nthreadpool_code::threadpool_code(const char *name)\n{\n    _internal_code =\n        dsn::utils::customized_id_mgr<dsn::threadpool_code>::instance().register_id(name);\n}\n\nconst char *threadpool_code::to_string() const\n{\n    return dsn::utils::customized_id_mgr<dsn::threadpool_code>::instance().get_name(_internal_code);\n}\n}\n"
  },
  {
    "path": "src/runtime/tool_api.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/tool_api.h>\n#include <dsn/utility/factory_store.h>\n#include <dsn/utility/singleton_store.h>\n#include \"runtime/service_engine.h\"\n#include \"runtime/rpc/message_parser_manager.h\"\n\nnamespace dsn {\n\nDEFINE_TASK_CODE(LPC_CONTROL_SERVICE_APP, TASK_PRIORITY_HIGH, THREAD_POOL_DEFAULT)\n\nclass service_control_task : public task\n{\npublic:\n    service_control_task(service_node *node, bool start, bool cleanup = false)\n        : task(LPC_CONTROL_SERVICE_APP, 0, node), _node(node), _start(start), _cleanup(cleanup)\n    {\n    }\n\n    void 
exec()\n    {\n        auto &sp = _node->spec();\n\n        if (_start) {\n            error_code err;\n            err = _node->start_app();\n            dassert(err == ERR_OK, \"start app failed, err = %s\", err.to_string());\n        } else {\n            ddebug(\"stop app result(%s)\", _node->stop_app(_cleanup).to_string());\n        }\n    }\n\nprivate:\n    service_node *_node;\n    bool _start;   // false for stop\n    bool _cleanup; // for stop\n};\n\nnamespace tools {\n\ntool_base::tool_base(const char *name) { _name = name; }\n\ntoollet::toollet(const char *name) : tool_base(name) {}\n\ntool_app::tool_app(const char *name) : tool_base(name) {}\n\nvoid tool_app::start_all_apps()\n{\n    const auto &apps = service_engine::instance().get_all_nodes();\n    for (const auto &kv : apps) {\n        task *t = new service_control_task(kv.second.get(), true);\n        t->set_delay(1000 * kv.second.get()->spec().delay_seconds);\n        t->enqueue();\n    }\n}\n\nvoid tool_app::stop_all_apps(bool cleanup)\n{\n    const auto &apps = service_engine::instance().get_all_nodes();\n    for (const auto &kv : apps) {\n        task *t = new service_control_task(kv.second.get(), false, cleanup);\n        t->enqueue();\n    }\n}\n\nconst service_spec &tool_app::get_service_spec() { return service_engine::instance().spec(); }\n\nconst service_spec &spec() { return service_engine::instance().spec(); }\n\nconst char *get_service_node_name(service_node *node) { return node->full_name(); }\n\njoin_point<void> sys_init_before_app_created(\"system.init.1\");\njoin_point<void> sys_init_after_app_created(\"system.init.2\");\njoin_point<void, sys_exit_type> sys_exit(\"system.exit\");\n\nnamespace internal_use_only {\nbool register_toollet(const char *name, toollet::factory f, ::dsn::provider_type type)\n{\n    return dsn::utils::factory_store<toollet>::register_factory(name, f, type);\n}\n\nbool register_tool(const char *name, tool_app::factory f, ::dsn::provider_type type)\n{\n    
return dsn::utils::factory_store<tool_app>::register_factory(name, f, type);\n}\n\nbool register_component_provider(const char *name,\n                                 timer_service::factory f,\n                                 ::dsn::provider_type type)\n{\n    return dsn::utils::factory_store<timer_service>::register_factory(name, f, type);\n}\n\nbool register_component_provider(const char *name, task_queue::factory f, ::dsn::provider_type type)\n{\n    return dsn::utils::factory_store<task_queue>::register_factory(name, f, type);\n}\n\nbool register_component_provider(const char *name,\n                                 task_worker::factory f,\n                                 ::dsn::provider_type type)\n{\n    return dsn::utils::factory_store<task_worker>::register_factory(name, f, type);\n}\n\nbool register_component_provider(const char *name, network::factory f, ::dsn::provider_type type)\n{\n    return dsn::utils::factory_store<network>::register_factory(name, f, type);\n}\n\nbool register_component_provider(const char *name,\n                                 env_provider::factory f,\n                                 ::dsn::provider_type type)\n{\n    return dsn::utils::factory_store<env_provider>::register_factory(name, f, type);\n}\n\nbool register_component_provider(network_header_format fmt,\n                                 const std::vector<const char *> &signatures,\n                                 message_parser::factory f,\n                                 size_t sz)\n{\n    message_parser_manager::instance().register_factory(fmt, signatures, f, sz);\n    return true;\n}\n\ntoollet *get_toollet(const char *name, ::dsn::provider_type type)\n{\n    toollet *tlt = nullptr;\n    if (utils::singleton_store<std::string, toollet *>::instance().get(name, tlt))\n        return tlt;\n    else {\n        tlt = utils::factory_store<toollet>::create(name, type, name);\n        utils::singleton_store<std::string, toollet *>::instance().put(name, tlt);\n        
return tlt;\n    }\n}\n} // namespace internal_use_only\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/runtime/tracer.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/toollet/tracer.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/tool-api/aio_task.h>\n\nnamespace dsn {\nnamespace tools {\n\nstatic void tracer_on_task_create(task *caller, task *callee)\n{\n    dsn_task_type_t type = callee->spec().type;\n    if (TASK_TYPE_RPC_REQUEST == type) {\n        rpc_request_task *tsk = (rpc_request_task *)callee;\n        ddebug(\"%s CREATE, task_id = %016\" PRIx64\n               \", type = %s, rpc_name = %s, trace_id = %016\" PRIx64 \"\",\n               
callee->spec().name.c_str(),\n               callee->id(),\n               enum_to_string(type),\n               tsk->get_request()->header->rpc_name,\n               tsk->get_request()->header->trace_id);\n    } else if (TASK_TYPE_RPC_RESPONSE == type) {\n        rpc_response_task *tsk = (rpc_response_task *)callee;\n        ddebug(\"%s CREATE, task_id = %016\" PRIx64\n               \", type = %s, rpc_name = %s, trace_id = %016\" PRIx64 \"\",\n               callee->spec().name.c_str(),\n               callee->id(),\n               enum_to_string(type),\n               tsk->get_request()->header->rpc_name,\n               tsk->get_request()->header->trace_id);\n    } else {\n        ddebug(\"%s CREATE, task_id = %016\" PRIx64 \", type = %s\",\n               callee->spec().name.c_str(),\n               callee->id(),\n               enum_to_string(type));\n    }\n}\n\nstatic void tracer_on_task_enqueue(task *caller, task *callee)\n{\n    ddebug(\"%s ENQUEUE, task_id = %016\" PRIx64 \", delay = %d ms, queue size = %d\",\n           callee->spec().name.c_str(),\n           callee->id(),\n           callee->delay_milliseconds(),\n           tls_dsn.last_worker_queue_size);\n}\n\nstatic void tracer_on_task_begin(task *this_)\n{\n    switch (this_->spec().type) {\n    case dsn_task_type_t::TASK_TYPE_COMPUTE:\n    case dsn_task_type_t::TASK_TYPE_AIO:\n        ddebug(\"%s EXEC BEGIN, task_id = %016\" PRIx64 \"\", this_->spec().name.c_str(), this_->id());\n        break;\n    case dsn_task_type_t::TASK_TYPE_RPC_REQUEST: {\n        rpc_request_task *tsk = (rpc_request_task *)this_;\n        ddebug(\"%s EXEC BEGIN, task_id = %016\" PRIx64 \", %s => %s, trace_id = %016\" PRIx64 \"\",\n               this_->spec().name.c_str(),\n               this_->id(),\n               tsk->get_request()->header->from_address.to_string(),\n               tsk->get_request()->to_address.to_string(),\n               tsk->get_request()->header->trace_id);\n    } break;\n    case 
dsn_task_type_t::TASK_TYPE_RPC_RESPONSE: {\n        rpc_response_task *tsk = (rpc_response_task *)this_;\n        ddebug(\"%s EXEC BEGIN, task_id = %016\" PRIx64 \", %s => %s, trace_id = %016\" PRIx64 \"\",\n               this_->spec().name.c_str(),\n               this_->id(),\n               tsk->get_request()->to_address.to_string(),\n               tsk->get_request()->header->from_address.to_string(),\n               tsk->get_request()->header->trace_id);\n    } break;\n    default:\n        break;\n    }\n}\n\nstatic void tracer_on_task_end(task *this_)\n{\n    ddebug(\"%s EXEC END, task_id = %016\" PRIx64 \", err = %s\",\n           this_->spec().name.c_str(),\n           this_->id(),\n           this_->error().to_string());\n}\n\nstatic void tracer_on_task_cancelled(task *this_)\n{\n    ddebug(\"%s CANCELLED, task_id = %016\" PRIx64 \"\", this_->spec().name.c_str(), this_->id());\n}\n\nstatic void tracer_on_task_wait_pre(task *caller, task *callee, uint32_t timeout_ms) {}\n\nstatic void tracer_on_task_wait_post(task *caller, task *callee, bool succ) {}\n\nstatic void tracer_on_task_cancel_post(task *caller, task *callee, bool succ) {}\n\n// return true means continue, otherwise early terminate with task::set_error_code\nstatic void tracer_on_aio_call(task *caller, aio_task *callee)\n{\n    ddebug(\"%s AIO.CALL, task_id = %016\" PRIx64 \", offset = %\" PRIu64 \", size = %d\",\n           callee->spec().name.c_str(),\n           callee->id(),\n           callee->get_aio_context()->file_offset,\n           callee->get_aio_context()->buffer_size);\n}\n\nstatic void tracer_on_aio_enqueue(aio_task *this_)\n{\n    ddebug(\"%s AIO.ENQUEUE, task_id = %016\" PRIx64 \", queue size = %d\",\n           this_->spec().name.c_str(),\n           this_->id(),\n           tls_dsn.last_worker_queue_size);\n}\n\n// return true means continue, otherwise early terminate with task::set_error_code\nstatic void tracer_on_rpc_call(task *caller, message_ex *req, rpc_response_task 
*callee)\n{\n    message_header &hdr = *req->header;\n    ddebug(\"%s RPC.CALL: %s => %s, trace_id = %016\" PRIx64 \", callback_task = %016\" PRIx64\n           \", timeout = %d ms\",\n           hdr.rpc_name,\n           req->header->from_address.to_string(),\n           req->to_address.to_string(),\n           hdr.trace_id,\n           callee ? callee->id() : 0,\n           hdr.client.timeout_ms);\n}\n\nstatic void tracer_on_rpc_request_enqueue(rpc_request_task *callee)\n{\n    ddebug(\"%s RPC.REQUEST.ENQUEUE (0x%p), task_id = %016\" PRIx64\n           \", %s => %s, trace_id = %016\" PRIx64 \", queue size = %d\",\n           callee->spec().name.c_str(),\n           callee,\n           callee->id(),\n           callee->get_request()->header->from_address.to_string(),\n           callee->get_request()->to_address.to_string(),\n           callee->get_request()->header->trace_id,\n           tls_dsn.last_worker_queue_size);\n}\n\n// return true means continue, otherwise early terminate with task::set_error_code\nstatic void tracer_on_rpc_reply(task *caller, message_ex *msg)\n{\n    message_header &hdr = *msg->header;\n\n    ddebug(\"%s RPC.REPLY: %s => %s, trace_id = %016\" PRIx64 \"\",\n           hdr.rpc_name,\n           msg->header->from_address.to_string(),\n           msg->to_address.to_string(),\n           hdr.trace_id);\n}\n\nstatic void tracer_on_rpc_response_enqueue(rpc_response_task *resp)\n{\n    ddebug(\"%s RPC.RESPONSE.ENQUEUE, task_id = %016\" PRIx64 \", %s => %s, trace_id = %016\" PRIx64\n           \", queue size = %d\",\n           resp->spec().name.c_str(),\n           resp->id(),\n           resp->get_request()->to_address.to_string(),\n           resp->get_request()->header->from_address.to_string(),\n           resp->get_request()->header->trace_id,\n           tls_dsn.last_worker_queue_size);\n}\n\nstatic void tracer_on_rpc_create_response(message_ex *req, message_ex *resp)\n{\n    ddebug(\"%s RPC.CREATE.RESPONSE, trace_id = %016\" PRIx64 
\"\",\n           resp->header->rpc_name,\n           resp->header->trace_id);\n}\n\nenum logged_event_t\n{\n    LET_TASK_BEGIN,\n    LET_TASK_END,\n    LET_LOG,\n    LET_RPC_CALL,\n    LET_RPC_REPLY,\n    LET_AIO_CALL,\n    LET_LPC_CALL,\n\n    LET_INVALID\n};\n\nENUM_BEGIN(logged_event_t, LET_INVALID)\nENUM_REG(LET_TASK_BEGIN)\nENUM_REG(LET_TASK_END)\nENUM_REG(LET_RPC_CALL)\nENUM_REG(LET_RPC_REPLY)\nENUM_REG(LET_AIO_CALL)\nENUM_REG(LET_LPC_CALL)\nENUM_END(logged_event_t)\n\nstruct logged_event\n{\n    uint64_t ts;\n    logged_event_t event_type;\n    uint64_t correlation_id; // task or rpc\n    std::string context;     // log or rpc address\n};\n\nstruct logged_task\n{\n    uint64_t task_id;\n    uint64_t trace_id; // if present\n\n    std::vector<logged_event> events;\n};\n\nstatic std::string tracer_log_flow_error(const char *msg)\n{\n    return std::string(\"invalid arguments for tracer.find: \") + msg;\n}\n\nstatic std::string tracer_log_flow(const std::vector<std::string> &args)\n{\n    // forward|f|backward|b rpc|r|task|t trace_id|task_id(e.g., 002a003920302390)\n    // log_file_name(log.xx.txt)\n    if (args.size() < 4) {\n        return tracer_log_flow_error(\"not enough arguments\");\n    }\n\n    // TODO: implement this\n    if (args[0] == \"forward\" || args[0] == \"f\") {\n    } else if (args[0] == \"backward\" || args[0] == \"b\") {\n    } else {\n        return tracer_log_flow_error(\"invalid direction argument - must be forward|f|backward|b\");\n    }\n\n    // TODO: implement this\n    if (args[1] == \"rpc\" || args[1] == \"r\") {\n    } else if (args[1] == \"task\" || args[1] == \"t\") {\n    } else {\n        return tracer_log_flow_error(\"invalid id type argument - must be rpc|r|task|t\");\n    }\n\n    uint64_t xid = 0;\n    sscanf(args[2].c_str(), \"%016\" PRIx64, &xid);\n    if (xid == 0) {\n        return tracer_log_flow_error(\"invalid id value - must be with 016\" PRIx64 \" format\");\n    }\n\n    std::string log_dir = 
utils::filesystem::path_combine(tools::spec().data_dir, \"logs\");\n\n    std::string fpath = utils::filesystem::path_combine(log_dir, args[3]);\n\n    if (!utils::filesystem::file_exists(fpath)) {\n        return tracer_log_flow_error((fpath + \" not exist\").c_str());\n    }\n\n    return \"Not implemented\";\n}\n\nvoid tracer::install(service_spec &spec)\n{\n    auto trace = dsn_config_get_value_bool(\n        \"task..default\", \"is_trace\", false, \"whether to trace tasks by default\");\n\n    for (int i = 0; i <= dsn::task_code::max(); i++) {\n        if (i == TASK_CODE_INVALID)\n            continue;\n\n        std::string section_name =\n            std::string(\"task.\") + std::string(dsn::task_code(i).to_string());\n        task_spec *spec = task_spec::get(i);\n        dassert(spec != nullptr, \"task_spec cannot be null\");\n\n        if (!dsn_config_get_value_bool(\n                section_name.c_str(), \"is_trace\", trace, \"whether to trace this kind of task\"))\n            continue;\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"tracer::on_task_create\",\n                                      true,\n                                      \"whether to trace when a task is created\"))\n            spec->on_task_create.put_back(tracer_on_task_create, \"tracer\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"tracer::on_task_enqueue\",\n                                      true,\n                                      \"whether to trace when a timer or async task is enqueued\"))\n            spec->on_task_enqueue.put_back(tracer_on_task_enqueue, \"tracer\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"tracer::on_task_begin\",\n                                      true,\n                                      \"whether to trace when a task begins\"))\n            
spec->on_task_begin.put_back(tracer_on_task_begin, \"tracer\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"tracer::on_task_end\",\n                                      true,\n                                      \"whether to trace when a task ends\"))\n            spec->on_task_end.put_back(tracer_on_task_end, \"tracer\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"tracer::on_task_cancelled\",\n                                      true,\n                                      \"whether to trace when a task is cancelled\"))\n            spec->on_task_cancelled.put_back(tracer_on_task_cancelled, \"tracer\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"tracer::on_task_wait_pre\",\n                                      true,\n                                      \"whether to trace when a task is to be wait\"))\n            spec->on_task_wait_pre.put_back(tracer_on_task_wait_pre, \"tracer\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"tracer::on_task_wait_post\",\n                                      true,\n                                      \"whether to trace when a task is wait post\"))\n            spec->on_task_wait_post.put_back(tracer_on_task_wait_post, \"tracer\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"tracer::on_task_cancel_post\",\n                                      true,\n                                      \"whether to trace when a task is cancel post\"))\n            spec->on_task_cancel_post.put_back(tracer_on_task_cancel_post, \"tracer\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"tracer::on_aio_call\",\n                                      true,\n                                 
     \"whether to trace when an aio task is called\"))\n            spec->on_aio_call.put_back(tracer_on_aio_call, \"tracer\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"tracer::on_aio_enqueue\",\n                                      true,\n                                      \"whether to trace when an aio task is enqueued\"))\n            spec->on_aio_enqueue.put_back(tracer_on_aio_enqueue, \"tracer\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"tracer::on_rpc_call\",\n                                      true,\n                                      \"whether to trace when a rpc is made\"))\n            spec->on_rpc_call.put_back(tracer_on_rpc_call, \"tracer\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"tracer::on_rpc_request_enqueue\",\n                                      true,\n                                      \"whether to trace when a rpc request task is enqueued\"))\n            spec->on_rpc_request_enqueue.put_back(tracer_on_rpc_request_enqueue, \"tracer\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"tracer::on_rpc_reply\",\n                                      true,\n                                      \"whether to trace when reply a rpc request\"))\n            spec->on_rpc_reply.put_back(tracer_on_rpc_reply, \"tracer\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      \"tracer::on_rpc_response_enqueue\",\n                                      true,\n                                      \"whetehr to trace when a rpc response task is enqueued\"))\n            spec->on_rpc_response_enqueue.put_back(tracer_on_rpc_response_enqueue, \"tracer\");\n\n        if (dsn_config_get_value_bool(section_name.c_str(),\n                                      
\"tracer::on_rpc_create_response\",\n                                      true,\n                                      \"whetehr to trace when a rpc response task is created\"))\n            spec->on_rpc_create_response.put_back(tracer_on_rpc_create_response, \"tracer\");\n    }\n\n    command_manager::instance().register_command(\n        {\"tracer.find\"},\n        \"tracer.find - find related logs\",\n        \"tracer.find forward|f|backward|b rpc|r|task|t trace_id|task_id(e.g., \"\n        \"a023003920302390) log_file_name(log.xx.txt)\",\n        tracer_log_flow);\n}\n\ntracer::tracer(const char *name) : toollet(name) {}\n}\n}\n"
  },
  {
    "path": "src/runtime/zlocks.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/utility/factory_store.h>\n#include <dsn/tool-api/zlocks.h>\n#include \"utils/zlock_provider.h\"\n#include \"runtime/service_engine.h\"\n\nnamespace dsn {\n\nnamespace lock_checker {\n__thread int zlock_exclusive_count;\n__thread int zlock_shared_count;\n\nvoid check_wait_safety()\n{\n    if (zlock_exclusive_count + zlock_shared_count > 0) {\n        dwarn(\"wait inside locks may lead to deadlocks - current thread owns %u exclusive locks \"\n              \"and %u shared locks now.\",\n              zlock_exclusive_count,\n              
zlock_shared_count);\n    }\n}\n\nvoid check_dangling_lock()\n{\n    if (zlock_exclusive_count + zlock_shared_count > 0) {\n        dwarn(\"locks should not be hold at this point - current thread owns %u exclusive locks and \"\n              \"%u shared locks now.\",\n              zlock_exclusive_count,\n              zlock_shared_count);\n    }\n}\n} // namespace lock_checker\n\nzlock::zlock(bool recursive)\n{\n    if (recursive) {\n        lock_provider *last = utils::factory_store<lock_provider>::create(\n            dsn::service_engine::instance().spec().lock_factory_name.c_str(),\n            dsn::PROVIDER_TYPE_MAIN,\n            nullptr);\n        _h = last;\n    } else {\n        lock_nr_provider *last = utils::factory_store<lock_nr_provider>::create(\n            dsn::service_engine::instance().spec().lock_nr_factory_name.c_str(),\n            dsn::PROVIDER_TYPE_MAIN,\n            nullptr);\n        _h = last;\n    }\n}\n\nzlock::~zlock() { delete _h; }\n\nvoid zlock::lock()\n{\n    _h->lock();\n    ++lock_checker::zlock_exclusive_count;\n}\n\nbool zlock::try_lock()\n{\n    auto r = _h->try_lock();\n    if (r) {\n        ++lock_checker::zlock_exclusive_count;\n    }\n    return r;\n}\n\nvoid zlock::unlock()\n{\n    --lock_checker::zlock_exclusive_count;\n    _h->unlock();\n}\n\nzrwlock_nr::zrwlock_nr()\n{\n    rwlock_nr_provider *last = utils::factory_store<rwlock_nr_provider>::create(\n        service_engine::instance().spec().rwlock_nr_factory_name.c_str(),\n        dsn::PROVIDER_TYPE_MAIN,\n        nullptr);\n    _h = last;\n}\n\nzrwlock_nr::~zrwlock_nr() { delete _h; }\n\nvoid zrwlock_nr::lock_read()\n{\n    _h->lock_read();\n    ++lock_checker::zlock_shared_count;\n}\n\nvoid zrwlock_nr::unlock_read()\n{\n    --lock_checker::zlock_shared_count;\n    _h->unlock_read();\n}\n\nbool zrwlock_nr::try_lock_read()\n{\n    auto r = _h->try_lock_read();\n    if (r)\n        ++lock_checker::zlock_shared_count;\n    return r;\n}\n\nvoid 
zrwlock_nr::lock_write()\n{\n    _h->lock_write();\n    ++lock_checker::zlock_exclusive_count;\n}\n\nvoid zrwlock_nr::unlock_write()\n{\n    --lock_checker::zlock_exclusive_count;\n    _h->unlock_write();\n}\n\nbool zrwlock_nr::try_lock_write()\n{\n    auto r = _h->try_lock_write();\n    if (r)\n        ++lock_checker::zlock_exclusive_count;\n    return r;\n}\n\nzsemaphore::zsemaphore(int initial_count)\n{\n    semaphore_provider *last = utils::factory_store<semaphore_provider>::create(\n        service_engine::instance().spec().semaphore_factory_name.c_str(),\n        PROVIDER_TYPE_MAIN,\n        initial_count,\n        nullptr);\n    _h = last;\n}\n\nzsemaphore::~zsemaphore() { delete _h; }\n\nvoid zsemaphore::signal(int count) { _h->signal(count); }\n\nbool zsemaphore::wait(int timeout_milliseconds)\n{\n    if (static_cast<unsigned int>(timeout_milliseconds) == TIME_MS_MAX) {\n        lock_checker::check_wait_safety();\n        _h->wait();\n        return true;\n    } else {\n        return _h->wait(timeout_milliseconds);\n    }\n}\n\nzevent::zevent(bool manualReset, bool initState /* = false*/)\n{\n    _manualReset = manualReset;\n    _signaled = initState;\n    if (_signaled) {\n        _sema.signal();\n    }\n}\n\nzevent::~zevent() {}\n\nvoid zevent::set()\n{\n    bool nonsignaled = false;\n    if (std::atomic_compare_exchange_strong(&_signaled, &nonsignaled, true)) {\n        _sema.signal();\n    }\n}\n\nvoid zevent::reset()\n{\n    if (_manualReset) {\n        bool signaled = true;\n        if (std::atomic_compare_exchange_strong(&_signaled, &signaled, false)) {\n        }\n    }\n}\n\nbool zevent::wait(int timeout_milliseconds)\n{\n    if (_manualReset) {\n        if (std::atomic_load(&_signaled))\n            return true;\n\n        _sema.wait(timeout_milliseconds);\n        return std::atomic_load(&_signaled);\n    }\n\n    else {\n        bool signaled = true;\n        if (std::atomic_compare_exchange_strong(&_signaled, &signaled, false))\n            
return true;\n\n        _sema.wait(timeout_milliseconds);\n        return std::atomic_compare_exchange_strong(&_signaled, &signaled, false);\n    }\n}\n} // namespace dsn\n"
  },
  {
    "path": "src/tools/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn.replication.tool)\n\n# Source files under CURRENT project directory will be automatically included.\n# You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"\")\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS \"\")\n\nset(MY_BINPLACES \"\")\n\ndsn_add_static_library()\n"
  },
  {
    "path": "src/tools/mutation_log_tool.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/dist/replication/mutation_log_tool.h>\n#include <dsn/utils/time_utils.h>\n#include \"replica/mutation_log.h\"\n\nnamespace dsn {\nnamespace replication {\n\nbool mutation_log_tool::dump(\n    const std::string &log_dir,\n    std::ostream &output,\n    std::function<void(int64_t decree, int64_t timestamp, dsn::message_ex **requests, int count)>\n        callback)\n{\n    mutation_log_ptr mlog = new mutation_log_shared(log_dir, 32, false);\n    error_code err = mlog->open(\n        [mlog, &output, callback](int log_length, mutation_ptr &mu) -> bool {\n            if (mlog->max_decree(mu->data.header.pid) == 0) {\n                mlog->set_valid_start_offset_on_open(mu->data.header.pid, 0);\n            }\n            char 
timestamp_buf[32];\n            utils::time_ms_to_string(mu->data.header.timestamp / 1000, timestamp_buf);\n            output << \"mutation [\" << mu->name() << \"]: \"\n                   << \"gpid=\" << mu->data.header.pid.get_app_id() << \".\"\n                   << mu->data.header.pid.get_partition_index() << \", \"\n                   << \"ballot=\" << mu->data.header.ballot << \", decree=\" << mu->data.header.decree\n                   << \", \"\n                   << \"timestamp=\" << timestamp_buf\n                   << \", last_committed_decree=\" << mu->data.header.last_committed_decree << \", \"\n                   << \"log_offset=\" << mu->data.header.log_offset << \", log_length=\" << log_length\n                   << \", \"\n                   << \"update_count=\" << mu->data.updates.size();\n            if (callback && mu->data.updates.size() > 0) {\n\n                dsn::message_ex **batched_requests =\n                    (dsn::message_ex **)alloca(sizeof(dsn::message_ex *) * mu->data.updates.size());\n                int batched_count = 0;\n                for (mutation_update &update : mu->data.updates) {\n                    dsn::message_ex *req = dsn::message_ex::create_received_request(\n                        update.code,\n                        (dsn_msg_serialize_format)update.serialization_type,\n                        (void *)update.data.data(),\n                        update.data.length());\n                    batched_requests[batched_count++] = req;\n                }\n                callback(mu->data.header.decree,\n                         mu->data.header.timestamp,\n                         batched_requests,\n                         batched_count);\n                for (int i = 0; i < batched_count; i++) {\n                    batched_requests[i]->release_ref();\n                }\n            }\n            return true;\n        },\n        nullptr);\n    mlog->close();\n    if (err != dsn::ERR_OK) {\n        output << 
\"ERROR: dump mutation log failed, err = \" << err.to_string() << std::endl;\n        return false;\n    } else {\n        return true;\n    }\n}\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_utils)\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\nset(MY_PROJ_LIBS crypto)\n\n# Extra files that will be installed\nset(MY_BINPLACES \"\")\n\nif (APPLE)\n    dsn_add_static_library()\nelse()\n    dsn_add_shared_library()\nendif()\n\nadd_subdirectory(long_adder_bench)\nadd_subdirectory(test)\n"
  },
  {
    "path": "src/utils/alloc.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/alloc.h>\n\n#include <cstdlib>\n\n#include <dsn/utility/safe_strerror_posix.h>\n\nnamespace dsn {\n\n#ifdef CACHELINE_SIZE\n\n/* extern */ void *cacheline_aligned_alloc(size_t size)\n{\n    if (dsn_unlikely(size == 0)) {\n        return nullptr;\n    }\n\n    void *buffer = nullptr;\n    // CACHELINE_SIZE must be a power of 2 and a multiple of sizeof(void *), which have been\n    // checked statically at compile time when CACHELINE_SIZE is defined as macro.\n    int err = posix_memalign(&buffer, CACHELINE_SIZE, size);\n\n    // Generally there are 2 possible errors for posix_memalign as below:\n    // [EINVAL]\n    //     The value of the alignment parameter is not a power of two multiple of sizeof(void *).\n    // [ENOMEM]\n    //     There is insufficient memory available with the requested alignment.\n    // Thus making an assertion here is enough.\n    dassert_f(err == 0, \"error calling posix_memalign: {}\", utils::safe_strerror(err).c_str());\n\n    return buffer;\n}\n\n/* extern */ void cacheline_aligned_free(void *mem_block) { free(mem_block); }\n\n#endif\n\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/binary_reader.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/utility/utils.h>\n#include <dsn/utility/binary_reader.h>\n#include <dsn/c/api_utilities.h>\n\nnamespace dsn {\n\nbinary_reader::binary_reader(const blob &bb) { init(bb); }\nbinary_reader::binary_reader(blob &&bb) { init(std::move(bb)); }\n\nvoid binary_reader::init(const blob &bb)\n{\n    _blob = bb;\n    _size = bb.length();\n    _ptr = bb.data();\n    _remaining_size = _size;\n}\n\nvoid binary_reader::init(blob &&bb)\n{\n    _blob = std::move(bb);\n    _size = _blob.length();\n    _ptr = _blob.data();\n    _remaining_size = _size;\n}\n\nint binary_reader::read(/*out*/ std::string &s)\n{\n    int len;\n    if (0 == read(len))\n        return 0;\n\n    s.resize(len, 0);\n\n    if (len > 0) {\n        int x = read((char 
*)&s[0], len);\n        return x == 0 ? x : (x + sizeof(len));\n    } else {\n        return static_cast<int>(sizeof(len));\n    }\n}\n\nint binary_reader::read(blob &blob)\n{\n    int len;\n    if (0 == read(len))\n        return 0;\n\n    return read(blob, len);\n}\n\nint binary_reader::read(blob &blob, int len)\n{\n    auto res = inner_read(blob, len);\n    if (dsn_unlikely(res < 0)) {\n        assert(false);\n    }\n    return res;\n}\n\nint binary_reader::read(char *buffer, int sz)\n{\n    auto res = inner_read(buffer, sz);\n    if (dsn_unlikely(res < 0)) {\n        assert(false);\n    }\n    return res;\n}\n\nint binary_reader::inner_read(blob &blob, int len)\n{\n    if (len <= get_remaining_size()) {\n        blob = _blob.range(static_cast<int>(_ptr - _blob.data()), len);\n\n        // optimization: zero-copy\n        if (!blob.buffer_ptr()) {\n            std::shared_ptr<char> buffer(::dsn::utils::make_shared_array<char>(len));\n            memcpy(buffer.get(), blob.data(), blob.length());\n            blob = ::dsn::blob(buffer, 0, blob.length());\n        }\n\n        _ptr += len;\n        _remaining_size -= len;\n        return len + sizeof(len);\n    } else {\n        return -1;\n    }\n}\n\nint binary_reader::inner_read(char *buffer, int sz)\n{\n    if (sz <= get_remaining_size()) {\n        memcpy((void *)buffer, _ptr, sz);\n        _ptr += sz;\n        _remaining_size -= sz;\n        return sz;\n    } else {\n        return -1;\n    }\n}\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/binary_writer.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/utility/utils.h>\n#include <dsn/utility/binary_writer.h>\n\nnamespace dsn {\nint binary_writer::_reserved_size_per_buffer_static = 256;\n\nbinary_writer::binary_writer(int reserveBufferSize)\n{\n    _total_size = 0;\n    _buffers.reserve(1);\n    _reserved_size_per_buffer =\n        (reserveBufferSize == 0) ? 
_reserved_size_per_buffer_static : reserveBufferSize;\n    _current_buffer = nullptr;\n    _current_offset = 0;\n    _current_buffer_length = 0;\n}\n\nbinary_writer::binary_writer(blob &buffer)\n{\n    _total_size = 0;\n    _buffers.reserve(1);\n    _reserved_size_per_buffer = _reserved_size_per_buffer_static;\n\n    _buffers.push_back(buffer);\n    _current_buffer = (char *)buffer.data();\n    _current_offset = 0;\n    _current_buffer_length = buffer.length();\n}\n\nbinary_writer::~binary_writer() {}\n\nvoid binary_writer::flush() { commit(); }\n\nvoid binary_writer::create_buffer(size_t size)\n{\n    commit();\n\n    blob bb;\n    create_new_buffer(size, bb);\n    _buffers.push_back(bb);\n\n    _current_buffer = (char *)bb.data();\n    _current_buffer_length = bb.length();\n}\n\nvoid binary_writer::create_new_buffer(size_t size, /*out*/ blob &bb)\n{\n    bb.assign(::dsn::utils::make_shared_array<char>(size), 0, (int)size);\n}\n\nvoid binary_writer::commit()\n{\n    if (_current_offset > 0) {\n        *_buffers.rbegin() = _buffers.rbegin()->range(0, _current_offset);\n\n        _current_offset = 0;\n        _current_buffer_length = 0;\n    }\n}\n\nblob binary_writer::get_buffer()\n{\n    commit();\n\n    if (_buffers.size() == 1) {\n        return _buffers[0];\n    } else if (_total_size == 0) {\n        return blob();\n    } else {\n        std::shared_ptr<char> bptr(::dsn::utils::make_shared_array<char>(_total_size));\n        blob bb(bptr, _total_size);\n        const char *ptr = bb.data();\n\n        for (int i = 0; i < static_cast<int>(_buffers.size()); i++) {\n            memcpy((void *)ptr, (const void *)_buffers[i].data(), (size_t)_buffers[i].length());\n            ptr += _buffers[i].length();\n        }\n        return bb;\n    }\n}\n\nblob binary_writer::get_current_buffer()\n{\n    if (_buffers.size() == 1) {\n        return _current_offset > 0 ? 
_buffers[0].range(0, _current_offset) : _buffers[0];\n    } else {\n        std::shared_ptr<char> bptr(::dsn::utils::make_shared_array<char>(_total_size));\n        blob bb(bptr, _total_size);\n        const char *ptr = bb.data();\n\n        for (int i = 0; i < static_cast<int>(_buffers.size()); i++) {\n            size_t len = (size_t)_buffers[i].length();\n            if (_current_offset > 0 && i + 1 == (int)_buffers.size()) {\n                len = _current_offset;\n            }\n\n            memcpy((void *)ptr, (const void *)_buffers[i].data(), len);\n            ptr += _buffers[i].length();\n        }\n        return bb;\n    }\n}\n\nvoid binary_writer::write_empty(int sz)\n{\n    int sz0 = sz;\n    int rem_size = _current_buffer_length - _current_offset;\n    if (rem_size >= sz) {\n        _current_offset += sz;\n    } else {\n        _current_offset += rem_size;\n        sz -= rem_size;\n\n        int allocSize = _reserved_size_per_buffer;\n        if (sz > allocSize)\n            allocSize = sz;\n\n        create_buffer(allocSize);\n        _current_offset += sz;\n    }\n\n    _total_size += sz0;\n}\n\nvoid binary_writer::write(const char *buffer, int sz)\n{\n    int rem_size = _current_buffer_length - _current_offset;\n    if (rem_size >= sz) {\n        memcpy((void *)(_current_buffer + _current_offset), buffer, (size_t)sz);\n        _current_offset += sz;\n        _total_size += sz;\n    } else {\n        if (rem_size > 0) {\n            memcpy((void *)(_current_buffer + _current_offset), buffer, (size_t)rem_size);\n            _current_offset += rem_size;\n            _total_size += rem_size;\n            sz -= rem_size;\n        }\n\n        int allocSize = _reserved_size_per_buffer;\n        if (sz > allocSize)\n            allocSize = sz;\n\n        create_buffer(allocSize);\n        memcpy((void *)(_current_buffer + _current_offset), buffer + rem_size, (size_t)sz);\n        _current_offset += sz;\n        _total_size += sz;\n    }\n}\n\nbool 
binary_writer::next(void **data, int *size)\n{\n    int rem_size = _current_buffer_length - _current_offset;\n    if (rem_size == 0) {\n        create_buffer(_reserved_size_per_buffer);\n        rem_size = _current_buffer_length;\n    }\n\n    *size = rem_size;\n    *data = (void *)(_current_buffer + _current_offset);\n    _current_offset = _current_buffer_length;\n    _total_size += rem_size;\n    return true;\n}\n\nbool binary_writer::backup(int count)\n{\n    assert(count <= _current_offset);\n    _current_offset -= count;\n    _total_size -= count;\n    return true;\n}\n}\n"
  },
  {
    "path": "src/utils/clock.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/clock.h>\n#include <dsn/utils/time_utils.h>\n#include <dsn/utility/dlib.h>\n#include <dsn/utility/smart_pointers.h>\n\nDSN_API uint64_t dsn_now_ns() { return dsn::utils::clock::instance()->now_ns(); }\n\nnamespace dsn {\nnamespace utils {\n\nstd::unique_ptr<clock> clock::_clock = make_unique<clock>();\n\nconst clock *clock::instance() { return _clock.get(); }\n\nuint64_t clock::now_ns() const { return get_current_physical_time_ns(); }\n\nvoid clock::mock(clock *mock_clock) { _clock.reset(mock_clock); }\n\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/command_manager.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <iostream>\n#include <thread>\n#include <sstream>\n\n#include <dsn/utility/utils.h>\n#include <dsn/tool-api/command_manager.h>\n\nnamespace dsn {\n\ndsn_handle_t command_manager::register_command(const std::vector<std::string> &commands,\n                                               const std::string &help_one_line,\n                                               const std::string &help_long,\n                                               command_handler handler)\n{\n    utils::auto_write_lock l(_lock);\n    bool is_valid_cmd = false;\n\n    for (const std::string &cmd : commands) {\n        if (!cmd.empty()) {\n            is_valid_cmd = true;\n            auto it = _handlers.find(cmd);\n            dassert(it == 
_handlers.end(), \"command '%s' already regisered\", cmd.c_str());\n        }\n    }\n    dassert(is_valid_cmd, \"should not register empty command\");\n\n    command_instance *c = new command_instance();\n    c->commands = commands;\n    c->help_long = help_long;\n    c->help_short = help_one_line;\n    c->handler = handler;\n\n    for (const std::string &cmd : commands) {\n        if (!cmd.empty()) {\n            _handlers[cmd] = c;\n        }\n    }\n    return c;\n}\n\nvoid command_manager::deregister_command(dsn_handle_t handle)\n{\n    auto c = reinterpret_cast<command_instance *>(handle);\n    dassert(c != nullptr, \"cannot deregister a null handle\");\n    utils::auto_write_lock l(_lock);\n    for (const std::string &cmd : c->commands) {\n        _handlers.erase(cmd);\n    }\n}\n\nbool command_manager::run_command(const std::string &cmd,\n                                  const std::vector<std::string> &args,\n                                  /*out*/ std::string &output)\n{\n    command_instance *h = nullptr;\n    {\n        utils::auto_read_lock l(_lock);\n        auto it = _handlers.find(cmd);\n        if (it != _handlers.end())\n            h = it->second;\n    }\n\n    if (h == nullptr) {\n        output = std::string(\"unknown command '\") + cmd + \"'\";\n        return false;\n    } else {\n        output = h->handler(args);\n        return true;\n    }\n}\n\ncommand_manager::command_manager()\n{\n    register_command({\"help\", \"h\", \"H\", \"Help\"},\n                     \"help|Help|h|H [command] - display help information\",\n                     \"\",\n                     [this](const std::vector<std::string> &args) {\n                         std::stringstream ss;\n\n                         if (args.size() == 0) {\n                             utils::auto_read_lock l(_lock);\n                             for (const auto &c : this->_handlers) {\n                                 ss << c.second->help_short << std::endl;\n                        
     }\n                         } else {\n                             utils::auto_read_lock l(_lock);\n                             auto it = _handlers.find(args[0]);\n                             if (it == _handlers.end())\n                                 ss << \"cannot find command '\" << args[0] << \"'\";\n                             else {\n                                 ss.width(6);\n                                 ss << std::left << it->first << \": \" << it->second->help_short\n                                    << std::endl\n                                    << it->second->help_long << std::endl;\n                             }\n                         }\n\n                         return ss.str();\n                     });\n\n    register_command(\n        {\"repeat\", \"r\", \"R\", \"Repeat\"},\n        \"repeat|Repeat|r|R interval_seconds max_count command - execute command periodically\",\n        \"repeat|Repeat|r|R interval_seconds max_count command - execute command every interval \"\n        \"seconds, to the max count as max_count (0 for infinite)\",\n        [this](const std::vector<std::string> &args) {\n            std::stringstream ss;\n\n            if (args.size() < 3) {\n                return \"insufficient arguments\";\n            }\n\n            int interval_seconds = atoi(args[0].c_str());\n            if (interval_seconds <= 0) {\n                return \"invalid interval argument\";\n            }\n\n            int max_count = atoi(args[1].c_str());\n            if (max_count < 0) {\n                return \"invalid max count\";\n            }\n\n            if (max_count == 0) {\n                max_count = std::numeric_limits<int>::max();\n            }\n\n            std::string cmd = args[2];\n            std::vector<std::string> largs;\n            for (int i = 3; i < (int)args.size(); i++) {\n                largs.push_back(args[i]);\n            }\n\n            for (int i = 0; i < max_count; i++) {\n               
 std::string output;\n                auto r = this->run_command(cmd, largs, output);\n\n                if (!r) {\n                    break;\n                }\n\n                std::this_thread::sleep_for(std::chrono::seconds(interval_seconds));\n            }\n\n            return \"repeat command completed\";\n        });\n}\n\ncommand_manager::~command_manager() { _handlers.clear(); }\n\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/config_api.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <fstream>\n#include <dsn/utility/config_api.h>\n#include <dsn/utility/configuration.h>\n\ndsn::configuration g_config;\n\nbool dsn_config_load(const char *file, const char *arguments)\n{\n    return g_config.load(file, arguments);\n}\n\nvoid dsn_config_dump(std::ostream &os) { g_config.dump(os); }\n\nconst char *dsn_config_get_value_string(const char *section,\n                                        const char *key,\n                                        const char *default_value,\n                                        const char *dsptr)\n{\n    return g_config.get_string_value(section, key, default_value, dsptr);\n}\n\nbool dsn_config_get_value_bool(const char *section,\n                               const char 
*key,\n                               bool default_value,\n                               const char *dsptr)\n{\n    return g_config.get_value<bool>(section, key, default_value, dsptr);\n}\n\nuint64_t dsn_config_get_value_uint64(const char *section,\n                                     const char *key,\n                                     uint64_t default_value,\n                                     const char *dsptr)\n{\n    return g_config.get_value<uint64_t>(section, key, default_value, dsptr);\n}\n\nint64_t dsn_config_get_value_int64(const char *section,\n                                   const char *key,\n                                   int64_t default_value,\n                                   const char *dsptr)\n{\n    return g_config.get_value<int64_t>(section, key, default_value, dsptr);\n}\n\ndouble dsn_config_get_value_double(const char *section,\n                                   const char *key,\n                                   double default_value,\n                                   const char *dsptr)\n{\n    return g_config.get_value<double>(section, key, default_value, dsptr);\n}\n\nvoid dsn_config_get_all_sections(/*out*/ std::vector<std::string> &sections)\n{\n    g_config.get_all_sections(sections);\n}\n\nvoid dsn_config_get_all_sections(/*out*/ std::vector<const char *> &sections)\n{\n    g_config.get_all_section_ptrs(sections);\n}\n\nvoid dsn_config_get_all_keys(const char *section, std::vector<std::string> &keys)\n{\n    std::vector<const char *> key_ptrs;\n    g_config.get_all_keys(section, key_ptrs);\n    for (const char *p : key_ptrs)\n        keys.emplace_back(std::string(p));\n}\n\nvoid dsn_config_get_all_keys(const char *section, /*out*/ std::vector<const char *> &keys)\n{\n    g_config.get_all_keys(section, keys);\n}\n\nvoid dsn_config_set(const char *section, const char *key, const char *value, const char *dsptr)\n{\n    g_config.set(section, key, value, dsptr);\n}\n"
  },
  {
    "path": "src/utils/configuration.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <cassert>\n#include <errno.h>\n#include <iostream>\n#include <algorithm>\n\n#include <dsn/utility/utils.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/strings.h>\n#include <dsn/utility/configuration.h>\n\nnamespace dsn {\n\nconfiguration::configuration() { _warning = false; }\n\nconfiguration::~configuration()\n{\n    for (auto &section_kv : _configs) {\n        auto &section = section_kv.second;\n        for (auto &kv : section) {\n            delete kv.second;\n        }\n    }\n    _configs.clear();\n}\n\n// arguments: 
k1=v1;k2=v2;k3=v3; ...\n// e.g.,\n//    port = %port%\n//    timeout = %timeout%\n// arguments: port=23466;timeout=1000 or arguments: ports=23466,timout=1000\nbool configuration::load(const char *file_name, const char *arguments)\n{\n    _file_name = std::string(file_name);\n\n    FILE *fd = ::fopen(file_name, \"rb\");\n    if (fd == nullptr) {\n        std::string cdir;\n        dsn::utils::filesystem::get_current_directory(cdir);\n        printf(\"ERROR: cannot open file %s in %s, err = %s\\n\",\n               file_name,\n               cdir.c_str(),\n               strerror(errno));\n        return false;\n    }\n    ::fseek(fd, 0, SEEK_END);\n    int len = ftell(fd);\n    if (len == -1 || len == 0) {\n        printf(\"ERROR: cannot get length of %s, err = %s\\n\", file_name, strerror(errno));\n        ::fclose(fd);\n        return false;\n    }\n\n    _file_data.resize(len + 1);\n    ::fseek(fd, 0, SEEK_SET);\n    auto sz = ::fread((char *)_file_data.c_str(), len, 1, fd);\n    ::fclose(fd);\n    if (sz != 1) {\n        printf(\"ERROR: cannot read correct data of %s, err = %s\\n\", file_name, strerror(errno));\n        return false;\n    }\n    _file_data[len] = '\\n';\n\n    // replace data with arguments\n    if (arguments != nullptr) {\n        std::string str_arguments(arguments);\n        std::replace(str_arguments.begin(), str_arguments.end(), ',', ';');\n        std::list<std::string> argkvs;\n        utils::split_args(str_arguments.c_str(), argkvs, ';');\n        for (auto &kv : argkvs) {\n            std::list<std::string> vs;\n            utils::split_args(kv.c_str(), vs, '=');\n            if (vs.size() != 2) {\n                printf(\n                    \"ERROR: invalid configuration argument: '%s' in '%s'\\n\", kv.c_str(), arguments);\n                return false;\n            }\n\n            std::string key = std::string(\"%\") + *vs.begin() + std::string(\"%\");\n            std::string value = *vs.rbegin();\n            _file_data = 
utils::replace_string(_file_data, key, value);\n        }\n    }\n\n    //\n    // parse mapped file and build conf map\n    //\n    std::map<std::string, conf *> *pSection = nullptr;\n    char *p, *pLine = (char *)\"\", *pNextLine, *pEnd, *pSectionName = nullptr, *pEqual;\n    int lineno = 0;\n\n    // ATTENTION: arguments replace_string() may cause _file_data changed,\n    // so set `p' and `pEnd' carefully.\n    p = (char *)_file_data.c_str();\n    pEnd = p + _file_data.size();\n\n    while (p < pEnd) {\n        //\n        // get line\n        //\n        lineno++;\n        while (*p == ' ' || *p == '\\t' || *p == '\\r')\n            p++;\n\n        pLine = p;\n        int shift = 0;\n        while (*p != '\\n' && p < pEnd) {\n            if (*p == '#' || *p == ';') {\n                if (p != pLine && *(p - 1) == '^') {\n                    shift++;\n                } else {\n                    *p = '\\0';\n                }\n            }\n\n            if (shift > 0) {\n                *(p - shift) = *p;\n            }\n            p++;\n        }\n        *(p - shift) = '\\0';\n        pNextLine = ++p;\n\n        //\n        // parse line\n        //\n        p = pLine;\n        if (*p == '\\0')\n            goto Next; // skip comment line or empty line\n        pEqual = strchr(p, '=');\n        if (nullptr == pEqual && *p != '[') {\n            goto ConfReg;\n        }\n        if (nullptr != pEqual && *p == '[')\n            goto err;\n\n        //\n        //    conf\n        //\n        if (pEqual) {\n        ConfReg:\n            if (pSection == nullptr) {\n                printf(\"ERROR: configuration section not defined\\n\");\n                goto err;\n            }\n            if (pEqual)\n                *pEqual = '\\0';\n            char *pKey = utils::trim_string(p);\n            char *pValue = pEqual ? 
utils::trim_string(++pEqual) : nullptr;\n            if (*pKey == '\\0')\n                goto err;\n\n            if (pSection->find((const char *)pKey) != pSection->end()) {\n                auto it = pSection->find((const char *)pKey);\n\n                printf(\"WARNING: skip redefinition of option [%s] %s (line %u), already defined as \"\n                       \"[%s] %s (line %u)\\n\",\n                       pSectionName,\n                       pKey,\n                       lineno,\n                       it->second->section.c_str(),\n                       it->second->key.c_str(),\n                       it->second->line);\n            } else {\n                conf *cf = new conf;\n                cf->section = (const char *)pSectionName;\n                cf->key = pKey;\n                cf->line = lineno;\n                cf->present = true;\n\n                if (pValue) {\n                    // if argument is not provided\n                    if (strlen(pValue) > 2 && *pValue == '%' && pValue[strlen(pValue) - 1] == '%')\n                        cf->value = \"\";\n                    else\n                        cf->value = pValue;\n                } else {\n                    cf->value = \"\";\n                }\n\n                pSection->insert(std::make_pair(std::string(pKey), cf));\n            }\n        }\n        //\n        //    section\n        //\n        else {\n            char *pRight = strchr(p, ']');\n            if (nullptr == pRight)\n                goto err;\n            *pRight = '\\0';\n            p++;\n            pSectionName = utils::trim_string(p);\n            if (*pSectionName == '\\0')\n                goto err;\n\n            bool old = set_warning(false);\n            if (has_section((const char *)pSectionName)) {\n                printf(\"ERROR: configuration section '[%s]' is redefined\\n\", pSectionName);\n                set_warning(old);\n                goto err;\n            }\n            
set_warning(old);\n\n            std::map<std::string, conf *> sm;\n            auto it = _configs.insert(config_map::value_type(std::string(pSectionName), sm));\n            assert(it.second);\n            pSection = &it.first->second;\n        }\n\n    //\n    // iterate nextline\n    //\n    Next:\n        p = pNextLine;\n    }\n    return true;\n\nerr:\n    printf(\"ERROR: unexpected configuration in %s(line %d): %s\\n\", file_name, lineno, pLine);\n    return false;\n}\n\nvoid configuration::get_all_section_ptrs(std::vector<const char *> &sections)\n{\n    sections.clear();\n    for (auto it = _configs.begin(); it != _configs.end(); ++it) {\n        sections.push_back(it->first.c_str());\n    }\n}\n\nvoid configuration::get_all_sections(std::vector<std::string> &sections)\n{\n    sections.clear();\n    for (auto it = _configs.begin(); it != _configs.end(); ++it) {\n        sections.push_back(it->first);\n    }\n}\n\nvoid configuration::get_all_keys(const char *section, std::vector<const char *> &keys)\n{\n    std::multimap<int, const char *> ordered_keys;\n    keys.clear();\n    auto it = _configs.find(section);\n    if (it != _configs.end()) {\n        for (auto it2 = it->second.begin(); it2 != it->second.end(); it2++) {\n            ordered_keys.emplace(it2->second->line, it2->first.c_str());\n        }\n    }\n\n    for (auto &k : ordered_keys) {\n        keys.push_back(k.second);\n    }\n}\n\nbool configuration::get_string_value_internal(const char *section,\n                                              const char *key,\n                                              const char *default_value,\n                                              const char **ov,\n                                              const char *dsptr)\n{\n    _lock.lock();\n\n    std::map<std::string, conf *> *ps = nullptr;\n    auto it = _configs.find(section);\n    if (it != _configs.end()) {\n        ps = &it->second;\n        auto it2 = it->second.find(key);\n        if (it2 != 
it->second.end()) {\n            if (!it2->second->present) {\n                if (it2->second->value != default_value) {\n                    printf(\"ERROR: configuration default value is different for '[%s] %s': %s <--> \"\n                           \"%s\\n\",\n                           section,\n                           key,\n                           it2->second->value.c_str(),\n                           default_value);\n                    ::abort();\n                }\n            }\n\n            if (it2->second->dsptr.length() == 0)\n                it2->second->dsptr = dsptr;\n\n            *ov = it2->second->value.c_str();\n            bool ret = it2->second->present ? true : false;\n\n            _lock.unlock();\n            return ret;\n        }\n    }\n\n    if (ps == nullptr) {\n        std::map<std::string, conf *> sm;\n        auto it = _configs.insert(config_map::value_type(std::string(section), sm));\n        assert(it.second);\n        ps = &it.first->second;\n    }\n\n    conf *cf = new conf();\n    cf->dsptr = dsptr;\n    cf->key = key;\n    cf->value = default_value;\n    cf->line = 0;\n    cf->present = false;\n    cf->section = section;\n    ps->insert(std::make_pair(cf->key, cf));\n\n    *ov = cf->value.c_str();\n\n    _lock.unlock();\n    return false;\n}\n\nconst char *configuration::get_string_value(const char *section,\n                                            const char *key,\n                                            const char *default_value,\n                                            const char *dsptr)\n{\n    const char *ov;\n    if (!get_string_value_internal(section, key, default_value, &ov, dsptr)) {\n        if (_warning) {\n            printf(\"WARNING: configuration '[%s] %s' is not defined, default value is '%s'\\n\",\n                   section,\n                   key,\n                   default_value);\n        }\n    }\n    return ov;\n}\n\nstd::list<std::string> configuration::get_string_value_list(const 
char *section,\n                                                            const char *key,\n                                                            char splitter,\n                                                            const char *dsptr)\n{\n    const char *ov;\n    if (!get_string_value_internal(section, key, \"\", &ov, dsptr)) {\n        if (_warning) {\n            printf(\"WARNING: configuration '[%s] %s' is not defined, default value is '%s'\\n\",\n                   section,\n                   key,\n                   \"\");\n        }\n    }\n\n    std::list<std::string> vs;\n    utils::split_args(ov, vs, splitter);\n\n    for (auto &v : vs) {\n        v = std::string(utils::trim_string((char *)v.c_str()));\n    }\n    return vs;\n}\n\nvoid configuration::dump(std::ostream &os)\n{\n    _lock.lock();\n\n    for (auto &s : _configs) {\n        os << \"[\" << s.first << \"]\" << std::endl;\n\n        std::multimap<int, conf *> ordered_entities;\n        for (auto &kv : s.second) {\n            ordered_entities.emplace(kv.second->line, kv.second);\n        }\n\n        for (auto &kv : ordered_entities) {\n            os << \"; \" << kv.second->dsptr << std::endl;\n            os << kv.second->key << \" = \" << kv.second->value << std::endl << std::endl;\n        }\n\n        os << std::endl;\n    }\n\n    _lock.unlock();\n}\n\nvoid configuration::set(const char *section, const char *key, const char *value, const char *dsptr)\n{\n    std::map<std::string, conf *> *psection;\n\n    _lock.lock();\n\n    auto it = _configs.find(section);\n    if (it != _configs.end()) {\n        psection = &it->second;\n    } else {\n        std::map<std::string, conf *> s;\n        psection = &_configs.insert(config_map::value_type(section, s)).first->second;\n    }\n\n    auto it2 = psection->find(key);\n    if (it2 == psection->end()) {\n        conf *cf = new conf();\n        cf->dsptr = dsptr;\n        cf->key = key;\n        cf->value = value;\n        cf->line = 
0;\n        cf->present = true;\n        cf->section = section;\n        psection->insert(std::make_pair(cf->key, cf));\n    } else {\n        it2->second->value = value;\n    }\n\n    _lock.unlock();\n}\n\nbool configuration::has_section(const char *section)\n{\n    auto it = _configs.find(section);\n    bool r = (it != _configs.end());\n    if (!r && _warning) {\n        printf(\"WARNING: configuration section '[%s]' is not defined, using default settings\\n\",\n               section);\n    }\n    return r;\n}\n\nbool configuration::has_key(const char *section, const char *key)\n{\n    auto it = _configs.find(section);\n    if (it != _configs.end()) {\n        auto it2 = it->second.find(key);\n        return (it2 != it->second.end());\n    }\n    return false;\n}\n}\n"
  },
  {
    "path": "src/utils/coredump.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <string>\n\nnamespace dsn {\nnamespace utils {\n\nclass coredump\n{\npublic:\n    static void init();\n    static void write();\n};\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/coredump.posix.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"coredump.h\"\n#include <dsn/tool_api.h>\n#include <sys/types.h>\n#include <signal.h>\n#include <dsn/c/app_model.h>\n\nnamespace dsn {\nnamespace utils {\n\nstatic void handle_core_dump(int);\nstatic void handle_term(int);\n\nvoid coredump::init()\n{\n    signal(SIGSEGV, handle_core_dump);\n    signal(SIGTERM, handle_term);\n}\n\nvoid coredump::write()\n{\n    // TODO: not implemented\n    //\n\n    ::dsn::tools::sys_exit.execute(SYS_EXIT_EXCEPTION);\n}\n\nstatic void handle_core_dump(int signal_id)\n{\n    printf(\"got signal id: %d\\n\", signal_id);\n    fflush(stdout);\n    /*\n     * firstly we must set the sig_handler to default,\n     * to prevent the possible inifinite loop\n     * for example: an sigsegv in the 
coredump::write()\n     */\n    if (signal_id == SIGSEGV) {\n        signal(SIGSEGV, SIG_DFL);\n    }\n    coredump::write();\n}\n\nstatic void handle_term(int signal_id)\n{\n    printf(\"got signal id: %d\\n\", signal_id);\n    fflush(stdout);\n    dsn_exit(0);\n}\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/crc.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <cstdio>\n#include <dsn/utility/crc.h>\n\nnamespace dsn {\nnamespace utils {\n\ntemplate <typename uintxx_t, uintxx_t uPoly>\nstruct crc_generator\n{\n    typedef uintxx_t uint;\n    static const uintxx_t MSB = ((uintxx_t)1) << (8 * sizeof(uintxx_t) - 1);\n    static const uintxx_t POLY = uPoly;\n    static uintxx_t _crc_table[256];\n    static uintxx_t _uX2N[64];\n\n    //\n    // compute CRC\n    //\n    static uintxx_t compute(const void *pSrc, size_t uSize, uintxx_t uCrc)\n    {\n        const uint8_t *pData = (const uint8_t *)pSrc;\n        size_t uBytes;\n\n        uCrc = ~uCrc;\n\n        while (uSize > 15) {\n            uBytes = 0x80000000u;\n            if (uBytes > uSize)\n                uBytes = uSize;\n         
   uSize -= uBytes;\n\n            for (; uBytes > 15; uBytes -= 16, pData += 16) {\n                uCrc = _crc_table[(uint8_t)(uCrc ^ pData[0])] ^ (uCrc >> 8);\n                uCrc = _crc_table[(uint8_t)(uCrc ^ pData[1])] ^ (uCrc >> 8);\n                uCrc = _crc_table[(uint8_t)(uCrc ^ pData[2])] ^ (uCrc >> 8);\n                uCrc = _crc_table[(uint8_t)(uCrc ^ pData[3])] ^ (uCrc >> 8);\n                uCrc = _crc_table[(uint8_t)(uCrc ^ pData[4])] ^ (uCrc >> 8);\n                uCrc = _crc_table[(uint8_t)(uCrc ^ pData[5])] ^ (uCrc >> 8);\n                uCrc = _crc_table[(uint8_t)(uCrc ^ pData[6])] ^ (uCrc >> 8);\n                uCrc = _crc_table[(uint8_t)(uCrc ^ pData[7])] ^ (uCrc >> 8);\n                uCrc = _crc_table[(uint8_t)(uCrc ^ pData[8])] ^ (uCrc >> 8);\n                uCrc = _crc_table[(uint8_t)(uCrc ^ pData[9])] ^ (uCrc >> 8);\n                uCrc = _crc_table[(uint8_t)(uCrc ^ pData[10])] ^ (uCrc >> 8);\n                uCrc = _crc_table[(uint8_t)(uCrc ^ pData[11])] ^ (uCrc >> 8);\n                uCrc = _crc_table[(uint8_t)(uCrc ^ pData[12])] ^ (uCrc >> 8);\n                uCrc = _crc_table[(uint8_t)(uCrc ^ pData[13])] ^ (uCrc >> 8);\n                uCrc = _crc_table[(uint8_t)(uCrc ^ pData[14])] ^ (uCrc >> 8);\n                uCrc = _crc_table[(uint8_t)(uCrc ^ pData[15])] ^ (uCrc >> 8);\n            }\n\n            uSize += uBytes;\n        }\n\n        for (uBytes = uSize; uBytes > 0; uBytes -= 1, pData += 1)\n            uCrc = _crc_table[(uint8_t)(uCrc ^ pData[0])] ^ (uCrc >> 8);\n\n        uCrc = ~uCrc;\n\n        return (uCrc);\n    };\n\n    //\n    // Returns (a * b) mod POLY.\n    // \"a\" and \"b\" are represented in \"reversed\" order -- LSB is x**(XX-1) coefficient, MSB is x^0\n    // coefficient.\n    // \"POLY\" is represented in the same manner except for omitted x**XX coefficient\n    //\n    static uintxx_t MulPoly(uintxx_t a, uintxx_t b)\n    {\n        uintxx_t r;\n\n        if (a == 0)\n            return (0);\n\n   
     r = 0;\n        do {\n            if (a & MSB)\n                r ^= b;\n\n            if (b & 1)\n                b = (b >> 1) ^ POLY;\n            else\n                b >>= 1;\n\n            a <<= 1;\n        } while (a != 0);\n\n        return (r);\n    };\n\n    //\n    // Returns (x ** (8*uSize)) mod POLY\n    //\n    static uintxx_t ComputeX_N(uint64_t uSize)\n    {\n        size_t i;\n        uintxx_t r;\n\n        r = MSB; // r = 1\n        for (i = 0; uSize != 0; uSize >>= 1, i += 1) {\n            if (uSize & 1)\n                r = MulPoly(r, _uX2N[i]);\n        }\n\n        return (r);\n    };\n\n    //\n    // Allows to change initial CRC value\n    //\n    static uintxx_t ConvertInitialCrc(uintxx_t uNew, uintxx_t uOld, uintxx_t uCrc, size_t uSize)\n    {\n        //\n        // CRC (A, uSize, uCrc) = (uCrc * x**uSize + A * x**XX) mod POLY (let's forget about double\n        // NOTs of uCrc)\n        //\n        // we know uCrc(uOld) = (uOld * x**uSize + A * x**XX) mod POLY; we need to compute\n        // uCrc(uNew) = (uNew * x**uSize + A * x**XX) mod POLY\n        //\n        // uCrc(uNew) = uCrc(Old) + (uNew - uOld) * x**uSize)\n        //\n\n        uNew ^= uOld;\n        uOld = ComputeX_N(uSize);\n        uOld = MulPoly(uOld, uNew);\n        uCrc ^= uOld;\n\n        return (uCrc);\n    };\n\n    //\n    // Given\n    //      uFinalCrcA = ComputeCrc (A, uSizeA, uInitialCrcA)\n    // and\n    //      uFinalCrcB = ComputeCrc (B, uSizeB, uInitialCrcB),\n    // compute CRC of concatenation of A and B\n    //      uFinalCrcAB = ComputeCrc (AB, uSizeA + uSizeB, uInitialCrcAB)\n    // without touching A and B\n    //\n    // NB: uSizeA and/or uSizeB may be 0s (this trick may be used to \"recompute\" CRC for another\n    // initial value)\n    //\n\n    static uintxx_t concatenate(uintxx_t uInitialCrcAB,\n                                uintxx_t uInitialCrcA,\n                                uintxx_t uFinalCrcA,\n                                
uint64_t uSizeA,\n                                uintxx_t uInitialCrcB,\n                                uintxx_t uFinalCrcB,\n                                uint64_t uSizeB)\n    {\n        uintxx_t uX_nA, uX_nB, uFinalCrcAB;\n\n        //\n        // Crc (X, uSizeX, uInitialCrcX) = ~(((~uInitialCrcX) * x**uSizeX + X * x**XX) mod POLY)\n        //\n\n        //\n        // first, convert CRC's to canonical values getting rid of double bitwise NOT around uCrc\n        //\n        uInitialCrcAB = ~uInitialCrcAB;\n        uInitialCrcA = ~uInitialCrcA;\n        uFinalCrcA = ~uFinalCrcA;\n        uInitialCrcB = ~uInitialCrcB;\n        uFinalCrcB = ~uFinalCrcB;\n\n        //\n        // convert uFinalCrcX into canonical form, so that\n        //      uFinalCrcX = (X * x**XX) mod POLY\n        //\n        uX_nA = ComputeX_N(uSizeA);\n        uFinalCrcA ^= MulPoly(uX_nA, uInitialCrcA);\n        uX_nB = ComputeX_N(uSizeB);\n        uFinalCrcB ^= MulPoly(uX_nB, uInitialCrcB);\n\n        //\n        // we know\n        //      uFinalCrcA = (A * x**XX) mod POLY\n        //      uFinalCrcB = (B * x**XX) mod POLY\n        // and need to compute\n        //      uFinalCrcAB = (AB * x**XX) mod POLY =\n        //                  = ((A * x**uSizeB + B) * x**XX) mod POLY =\n        //                  = (A * x**XX) * x**uSizeB + B * x**XX mod POLY =\n        //                  = uFinalCrcB + (uFinalCrcA * x**uSizeB) mod POLY\n        //\n\n        uFinalCrcAB = uFinalCrcB ^ MulPoly(uFinalCrcA, uX_nB);\n\n        //\n        // Finally, adjust initial value; we have\n        //      uFinalCrcAB = (AB * x**XX) mod POLY\n        // but want to have\n        //      uFinalCrcAB = (UInitialCrcAB * x**(uSizeA + uSizeB) + AB * x**XX) mod POLY\n        //\n\n        uFinalCrcAB ^= MulPoly(uInitialCrcAB, MulPoly(uX_nA, uX_nB));\n\n        // convert back to double NOT\n        uFinalCrcAB = ~uFinalCrcAB;\n\n        return (uFinalCrcAB);\n    };\n\n    static void InitializeTables(void)\n 
   {\n        size_t i, j;\n        uintxx_t k;\n\n        _uX2N[0] = MSB >> 8;\n        for (i = 1; i < sizeof(_uX2N) / sizeof(_uX2N[0]); ++i)\n            _uX2N[i] = MulPoly(_uX2N[i - 1], _uX2N[i - 1]);\n\n        for (i = 0; i < 256; ++i) {\n            k = (uintxx_t)i;\n            for (j = 0; j < 8; ++j) {\n                if (k & 1)\n                    k = (k >> 1) ^ POLY;\n                else\n                    k = (k >> 1);\n            }\n            _crc_table[i] = k;\n        }\n    }\n\n    static void PrintTables(char *pTypeName, char *pClassName)\n    {\n        size_t i, w;\n\n        InitializeTables();\n\n        printf(\"%s %s::_uX2N[sizeof (%s::_uX2N) / sizeof (%s::_uX2N[0])] = {\",\n               pTypeName,\n               pClassName,\n               pClassName,\n               pClassName);\n        for (i = w = 0; i < sizeof(_uX2N) / sizeof(_uX2N[0]); ++i) {\n            if (i != 0)\n                printf(\",\");\n            if (w == 0)\n                printf(\"\\n   \");\n            printf(\" 0x%0*llx\", static_cast<int>(sizeof(uintxx_t) * 2), (uint64_t)_uX2N[i]);\n            w = (w + sizeof(uintxx_t)) & 31;\n        }\n        printf(\"\\n};\\n\\n\");\n\n        printf(\"%s %s::_crc_table[sizeof (%s::_crc_table) / sizeof (%s::_crc_table[0])] = {\",\n               pTypeName,\n               pClassName,\n               pClassName,\n               pClassName);\n        for (i = w = 0; i < sizeof(_crc_table) / sizeof(_crc_table[0]); ++i) {\n            if (i != 0)\n                printf(\",\");\n            if (w == 0)\n                printf(\"\\n   \");\n            printf(\" 0x%0*llx\", static_cast<int>(sizeof(uintxx_t) * 2), (uint64_t)_crc_table[i]);\n            w = (w + sizeof(uintxx_t)) & 31;\n        }\n        printf(\"\\n};\\n\\n\");\n    };\n};\n\n#define BIT64(n) (1ull << (63 - (n)))\n#define crc64_POLY                                                                                 \\\n    (BIT64(63) + BIT64(61) + 
BIT64(59) + BIT64(58) + BIT64(56) + BIT64(55) + BIT64(52) +           \\\n     BIT64(49) + BIT64(48) + BIT64(47) + BIT64(46) + BIT64(44) + BIT64(41) + BIT64(37) +           \\\n     BIT64(36) + BIT64(34) + BIT64(32) + BIT64(31) + BIT64(28) + BIT64(26) + BIT64(23) +           \\\n     BIT64(22) + BIT64(19) + BIT64(16) + BIT64(13) + BIT64(12) + BIT64(10) + BIT64(9) + BIT64(6) + \\\n     BIT64(4) + BIT64(3) + BIT64(0))\n\n#define BIT32(n) (1u << (31 - (n)))\n#define crc32_POLY                                                                                 \\\n    (BIT32(28) + BIT32(27) + BIT32(26) + BIT32(25) + BIT32(23) + BIT32(22) + BIT32(20) +           \\\n     BIT32(19) + BIT32(18) + BIT32(14) + BIT32(13) + BIT32(11) + BIT32(10) + BIT32(9) + BIT32(8) + \\\n     BIT32(6) + BIT32(0))\n\ntypedef crc_generator<uint32_t, crc32_POLY> crc32;\ntypedef crc_generator<uint64_t, crc64_POLY> crc64;\n\ntemplate <>\nuint32_t crc32::_uX2N[sizeof(crc32::_uX2N) / sizeof(crc32::_uX2N[0])] = {\n    0x00800000, 0x00008000, 0x82f63b78, 0x6ea2d55c, 0x18b8ea18, 0x510ac59a, 0xb82be955, 0xb8fdb1e7,\n    0x88e56f72, 0x74c360a4, 0xe4172b16, 0x0d65762a, 0x35d73a62, 0x28461564, 0xbf455269, 0xe2ea32dc,\n    0xfe7740e6, 0xf946610b, 0x3c204f8f, 0x538586e3, 0x59726915, 0x734d5309, 0xbc1ac763, 0x7d0722cc,\n    0xd289cabe, 0xe94ca9bc, 0x05b74f3f, 0xa51e1f42, 0x40000000, 0x20000000, 0x08000000, 0x00800000,\n    0x00008000, 0x82f63b78, 0x6ea2d55c, 0x18b8ea18, 0x510ac59a, 0xb82be955, 0xb8fdb1e7, 0x88e56f72,\n    0x74c360a4, 0xe4172b16, 0x0d65762a, 0x35d73a62, 0x28461564, 0xbf455269, 0xe2ea32dc, 0xfe7740e6,\n    0xf946610b, 0x3c204f8f, 0x538586e3, 0x59726915, 0x734d5309, 0xbc1ac763, 0x7d0722cc, 0xd289cabe,\n    0xe94ca9bc, 0x05b74f3f, 0xa51e1f42, 0x40000000, 0x20000000, 0x08000000, 0x00800000, 0x00008000};\n\ntemplate <>\nuint32_t crc32::_crc_table[sizeof(crc32::_crc_table) / sizeof(crc32::_crc_table[0])] = {\n    0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 
0xd4ca64eb,\n    0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24,\n    0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384,\n    0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b,\n    0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35,\n    0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa,\n    0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a,\n    0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595,\n    0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957,\n    0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198,\n    0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38,\n    0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7,\n    0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789,\n    0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46,\n    0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6,\n    0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829,\n    0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93,\n    0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c,\n    0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc,\n    0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033,\n    0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 
0x97baa1ba, 0x84ea524e, 0x7681d14d,\n    0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982,\n    0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622,\n    0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed,\n    0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f,\n    0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0,\n    0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540,\n    0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f,\n    0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1,\n    0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e,\n    0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e,\n    0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351};\n\ntemplate <>\nuint64_t crc64::_uX2N[sizeof(crc64::_uX2N) / sizeof(crc64::_uX2N[0])] = {\n    0x0080000000000000, 0x0000800000000000, 0x0000000080000000, 0x9a6c9329ac4bc9b5,\n    0x10f4bb0f129310d6, 0x70f05dcea2ebd226, 0x311211205672822d, 0x2fc297db0f46c96e,\n    0xca4d536fabf7da84, 0xfb4cdc3b379ee6ed, 0xea261148df25140a, 0x59ccb2c07aa6c9b4,\n    0x20b3674a839af27a, 0x2d8e1986da94d583, 0x42cdf4c20337635d, 0x1d78724bf0f26839,\n    0xb96c84e0afb34bd5, 0x5d2e1fcd2df0a3ea, 0xcd9506572332be42, 0x23bda2427f7d690f,\n    0x347a953232374f07, 0x1c2a807ac2a8ceea, 0x9b92ad0e14fe1460, 0x2574114889f670b2,\n    0x4a84a6c45e3bf520, 0x915bbac21cd1c7ff, 0xb0290ec579f291f5, 0xcf2548505c624e6e,\n    0xb154f27bf08a8207, 0xce4e92344baf7d35, 0x51da8d7e057c5eb3, 0x9fb10823f5be15df,\n    0x73b825b3ff1f71cf, 0x5db436c5406ebb74, 0xfa7ed8f3ec3f2bca, 
0xc4d58efdc61b9ef6,\n    0xa7e39e61e855bd45, 0x97ad46f9dd1bf2f1, 0x1a0abb01f853ee6b, 0x3f0827c3348f8215,\n    0x4eb68c4506134607, 0x4a46f6de5df34e0a, 0x2d855d6a1c57a8dd, 0x8688da58e1115812,\n    0x5232f417fc7c7300, 0xa4080fb2e767d8da, 0xd515a7e17693e562, 0x1181f7c862e94226,\n    0x9e23cd058204ca91, 0x9b8992c57a0aed82, 0xb2c0afb84609b6ff, 0x2f7160553a5ea018,\n    0x3cd378b5c99f2722, 0x814054ad61a3b058, 0xbf766189fce806d8, 0x85a5e898ac49f86f,\n    0x34830d11bc84f346, 0x9644d95b173c8c1c, 0x150401ac9ac759b1, 0xebe1f7f46fb00eba,\n    0x8ee4ce0c2e2bd662, 0x4000000000000000, 0x2000000000000000, 0x0800000000000000};\n\ntemplate <>\nuint64_t crc64::_crc_table[sizeof(crc64::_crc_table) / sizeof(crc64::_crc_table[0])] = {\n    0x0000000000000000, 0x7f6ef0c830358979, 0xfedde190606b12f2, 0x81b31158505e9b8b,\n    0xc962e5739841b68f, 0xb60c15bba8743ff6, 0x37bf04e3f82aa47d, 0x48d1f42bc81f2d04,\n    0xa61cecb46814fe75, 0xd9721c7c5821770c, 0x58c10d24087fec87, 0x27affdec384a65fe,\n    0x6f7e09c7f05548fa, 0x1010f90fc060c183, 0x91a3e857903e5a08, 0xeecd189fa00bd371,\n    0x78e0ff3b88be6f81, 0x078e0ff3b88be6f8, 0x863d1eabe8d57d73, 0xf953ee63d8e0f40a,\n    0xb1821a4810ffd90e, 0xceecea8020ca5077, 0x4f5ffbd87094cbfc, 0x30310b1040a14285,\n    0xdefc138fe0aa91f4, 0xa192e347d09f188d, 0x2021f21f80c18306, 0x5f4f02d7b0f40a7f,\n    0x179ef6fc78eb277b, 0x68f0063448deae02, 0xe943176c18803589, 0x962de7a428b5bcf0,\n    0xf1c1fe77117cdf02, 0x8eaf0ebf2149567b, 0x0f1c1fe77117cdf0, 0x7072ef2f41224489,\n    0x38a31b04893d698d, 0x47cdebccb908e0f4, 0xc67efa94e9567b7f, 0xb9100a5cd963f206,\n    0x57dd12c379682177, 0x28b3e20b495da80e, 0xa900f35319033385, 0xd66e039b2936bafc,\n    0x9ebff7b0e12997f8, 0xe1d10778d11c1e81, 0x606216208142850a, 0x1f0ce6e8b1770c73,\n    0x8921014c99c2b083, 0xf64ff184a9f739fa, 0x77fce0dcf9a9a271, 0x08921014c99c2b08,\n    0x4043e43f0183060c, 0x3f2d14f731b68f75, 0xbe9e05af61e814fe, 0xc1f0f56751dd9d87,\n    0x2f3dedf8f1d64ef6, 0x50531d30c1e3c78f, 0xd1e00c6891bd5c04, 0xae8efca0a188d57d,\n   
 0xe65f088b6997f879, 0x9931f84359a27100, 0x1882e91b09fcea8b, 0x67ec19d339c963f2,\n    0xd75adabd7a6e2d6f, 0xa8342a754a5ba416, 0x29873b2d1a053f9d, 0x56e9cbe52a30b6e4,\n    0x1e383fcee22f9be0, 0x6156cf06d21a1299, 0xe0e5de5e82448912, 0x9f8b2e96b271006b,\n    0x71463609127ad31a, 0x0e28c6c1224f5a63, 0x8f9bd7997211c1e8, 0xf0f5275142244891,\n    0xb824d37a8a3b6595, 0xc74a23b2ba0eecec, 0x46f932eaea507767, 0x3997c222da65fe1e,\n    0xafba2586f2d042ee, 0xd0d4d54ec2e5cb97, 0x5167c41692bb501c, 0x2e0934dea28ed965,\n    0x66d8c0f56a91f461, 0x19b6303d5aa47d18, 0x980521650afae693, 0xe76bd1ad3acf6fea,\n    0x09a6c9329ac4bc9b, 0x76c839faaaf135e2, 0xf77b28a2faafae69, 0x8815d86aca9a2710,\n    0xc0c42c4102850a14, 0xbfaadc8932b0836d, 0x3e19cdd162ee18e6, 0x41773d1952db919f,\n    0x269b24ca6b12f26d, 0x59f5d4025b277b14, 0xd846c55a0b79e09f, 0xa72835923b4c69e6,\n    0xeff9c1b9f35344e2, 0x90973171c366cd9b, 0x1124202993385610, 0x6e4ad0e1a30ddf69,\n    0x8087c87e03060c18, 0xffe938b633338561, 0x7e5a29ee636d1eea, 0x0134d92653589793,\n    0x49e52d0d9b47ba97, 0x368bddc5ab7233ee, 0xb738cc9dfb2ca865, 0xc8563c55cb19211c,\n    0x5e7bdbf1e3ac9dec, 0x21152b39d3991495, 0xa0a63a6183c78f1e, 0xdfc8caa9b3f20667,\n    0x97193e827bed2b63, 0xe877ce4a4bd8a21a, 0x69c4df121b863991, 0x16aa2fda2bb3b0e8,\n    0xf86737458bb86399, 0x8709c78dbb8deae0, 0x06bad6d5ebd3716b, 0x79d4261ddbe6f812,\n    0x3105d23613f9d516, 0x4e6b22fe23cc5c6f, 0xcfd833a67392c7e4, 0xb0b6c36e43a74e9d,\n    0x9a6c9329ac4bc9b5, 0xe50263e19c7e40cc, 0x64b172b9cc20db47, 0x1bdf8271fc15523e,\n    0x530e765a340a7f3a, 0x2c608692043ff643, 0xadd397ca54616dc8, 0xd2bd67026454e4b1,\n    0x3c707f9dc45f37c0, 0x431e8f55f46abeb9, 0xc2ad9e0da4342532, 0xbdc36ec59401ac4b,\n    0xf5129aee5c1e814f, 0x8a7c6a266c2b0836, 0x0bcf7b7e3c7593bd, 0x74a18bb60c401ac4,\n    0xe28c6c1224f5a634, 0x9de29cda14c02f4d, 0x1c518d82449eb4c6, 0x633f7d4a74ab3dbf,\n    0x2bee8961bcb410bb, 0x548079a98c8199c2, 0xd53368f1dcdf0249, 0xaa5d9839ecea8b30,\n    0x449080a64ce15841, 0x3bfe706e7cd4d138, 
0xba4d61362c8a4ab3, 0xc52391fe1cbfc3ca,\n    0x8df265d5d4a0eece, 0xf29c951de49567b7, 0x732f8445b4cbfc3c, 0x0c41748d84fe7545,\n    0x6bad6d5ebd3716b7, 0x14c39d968d029fce, 0x95708ccedd5c0445, 0xea1e7c06ed698d3c,\n    0xa2cf882d2576a038, 0xdda178e515432941, 0x5c1269bd451db2ca, 0x237c997575283bb3,\n    0xcdb181ead523e8c2, 0xb2df7122e51661bb, 0x336c607ab548fa30, 0x4c0290b2857d7349,\n    0x04d364994d625e4d, 0x7bbd94517d57d734, 0xfa0e85092d094cbf, 0x856075c11d3cc5c6,\n    0x134d926535897936, 0x6c2362ad05bcf04f, 0xed9073f555e26bc4, 0x92fe833d65d7e2bd,\n    0xda2f7716adc8cfb9, 0xa54187de9dfd46c0, 0x24f29686cda3dd4b, 0x5b9c664efd965432,\n    0xb5517ed15d9d8743, 0xca3f8e196da80e3a, 0x4b8c9f413df695b1, 0x34e26f890dc31cc8,\n    0x7c339ba2c5dc31cc, 0x035d6b6af5e9b8b5, 0x82ee7a32a5b7233e, 0xfd808afa9582aa47,\n    0x4d364994d625e4da, 0x3258b95ce6106da3, 0xb3eba804b64ef628, 0xcc8558cc867b7f51,\n    0x8454ace74e645255, 0xfb3a5c2f7e51db2c, 0x7a894d772e0f40a7, 0x05e7bdbf1e3ac9de,\n    0xeb2aa520be311aaf, 0x944455e88e0493d6, 0x15f744b0de5a085d, 0x6a99b478ee6f8124,\n    0x224840532670ac20, 0x5d26b09b16452559, 0xdc95a1c3461bbed2, 0xa3fb510b762e37ab,\n    0x35d6b6af5e9b8b5b, 0x4ab846676eae0222, 0xcb0b573f3ef099a9, 0xb465a7f70ec510d0,\n    0xfcb453dcc6da3dd4, 0x83daa314f6efb4ad, 0x0269b24ca6b12f26, 0x7d0742849684a65f,\n    0x93ca5a1b368f752e, 0xeca4aad306bafc57, 0x6d17bb8b56e467dc, 0x12794b4366d1eea5,\n    0x5aa8bf68aecec3a1, 0x25c64fa09efb4ad8, 0xa4755ef8cea5d153, 0xdb1bae30fe90582a,\n    0xbcf7b7e3c7593bd8, 0xc399472bf76cb2a1, 0x422a5673a732292a, 0x3d44a6bb9707a053,\n    0x759552905f188d57, 0x0afba2586f2d042e, 0x8b48b3003f739fa5, 0xf42643c80f4616dc,\n    0x1aeb5b57af4dc5ad, 0x6585ab9f9f784cd4, 0xe436bac7cf26d75f, 0x9b584a0fff135e26,\n    0xd389be24370c7322, 0xace74eec0739fa5b, 0x2d545fb4576761d0, 0x523aaf7c6752e8a9,\n    0xc41748d84fe75459, 0xbb79b8107fd2dd20, 0x3acaa9482f8c46ab, 0x45a459801fb9cfd2,\n    0x0d75adabd7a6e2d6, 0x721b5d63e7936baf, 0xf3a84c3bb7cdf024, 0x8cc6bcf387f8795d,\n    
0x620ba46c27f3aa2c, 0x1d6554a417c62355, 0x9cd645fc4798b8de, 0xe3b8b53477ad31a7,\n    0xab69411fbfb21ca3, 0xd407b1d78f8795da, 0x55b4a08fdfd90e51, 0x2ada5047efec8728};\n\n#undef crc32_POLY\n#undef crc64_POLY\n#undef BIT64\n#undef BIT32\n}\n}\n\nnamespace dsn {\nnamespace utils {\nuint32_t crc32_calc(const void *ptr, size_t size, uint32_t init_crc)\n{\n    return dsn::utils::crc32::compute(ptr, size, init_crc);\n}\n\nuint32_t crc32_concat(uint32_t xy_init,\n                      uint32_t x_init,\n                      uint32_t x_final,\n                      size_t x_size,\n                      uint32_t y_init,\n                      uint32_t y_final,\n                      size_t y_size)\n{\n    return dsn::utils::crc32::concatenate(\n        0, x_init, x_final, (uint64_t)x_size, y_init, y_final, (uint64_t)y_size);\n}\n\nuint64_t crc64_calc(const void *ptr, size_t size, uint64_t init_crc)\n{\n    return dsn::utils::crc64::compute(ptr, size, init_crc);\n}\n\nuint64_t crc64_concat(uint32_t xy_init,\n                      uint64_t x_init,\n                      uint64_t x_final,\n                      size_t x_size,\n                      uint64_t y_init,\n                      uint64_t y_final,\n                      size_t y_size)\n{\n    return ::dsn::utils::crc64::concatenate(\n        0, x_init, x_final, (uint64_t)x_size, y_init, y_final, (uint64_t)y_size);\n}\n}\n}\n"
  },
  {
    "path": "src/utils/error_code.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#include <dsn/utility/error_code.h>\n\nnamespace dsn {\n/*static*/\nint error_code::max()\n{\n    return dsn::utils::customized_id_mgr<dsn::error_code>::instance().max_value();\n}\n/*static*/\nbool error_code::is_exist(const char *name)\n{\n    return dsn::utils::customized_id_mgr<dsn::error_code>::instance().get_id(name) != -1;\n}\n/*static*/\nerror_code error_code::try_get(const char *name, error_code default_value)\n{\n    int ans = dsn::utils::customized_id_mgr<dsn::error_code>::instance().get_id(name);\n    if (ans == -1)\n        return default_value;\n    return error_code(ans);\n}\n/*static*/\nerror_code error_code::try_get(const std::string &name, error_code default_value)\n{\n    int ans = 
dsn::utils::customized_id_mgr<dsn::error_code>::instance().get_id(name);\n    if (ans == -1)\n        return default_value;\n    return error_code(ans);\n}\n\nerror_code::error_code(const char *name)\n{\n    _internal_code = dsn::utils::customized_id_mgr<dsn::error_code>::instance().register_id(name);\n}\n\nconst char *error_code::to_string() const\n{\n    return dsn::utils::customized_id_mgr<dsn::error_code>::instance().get_name(_internal_code);\n}\n}\n"
  },
  {
    "path": "src/utils/fail_point.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n// Copyright 2017 PingCAP, Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"fail_point_impl.h\"\n\n#include <dsn/c/api_layer1.h>\n// TOOD(wutao1): use <regex> instead when our lowest compiler support\n//               advances to gcc-4.9.\n#include <boost/regex.hpp>\n#include <dsn/utility/rand.h>\n\nnamespace dsn {\nnamespace fail {\n\nstatic fail_point_registry REGISTRY;\n\n/*extern*/ const std::string *eval(string_view name)\n{\n    fail_point *p = REGISTRY.try_get(name);\n    if (!p) {\n        return nullptr;\n    }\n    return p->eval();\n}\n\ninline const char *task_type_to_string(fail_point::task_type t)\n{\n    switch (t) {\n    case fail_point::Off:\n        return \"Off\";\n 
   case fail_point::Return:\n        return \"Return\";\n    case fail_point::Print:\n        return \"Print\";\n    case fail_point::Void:\n        return \"Void\";\n    default:\n        dfatal(\"unexpected type: %d\", t);\n        __builtin_unreachable();\n    }\n}\n\n/*extern*/ void cfg(string_view name, string_view action)\n{\n    fail_point &p = REGISTRY.create_if_not_exists(name);\n    p.set_action(action);\n    ddebug(\"add fail_point [name: %s, task: %s(%s), frequency: %d%, max_count: %d]\",\n           name.data(),\n           task_type_to_string(p.get_task()),\n           p.get_arg().data(),\n           p.get_frequency(),\n           p.get_max_count());\n}\n\n/*static*/ bool _S_FAIL_POINT_ENABLED = false;\n\n/*extern*/ void setup() { _S_FAIL_POINT_ENABLED = true; }\n\n/*extern*/ void teardown()\n{\n    REGISTRY.clear();\n    _S_FAIL_POINT_ENABLED = false;\n}\n\nvoid fail_point::set_action(string_view action)\n{\n    if (!parse_from_string(action)) {\n        dfatal(\"unrecognized command: %s\", action.data());\n    }\n}\n\nbool fail_point::parse_from_string(string_view action)\n{\n    _max_cnt = -1;\n    _freq = 100;\n\n    boost::regex regex(R\"((\\d+\\%)?(\\d+\\*)?(\\w+)(\\((.*)\\))?)\");\n    boost::smatch match;\n\n    std::string tmp(action.data(), action.length());\n    if (boost::regex_match(tmp, match, regex)) {\n        if (match.size() == 6) {\n            boost::ssub_match sub_match = match[1];\n            if (!sub_match.str().empty()) {\n                sscanf(sub_match.str().data(), \"%d%%\", &_freq);\n            }\n\n            sub_match = match[2];\n            if (!sub_match.str().empty()) {\n                sscanf(sub_match.str().data(), \"%d*\", &_max_cnt);\n            }\n\n            sub_match = match[3];\n            std::string task_type = sub_match.str();\n            if (task_type.compare(\"off\") == 0) {\n                _task = Off;\n            } else if (task_type.compare(\"return\") == 0) {\n                _task = 
Return;\n            } else if (task_type.compare(\"print\") == 0) {\n                _task = Print;\n            } else if (task_type.compare(\"void\") == 0) {\n                _task = Void;\n            } else {\n                return false;\n            }\n\n            sub_match = match[5];\n            if (!sub_match.str().empty()) {\n                _arg = sub_match.str();\n            }\n\n            return true;\n        }\n    }\n    return false;\n}\n\nconst std::string *fail_point::eval()\n{\n    uint32_t r = rand::next_u32(0, 100);\n    if (r > _freq) {\n        return nullptr;\n    }\n    if (_max_cnt == 0) {\n        return nullptr;\n    }\n    _max_cnt--;\n    ddebug(\"fail on %s\", _name.data());\n\n    switch (_task) {\n    case Off:\n        break;\n    case Void:\n    case Return:\n        return &_arg;\n    case Print:\n        ddebug(_arg.data());\n        break;\n    }\n    return nullptr;\n}\n\n} // namespace fail\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/fail_point_impl.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n// Copyright 2017 PingCAP, Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#pragma once\n\n#include <dsn/utility/fail_point.h>\n#include <dsn/c/api_utilities.h>\n#include <dsn/utility/ports.h>\n#include <mutex>\n#include <unordered_map>\n#include <utility>\n\nnamespace dsn {\nnamespace fail {\n\nstruct fail_point\n{\n    enum task_type\n    {\n        // `action` contain `off()`, which would `close` the fail_point whose `function` passed will\n        // not be executed;\n        Off,\n        // `action` contain `return()`, which would `return` args passed and execute `return` type\n        // function passed. 
it's usually used for `FAIL_POINT_INJECT_F`\n        Return,\n        // `action` contain `print()`, which would only just print `action` string value and ignore\n        // the `function` passed\n        Print,\n        // `action` contain `void()`, which would return args and execute `function` passed that\n        // better mark as `void` type, it's usually used for `FAIL_POINT_INJECT_NOT_RETURN_F` to\n        // avoid `return` function\n        Void,\n    };\n\n    void set_action(string_view action);\n\n    const std::string *eval();\n\n    explicit fail_point(string_view name) : _name(name) {}\n\n    /// for test only\n    fail_point(task_type t, std::string arg, int freq, int max_cnt)\n        : _task(t), _arg(std::move(arg)), _freq(freq), _max_cnt(max_cnt)\n    {\n    }\n\n    /// for test only\n    fail_point() = default;\n\n    bool parse_from_string(string_view action);\n\n    friend inline bool operator==(const fail_point &p1, const fail_point &p2)\n    {\n        return p1._task == p2._task && p1._arg == p2._arg && p1._freq == p2._freq &&\n               p1._max_cnt == p2._max_cnt;\n    }\n\n    task_type get_task() const { return _task; }\n\n    std::string get_arg() const { return _arg; }\n\n    int get_frequency() const { return _freq; }\n\n    int get_max_count() const { return _max_cnt; }\n\nprivate:\n    std::string _name;\n    task_type _task{Off};\n    std::string _arg;\n    int _freq{100};\n    int _max_cnt{-1}; // TODO(wutao1): not thread-safe\n};\n\nstruct fail_point_registry\n{\n    fail_point &create_if_not_exists(string_view name)\n    {\n        std::lock_guard<std::mutex> guard(_mu);\n\n        auto it = _registry.emplace(std::string(name), fail_point(name)).first;\n        return it->second;\n    }\n\n    fail_point *try_get(string_view name)\n    {\n        std::lock_guard<std::mutex> guard(_mu);\n\n        auto it = _registry.find(std::string(name.data(), name.length()));\n        if (it == _registry.end()) {\n            return 
nullptr;\n        }\n        return &it->second;\n    }\n\n    void clear()\n    {\n        std::lock_guard<std::mutex> guard(_mu);\n        _registry.clear();\n    }\n\nprivate:\n    mutable std::mutex _mu;\n    std::unordered_map<std::string, fail_point> _registry;\n};\n\n} // namespace fail\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/filesystem.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     File system utility functions.\n *\n * Revision history:\n *     2015-08-24, HX Lin(linmajia@live.com), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <fstream>\n\n#include <dsn/c/api_utilities.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/defer.h>\n#include <dsn/utility/fail_point.h>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/strings.h>\n#include <dsn/utility/utils.h>\n#include <dsn/utility/safe_strerror_posix.h>\n\n#include <sys/stat.h>\n#include <errno.h>\n#include <fcntl.h>\n#include <stdio.h>\n#include <boost/filesystem.hpp>\n#include <openssl/md5.h>\n#include <ftw.h>\n\n#define getcwd_ getcwd\n#define rmdir_ rmdir\n#define mkdir_(path) 
mkdir(path, 0775)\n#define close_ close\n#define stat_ stat\n\nnamespace dsn {\nnamespace utils {\nnamespace filesystem {\n\n#define _FS_COLON ':'\n#define _FS_PERIOD '.'\n#define _FS_SLASH '/'\n#define _FS_BSLASH '\\\\'\n#define _FS_STAR '*'\n#define _FS_QUESTION '?'\n#define _FS_NULL '\\0'\n#define _FS_ISSEP(x) ((x) == _FS_SLASH || (x) == _FS_BSLASH)\n\nstatic __thread char tls_path_buffer[PATH_MAX];\n#define TLS_PATH_BUFFER_SIZE PATH_MAX\n\n// npath need to be a normalized path\nstatic inline int get_stat_internal(const std::string &npath, struct stat_ &st)\n{\n    int err;\n\n    err = ::stat_(npath.c_str(), &st);\n    if (err != 0) {\n        err = errno;\n    }\n\n    return err;\n}\n\nint get_normalized_path(const std::string &path, std::string &npath)\n{\n    char sep;\n    size_t i;\n    size_t pos;\n    size_t len;\n    char c;\n\n    if (path.empty()) {\n        npath = \"\";\n        return 0;\n    }\n\n    len = path.length();\n\n    sep = _FS_SLASH;\n    i = 0;\n    pos = 0;\n    while (i < len) {\n        c = path[i++];\n        if (c == _FS_SLASH) {\n            while ((i < len) && _FS_ISSEP(path[i])) {\n                i++;\n            }\n        }\n\n        tls_path_buffer[pos++] = c;\n    }\n\n    tls_path_buffer[pos] = _FS_NULL;\n    if ((c == sep) && (pos > 1)) {\n        tls_path_buffer[pos - 1] = _FS_NULL;\n    }\n\n    dassert(tls_path_buffer[0] != _FS_NULL, \"Normalized path cannot be empty!\");\n    npath = tls_path_buffer;\n\n    return 0;\n}\n\nstatic __thread struct\n{\n    ftw_handler *handler;\n    bool recursive;\n} tls_ftw_ctx;\n\nstatic int ftw_wrapper(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf)\n{\n    if (!tls_ftw_ctx.recursive && (ftwbuf->level > 1)) {\n        if ((typeflag == FTW_D) || (typeflag == FTW_DP)) {\n            return FTW_SKIP_SUBTREE;\n        } else {\n            return FTW_SKIP_SIBLINGS;\n        }\n    }\n\n    return (*tls_ftw_ctx.handler)(fpath, typeflag, ftwbuf);\n}\n\nbool 
file_tree_walk(const std::string &dirpath, ftw_handler handler, bool recursive)\n{\n    tls_ftw_ctx.handler = &handler;\n    tls_ftw_ctx.recursive = recursive;\n#if defined(__linux__)\n    int flags = FTW_ACTIONRETVAL;\n#else\n    int flags = 0;\n#endif // defined(__linux__)\n    if (recursive) {\n        flags |= FTW_DEPTH;\n    }\n    int ret = ::nftw(dirpath.c_str(), ftw_wrapper, 1, flags);\n\n    return (ret == 0);\n}\n\n// npath need to be a normalized path\nstatic bool path_exists_internal(const std::string &npath, int type)\n{\n    bool ret;\n    struct stat_ st;\n    int err;\n\n    err = dsn::utils::filesystem::get_stat_internal(npath, st);\n    if (err != 0) {\n        return false;\n    }\n\n    switch (type) {\n    case FTW_F:\n        ret = S_ISREG(st.st_mode);\n        break;\n    case FTW_D:\n        ret = S_ISDIR(st.st_mode);\n        break;\n    case FTW_NS:\n        ret = S_ISREG(st.st_mode) || S_ISDIR(st.st_mode);\n        break;\n    default:\n        ret = false;\n        break;\n    }\n\n    return ret;\n}\n\nbool path_exists(const std::string &path)\n{\n    std::string npath;\n    int err;\n\n    if (path.empty()) {\n        return false;\n    }\n\n    err = get_normalized_path(path, npath);\n    if (err != 0) {\n        return false;\n    }\n\n    return dsn::utils::filesystem::path_exists_internal(npath, FTW_NS);\n}\n\nbool directory_exists(const std::string &path)\n{\n    std::string npath;\n    int err;\n\n    if (path.empty()) {\n        return false;\n    }\n\n    err = get_normalized_path(path, npath);\n    if (err != 0) {\n        return false;\n    }\n\n    return dsn::utils::filesystem::path_exists_internal(npath, FTW_D);\n}\n\nbool file_exists(const std::string &path)\n{\n    std::string npath;\n    int err;\n\n    if (path.empty()) {\n        return false;\n    }\n\n    err = get_normalized_path(path, npath);\n    if (err != 0) {\n        return false;\n    }\n\n    return dsn::utils::filesystem::path_exists_internal(npath, 
FTW_F);\n}\n\nstatic bool get_subpaths(const std::string &path,\n                         std::vector<std::string> &sub_list,\n                         bool recursive,\n                         int typeflags)\n{\n    std::string npath;\n    bool ret;\n    int err;\n\n    if (path.empty()) {\n        return false;\n    }\n\n    err = get_normalized_path(path, npath);\n    if (err != 0) {\n        return false;\n    }\n\n    if (!dsn::utils::filesystem::path_exists_internal(npath, FTW_D)) {\n        return false;\n    }\n\n    switch (typeflags) {\n    case FTW_F:\n        ret = dsn::utils::filesystem::file_tree_walk(\n            npath,\n            [&sub_list](const char *fpath, int typeflag, struct FTW *ftwbuf) {\n                if (typeflag == FTW_F) {\n                    sub_list.push_back(fpath);\n                }\n\n                return FTW_CONTINUE;\n            },\n            recursive);\n        break;\n\n    case FTW_D:\n        ret = dsn::utils::filesystem::file_tree_walk(\n            npath,\n            [&sub_list](const char *fpath, int typeflag, struct FTW *ftwbuf) {\n                if (((typeflag == FTW_D) || (typeflag == FTW_DP)) && (ftwbuf->level > 0)) {\n                    sub_list.push_back(fpath);\n                }\n\n                return FTW_CONTINUE;\n            },\n            recursive);\n        break;\n\n    case FTW_NS:\n        ret = dsn::utils::filesystem::file_tree_walk(\n            npath,\n            [&sub_list](const char *fpath, int typeflag, struct FTW *ftwbuf) {\n                if (ftwbuf->level > 0) {\n                    sub_list.push_back(fpath);\n                }\n\n                return FTW_CONTINUE;\n            },\n            recursive);\n        break;\n\n    default:\n        ret = false;\n        break;\n    }\n\n    return ret;\n}\n\nbool get_subfiles(const std::string &path, std::vector<std::string> &sub_list, bool recursive)\n{\n    return dsn::utils::filesystem::get_subpaths(path, sub_list, 
recursive, FTW_F);\n}\n\nbool get_subdirectories(const std::string &path, std::vector<std::string> &sub_list, bool recursive)\n{\n    return dsn::utils::filesystem::get_subpaths(path, sub_list, recursive, FTW_D);\n}\n\nbool get_subpaths(const std::string &path, std::vector<std::string> &sub_list, bool recursive)\n{\n    return dsn::utils::filesystem::get_subpaths(path, sub_list, recursive, FTW_NS);\n}\n\nstatic bool remove_directory(const std::string &npath)\n{\n    boost::system::error_code ec;\n    boost::filesystem::remove_all(npath, ec);\n    // TODO(wutao1): return the specific error to caller\n    if (dsn_unlikely(bool(ec))) {\n        dwarn(\"remove %s failed, err = %s\", npath.c_str(), ec.message().c_str());\n        return false;\n    }\n    return true;\n}\n\nbool remove_path(const std::string &path)\n{\n    std::string npath;\n    int err;\n\n    if (path.empty()) {\n        return false;\n    }\n\n    err = get_normalized_path(path, npath);\n    if (err != 0) {\n        return false;\n    }\n\n    if (dsn::utils::filesystem::path_exists_internal(npath, FTW_F)) {\n        bool ret = (::remove(npath.c_str()) == 0);\n        if (!ret) {\n            dwarn(\"remove file %s failed, err = %s\", path.c_str(), safe_strerror(errno).c_str());\n        }\n        return ret;\n    } else if (dsn::utils::filesystem::path_exists_internal(npath, FTW_D)) {\n        return dsn::utils::filesystem::remove_directory(npath);\n    } else {\n        return true;\n    }\n}\n\nbool rename_path(const std::string &path1, const std::string &path2)\n{\n    bool ret;\n\n    ret = (::rename(path1.c_str(), path2.c_str()) == 0);\n    if (!ret) {\n        dwarn(\"rename from '%s' to '%s' failed, err = %s\",\n              path1.c_str(),\n              path2.c_str(),\n              safe_strerror(errno).c_str());\n    }\n\n    return ret;\n}\n\nbool file_size(const std::string &path, int64_t &sz)\n{\n    struct stat_ st;\n    std::string npath;\n    int err;\n\n    if (path.empty()) {\n   
     return false;\n    }\n\n    err = get_normalized_path(path, npath);\n    if (err != 0) {\n        return false;\n    }\n\n    err = dsn::utils::filesystem::get_stat_internal(npath, st);\n    if (err != 0) {\n        return false;\n    }\n\n    if (!S_ISREG(st.st_mode)) {\n        return false;\n    }\n\n    sz = st.st_size;\n\n    return true;\n}\n\nstatic int create_directory_component(const std::string &npath)\n{\n    int err;\n\n    if (::mkdir_(npath.c_str()) == 0) {\n        return 0;\n    }\n\n    err = errno;\n    if (err != EEXIST) {\n        return err;\n    }\n\n    return (dsn::utils::filesystem::path_exists_internal(npath, FTW_F) ? EEXIST : 0);\n}\n\nbool create_directory(const std::string &path)\n{\n    size_t prev = 0;\n    size_t pos;\n    char sep;\n    std::string npath;\n    std::string cpath;\n    size_t len;\n    int err;\n\n    if (path.empty()) {\n        return false;\n    }\n\n    err = get_normalized_path(path, npath);\n    if (err != 0) {\n        return false;\n    }\n\n    err = dsn::utils::filesystem::create_directory_component(npath);\n    if (err == 0) {\n        return true;\n    } else if (err != ENOENT) {\n        cpath = path;\n        goto out_error;\n    }\n\n    len = npath.length();\n    sep = _FS_SLASH;\n    if (npath[0] == sep) {\n        prev = 1;\n    }\n\n    while ((pos = npath.find_first_of(sep, prev)) != std::string::npos) {\n        cpath = npath.substr(0, pos++);\n        prev = pos;\n\n        err = dsn::utils::filesystem::create_directory_component(cpath);\n        if (err != 0) {\n            goto out_error;\n        }\n    }\n\n    if (prev < len) {\n        err = dsn::utils::filesystem::create_directory_component(npath);\n        if (err != 0) {\n            cpath = npath;\n            goto out_error;\n        }\n    }\n\n    return true;\n\nout_error:\n    dwarn(\"create_directory %s failed due to cannot create the component: %s, err = %s\",\n          path.c_str(),\n          cpath.c_str(),\n          
safe_strerror(err).c_str());\n    return false;\n}\n\nbool create_file(const std::string &path)\n{\n    size_t pos;\n    std::string npath;\n    int fd;\n    int mode;\n    int err;\n\n    if (path.empty()) {\n        return false;\n    }\n\n    if (_FS_ISSEP(path.back())) {\n        return false;\n    }\n\n    err = get_normalized_path(path, npath);\n    if (err != 0) {\n        return false;\n    }\n\n    if (dsn::utils::filesystem::path_exists_internal(npath, FTW_F)) {\n        return true;\n    }\n\n    if (dsn::utils::filesystem::path_exists_internal(npath, FTW_D)) {\n        return false;\n    }\n\n    pos = npath.find_last_of(\"\\\\/\");\n    if ((pos != std::string::npos) && (pos > 0)) {\n        auto ppath = npath.substr(0, pos);\n        if (!dsn::utils::filesystem::create_directory(ppath)) {\n            return false;\n        }\n    }\n\n    mode = 0775;\n    fd = ::creat(npath.c_str(), mode);\n    if (fd == -1) {\n        err = errno;\n        dwarn(\"create_file %s failed, err = %s\", path.c_str(), safe_strerror(err).c_str());\n        return false;\n    }\n\n    if (::close_(fd) != 0) {\n        dwarn(\"create_file %s, failed to close the file handle.\", path.c_str());\n    }\n\n    return true;\n}\n\nbool get_absolute_path(const std::string &path1, std::string &path2)\n{\n    bool succ;\n    succ = (::realpath(path1.c_str(), tls_path_buffer) != nullptr);\n    if (succ) {\n        path2 = tls_path_buffer;\n    }\n\n    return succ;\n}\n\nstd::string remove_file_name(const std::string &path)\n{\n    size_t len;\n    size_t pos;\n\n    len = path.length();\n    if (len == 0) {\n        return \"\";\n    }\n\n    pos = path.find_last_of(\"\\\\/\");\n    if (pos == std::string::npos) {\n        return \"\";\n    }\n\n    if (pos == len) {\n        return path;\n    }\n\n    return path.substr(0, pos);\n}\n\nstd::string get_file_name(const std::string &path)\n{\n    size_t len;\n    size_t last;\n    size_t pos;\n\n    len = path.length();\n    if (len == 
0) {\n        return \"\";\n    }\n\n    last = len - 1;\n\n    pos = path.find_last_of(\"\\\\/\");\n\n    if (pos == last) {\n        return \"\";\n    }\n\n    if (pos == std::string::npos) {\n        return path;\n    }\n\n    return path.substr((pos + 1), (len - pos));\n}\n\nstd::string path_combine(const std::string &path1, const std::string &path2)\n{\n    int err;\n    std::string path3;\n    std::string npath;\n\n    if (path1.empty()) {\n        err = dsn::utils::filesystem::get_normalized_path(path2, npath);\n    } else if (path2.empty()) {\n        err = dsn::utils::filesystem::get_normalized_path(path1, npath);\n    } else {\n        path3 = path1;\n        path3.append(1, _FS_SLASH);\n        path3.append(path2);\n\n        err = dsn::utils::filesystem::get_normalized_path(path3, npath);\n    }\n\n    return ((err == 0) ? npath : \"\");\n}\n\nbool get_current_directory(std::string &path)\n{\n    bool succ;\n\n    succ = (::getcwd_(tls_path_buffer, TLS_PATH_BUFFER_SIZE) != nullptr);\n    if (succ) {\n        path = tls_path_buffer;\n    }\n\n    return succ;\n}\n\nbool last_write_time(const std::string &path, time_t &tm)\n{\n    struct stat_ st;\n    std::string npath;\n    int err;\n\n    if (path.empty()) {\n        return false;\n    }\n\n    err = get_normalized_path(path, npath);\n    if (err != 0) {\n        return false;\n    }\n\n    err = dsn::utils::filesystem::get_stat_internal(npath, st);\n    if (err != 0) {\n        return false;\n    }\n\n    tm = st.st_mtime;\n\n    return true;\n}\n\nerror_code get_process_image_path(int pid, std::string &path)\n{\n    if (pid < -1) {\n        return ERR_INVALID_PARAMETERS;\n    }\n\n    int err;\n\n    char tmp[48];\n\n    err = snprintf_p(\n        tmp, ARRAYSIZE(tmp), \"/proc/%s/exe\", (pid == -1) ? 
\"self\" : std::to_string(pid).c_str());\n    dassert(err >= 0, \"snprintf_p failed.\");\n\n    err = (int)readlink(tmp, tls_path_buffer, TLS_PATH_BUFFER_SIZE);\n    if (err == -1) {\n        return ERR_PATH_NOT_FOUND;\n    }\n\n    tls_path_buffer[err] = 0;\n    path = tls_path_buffer;\n\n    return ERR_OK;\n}\n\nbool get_disk_space_info(const std::string &path, disk_space_info &info)\n{\n    FAIL_POINT_INJECT_F(\"filesystem_get_disk_space_info\", [&info](string_view str) {\n        info.capacity = 100 * 1024 * 1024;\n        if (str.find(\"insufficient\") != string_view::npos) {\n            info.available = 5 * 1024 * 1024;\n        } else {\n            info.available = 50 * 1024 * 1024;\n        }\n        return true;\n    });\n\n    boost::system::error_code ec;\n    boost::filesystem::space_info in = boost::filesystem::space(path, ec);\n    if (ec) {\n        derror(\n            \"get disk space info failed: path = %s, err = %s\", path.c_str(), ec.message().c_str());\n        return false;\n    } else {\n        info.capacity = in.capacity;\n        info.available = in.available;\n        return true;\n    }\n}\n\nbool link_file(const std::string &src, const std::string &target)\n{\n    if (src.empty() || target.empty())\n        return false;\n    if (!file_exists(src) || file_exists(target))\n        return false;\n    int err = 0;\n    err = ::link(src.c_str(), target.c_str());\n    return (err == 0);\n}\n\nerror_code md5sum(const std::string &file_path, /*out*/ std::string &result)\n{\n    result.clear();\n    // if file not exist, we return ERR_OBJECT_NOT_FOUND\n    if (!::dsn::utils::filesystem::file_exists(file_path)) {\n        derror(\"md5sum error: file %s not exist\", file_path.c_str());\n        return ERR_OBJECT_NOT_FOUND;\n    }\n\n    FILE *fp = fopen(file_path.c_str(), \"rb\");\n    if (fp == nullptr) {\n        derror(\"md5sum error: open file %s failed\", file_path.c_str());\n        return ERR_FILE_OPERATION_FAILED;\n    }\n\n    char 
buf[4096];\n    unsigned char out[MD5_DIGEST_LENGTH];\n    MD5_CTX c;\n    MD5_Init(&c);\n    while (true) {\n        size_t ret_code = fread(buf, sizeof(char), 4096, fp);\n        if (ret_code == 4096) {\n            MD5_Update(&c, buf, 4096);\n        } else {\n            if (feof(fp)) {\n                if (ret_code > 0)\n                    MD5_Update(&c, buf, ret_code);\n                break;\n            } else {\n                int err = ferror(fp);\n                derror(\"md5sum error: read file %s failed: errno = %d (%s)\",\n                       file_path.c_str(),\n                       err,\n                       safe_strerror(err).c_str());\n                fclose(fp);\n                MD5_Final(out, &c);\n                return ERR_FILE_OPERATION_FAILED;\n            }\n        }\n    }\n    fclose(fp);\n    MD5_Final(out, &c);\n\n    char str[MD5_DIGEST_LENGTH * 2 + 1];\n    str[MD5_DIGEST_LENGTH * 2] = 0;\n    for (int n = 0; n < MD5_DIGEST_LENGTH; n++)\n        sprintf(str + n + n, \"%02x\", out[n]);\n    result.assign(str);\n\n    return ERR_OK;\n}\n\nstd::pair<error_code, bool> is_directory_empty(const std::string &dirname)\n{\n    std::pair<error_code, bool> res;\n    res.first = ERR_OK;\n    std::vector<std::string> subfiles;\n    std::vector<std::string> subdirs;\n    if (get_subfiles(dirname, subfiles, false) && get_subdirectories(dirname, subdirs, false)) {\n        res.second = subfiles.empty() && subdirs.empty();\n    } else {\n        res.first = ERR_FILE_OPERATION_FAILED;\n    }\n    return res;\n}\n\nerror_code read_file(const std::string &fname, std::string &buf)\n{\n    if (!file_exists(fname)) {\n        derror_f(\"file({}) doesn't exist\", fname);\n        return ERR_FILE_OPERATION_FAILED;\n    }\n\n    int64_t file_sz = 0;\n    if (!file_size(fname, file_sz)) {\n        derror_f(\"get file({}) size failed\", fname);\n        return ERR_FILE_OPERATION_FAILED;\n    }\n\n    buf.resize(file_sz);\n    std::ifstream fin(fname, 
std::ifstream::in);\n    if (!fin.is_open()) {\n        derror_f(\"open file({}) failed\", fname);\n        return ERR_FILE_OPERATION_FAILED;\n    }\n    fin.read(&buf[0], file_sz);\n    dassert_f(file_sz == fin.gcount(),\n              \"read file({}) failed, file_size = {} but read size = {}\",\n              fname,\n              file_sz,\n              fin.gcount());\n    fin.close();\n    return ERR_OK;\n}\n\nbool verify_file(const std::string &fname,\n                 const std::string &expected_md5,\n                 const int64_t &expected_fsize)\n{\n    if (!file_exists(fname)) {\n        derror_f(\"file({}) is not existed\", fname);\n        return false;\n    }\n    int64_t f_size = 0;\n    if (!file_size(fname, f_size)) {\n        derror_f(\"verify file({}) failed, becaused failed to get file size\", fname);\n        return false;\n    }\n    std::string md5;\n    if (md5sum(fname, md5) != ERR_OK) {\n        derror_f(\"verify file({}) failed, becaused failed to get file md5\", fname);\n        return false;\n    }\n    if (f_size != expected_fsize || md5 != expected_md5) {\n        derror_f(\"verify file({}) failed, because file damaged, size: {} VS {}, md5: {} VS {}\",\n                 fname,\n                 f_size,\n                 expected_fsize,\n                 md5,\n                 expected_md5);\n        return false;\n    }\n    return true;\n}\n\nbool verify_file_size(const std::string &fname, const int64_t &expected_fsize)\n{\n    if (!file_exists(fname)) {\n        derror_f(\"file({}) is not existed\", fname);\n        return false;\n    }\n    int64_t f_size = 0;\n    if (!file_size(fname, f_size)) {\n        derror_f(\"verify file({}) size failed, becaused failed to get file size\", fname);\n        return false;\n    }\n    if (f_size != expected_fsize) {\n        derror_f(\"verify file({}) size failed, because file damaged, size: {} VS {}\",\n                 fname,\n                 f_size,\n                 expected_fsize);\n      
  return false;\n    }\n    return true;\n}\n\nbool verify_data_md5(const std::string &fname,\n                     const char *data,\n                     const size_t data_size,\n                     const std::string &expected_md5)\n{\n    std::string md5 = string_md5(data, data_size);\n    if (md5 != expected_md5) {\n        derror_f(\"verify data({}) failed, because data damaged, size: md5: {} VS {}\",\n                 fname,\n                 md5,\n                 expected_md5);\n        return false;\n    }\n    return true;\n}\n\nbool create_directory(const std::string &path, std::string &absolute_path, std::string &err_msg)\n{\n    FAIL_POINT_INJECT_F(\"filesystem_create_directory\", [path](string_view str) {\n        // when str contains 'false', and path contains broken_disk_dir, mock create fail(return\n        // false)\n        std::string broken_disk_dir = \"disk1\";\n        return str.find(\"false\") == string_view::npos ||\n               path.find(broken_disk_dir) == std::string::npos;\n    });\n\n    if (!create_directory(path)) {\n        err_msg = fmt::format(\"Fail to create directory {}.\", path);\n        return false;\n    }\n    if (!get_absolute_path(path, absolute_path)) {\n        err_msg = fmt::format(\"Fail to get absolute path from {}.\", path);\n        return false;\n    }\n    return true;\n}\n\nbool write_file(const std::string &fname, std::string &buf)\n{\n    if (!file_exists(fname)) {\n        derror_f(\"file({}) doesn't exist\", fname);\n        return false;\n    }\n\n    std::ofstream fstream;\n    fstream.open(fname.c_str());\n    fstream << buf;\n    fstream.close();\n    return true;\n}\n\nbool check_dir_rw(const std::string &path, std::string &err_msg)\n{\n    FAIL_POINT_INJECT_F(\"filesystem_check_dir_rw\", [path](string_view str) {\n        // when str contains 'false', and path contains broken_disk_dir, mock check fail(return\n        // false)\n        std::string broken_disk_dir = \"disk1\";\n        return 
str.find(\"false\") == string_view::npos ||\n               path.find(broken_disk_dir) == std::string::npos;\n    });\n\n    std::string fname = \"read_write_test_file\";\n    std::string fpath = path_combine(path, fname);\n    if (!create_file(fpath)) {\n        err_msg = fmt::format(\"Fail to create test file {}.\", fpath);\n        return false;\n    }\n\n    auto cleanup = defer([&fpath]() { remove_path(fpath); });\n    std::string value = \"test_value\";\n    if (!write_file(fpath, value)) {\n        err_msg = fmt::format(\"Fail to write file {}.\", fpath);\n        return false;\n    }\n\n    std::string buf;\n    if (read_file(fpath, buf) != ERR_OK || buf != value) {\n        err_msg = fmt::format(\"Fail to read file {} or get wrong value({}).\", fpath, buf);\n        return false;\n    }\n\n    return true;\n}\n\n} // namespace filesystem\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/flags.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <boost/algorithm/string/join.hpp>\n\n#include <dsn/utility/flags.h>\n#include <dsn/utility/config_api.h>\n#include <dsn/utility/singleton.h>\n#include <dsn/utility/errors.h>\n#include <dsn/utility/string_conv.h>\n#include <dsn/utility/join_point.h>\n#include <dsn/c/api_utilities.h>\n#include <boost/optional/optional.hpp>\n#include <dsn/dist/fmt_logging.h>\n\n#include <map>\n#include <dsn/utility/output_utils.h>\n\nnamespace dsn {\n\nenum value_type\n{\n    FV_BOOL = 0,\n    FV_INT32 = 1,\n    FV_UINT32 = 2,\n    FV_INT64 = 3,\n    FV_UINT64 = 4,\n    FV_DOUBLE = 5,\n    FV_STRING = 6,\n    FV_MAX_INDEX = 6,\n};\n\nENUM_BEGIN(value_type, FV_MAX_INDEX)\nENUM_REG(FV_BOOL)\nENUM_REG(FV_INT32)\nENUM_REG(FV_UINT32)\nENUM_REG(FV_INT64)\nENUM_REG(FV_UINT64)\nENUM_REG(FV_DOUBLE)\nENUM_REG(FV_STRING)\nENUM_END(value_type)\n\nclass flag_data\n{\npublic:\n#define FLAG_DATA_LOAD_CASE(type, type_enum, suffix)                                               \\\n    case type_enum:                                                                                \\\n        value<type>() = dsn_config_get_value_##suffix(_section, _name, value<type>(), 
_desc);      \\\n        if (_validator) {                                                                          \\\n            dassert_f(_validator(), \"validation failed: {}\", _name);                               \\\n        }                                                                                          \\\n        break\n\n#define FLAG_DATA_UPDATE_CASE(type, type_enum, suffix)                                             \\\n    case type_enum: {                                                                              \\\n        type old_val = value<type>(), tmpval_##type_enum;                                          \\\n        if (!dsn::buf2##suffix(val, tmpval_##type_enum)) {                                         \\\n            return error_s::make(ERR_INVALID_PARAMETERS, fmt::format(\"{} is invalid\", val));       \\\n        }                                                                                          \\\n        value<type>() = tmpval_##type_enum;                                                        \\\n        if (_validator && !_validator()) {                                                         \\\n            value<type>() = old_val;                                                               \\\n            return error_s::make(ERR_INVALID_PARAMETERS, \"value validation failed\");               \\\n        }                                                                                          \\\n        std::string total_message;                                                                 \\\n        if (!on_update_value.execute(&total_message, true)) {                                      \\\n            value<type>() = old_val;                                                               \\\n            return error_s::make(ERR_INVALID_PARAMETERS, total_message.c_str());                   \\\n        }                                                                                          \\\n   
 } break\n\n#define FLAG_DATA_UPDATE_STRING()                                                                  \\\n    case FV_STRING:                                                                                \\\n        return error_s::make(ERR_INVALID_PARAMETERS, \"string modifications are not supported\")\n\n    void load()\n    {\n        switch (_type) {\n            FLAG_DATA_LOAD_CASE(int32_t, FV_INT32, int64);\n            FLAG_DATA_LOAD_CASE(int64_t, FV_INT64, int64);\n            FLAG_DATA_LOAD_CASE(uint32_t, FV_UINT32, uint64);\n            FLAG_DATA_LOAD_CASE(uint64_t, FV_UINT64, uint64);\n            FLAG_DATA_LOAD_CASE(bool, FV_BOOL, bool);\n            FLAG_DATA_LOAD_CASE(double, FV_DOUBLE, double);\n            FLAG_DATA_LOAD_CASE(const char *, FV_STRING, string);\n        }\n    }\n\n    flag_data(const char *section, const char *name, const char *desc, value_type type, void *val)\n        : on_update_value(\"flag.data.value.update\"),\n          _type(type),\n          _val(val),\n          _section(section),\n          _name(name),\n          _desc(desc)\n    {\n    }\n\n    error_s update(const std::string &val)\n    {\n        if (!has_tag(flag_tag::FT_MUTABLE)) {\n            return error_s::make(ERR_INVALID_PARAMETERS, fmt::format(\"{} is not mutable\", _name));\n        }\n\n        switch (_type) {\n            FLAG_DATA_UPDATE_CASE(int32_t, FV_INT32, int32);\n            FLAG_DATA_UPDATE_CASE(int64_t, FV_INT64, int64);\n            FLAG_DATA_UPDATE_CASE(uint32_t, FV_UINT32, uint32);\n            FLAG_DATA_UPDATE_CASE(uint64_t, FV_UINT64, uint64);\n            FLAG_DATA_UPDATE_CASE(bool, FV_BOOL, bool);\n            FLAG_DATA_UPDATE_CASE(double, FV_DOUBLE, double);\n            FLAG_DATA_UPDATE_STRING();\n        }\n        return error_s::make(ERR_OK);\n    }\n\n    void set_validator(validator_fn &validator) { _validator = std::move(validator); }\n    const validator_fn &validator() const { return _validator; }\n\n    void 
add_tag(const flag_tag &tag) { _tags.insert(tag); }\n    bool has_tag(const flag_tag &tag) const { return _tags.find(tag) != _tags.end(); }\n\n    void to_table_printer(utils::table_printer &tp) const\n    {\n#define TABLE_PRINTER_ADD_VALUE(type, type_enum)                                                   \\\n    case type_enum:                                                                                \\\n        tp.add_row_name_and_data(\"value\", value<type>());                                          \\\n        break;\n\n        tp.add_row_name_and_data(\"name\", _name);\n        tp.add_row_name_and_data(\"section\", _section);\n        tp.add_row_name_and_data(\"type\", enum_to_string(_type));\n        tp.add_row_name_and_data(\"tags\", tags_str());\n        tp.add_row_name_and_data(\"description\", _desc);\n        switch (_type) {\n            TABLE_PRINTER_ADD_VALUE(bool, FV_BOOL);\n            TABLE_PRINTER_ADD_VALUE(int32_t, FV_INT32);\n            TABLE_PRINTER_ADD_VALUE(uint32_t, FV_UINT32);\n            TABLE_PRINTER_ADD_VALUE(int64_t, FV_INT64);\n            TABLE_PRINTER_ADD_VALUE(uint64_t, FV_UINT64);\n            TABLE_PRINTER_ADD_VALUE(double, FV_DOUBLE);\n            TABLE_PRINTER_ADD_VALUE(const char *, FV_STRING);\n        }\n    }\n\n    std::string to_json() const\n    {\n        utils::table_printer tp;\n        to_table_printer(tp);\n        std::ostringstream out;\n        tp.output(out, utils::table_printer::output_format::kJsonCompact);\n        return out.str();\n    }\n\npublic:\n    join_point<bool, std::string *> on_update_value;\n\nprivate:\n    template <typename T>\n    T &value() const\n    {\n        return *reinterpret_cast<T *>(_val);\n    }\n\n    std::string tags_str() const\n    {\n        std::string tags_str;\n        for (const auto &tag : _tags) {\n            tags_str += enum_to_string(tag);\n            tags_str += \",\";\n        }\n        if (!tags_str.empty()) {\n            tags_str.pop_back();\n        
}\n\n        return tags_str;\n    }\n\nprivate:\n    const value_type _type;\n    void *const _val;\n    const char *_section;\n    const char *_name;\n    const char *_desc;\n    validator_fn _validator;\n    std::unordered_set<flag_tag> _tags;\n};\n\nclass flag_registry : public utils::singleton<flag_registry>\n{\npublic:\n    bool run_group_validators(std::map<std::string, std::string> &validate_messages)\n    {\n        bool valid = true;\n\n        for (const auto &validator : _group_flag_validators) {\n            std::string message;\n            if (!validator.second(message)) {\n                valid = false;\n                validate_messages[validator.first] = message;\n            }\n        }\n\n        return valid;\n    }\n\n    bool run_group_validators(std::string *total_message)\n    {\n        std::map<std::string, std::string> validate_messages;\n        bool valid = run_group_validators(validate_messages);\n\n        if (!valid && total_message != nullptr) {\n            std::vector<std::string> messages;\n            std::transform(validate_messages.begin(),\n                           validate_messages.end(),\n                           std::back_inserter(messages),\n                           [](const std::pair<std::string, std::string> &message) {\n                               std::string base(\n                                   fmt::format(\"group validator \\\"{}\\\" failed\", message.first));\n                               if (message.second.empty()) {\n                                   return base;\n                               }\n                               return fmt::format(\"{}: \\\"{}\\\"\", base, message.second);\n                           });\n\n            *total_message = boost::join(messages, \"; \");\n        }\n\n        return valid;\n    }\n\n    void add_flag(const char *name, flag_data flag)\n    {\n        // We should run all group validators to find the potential inconsistency\n        auto 
group_validators_runner = std::bind<bool (flag_registry::*)(std::string *)>(\n            &flag_registry::run_group_validators, this, std::placeholders::_1);\n        flag.on_update_value.put_native(group_validators_runner);\n\n        _flags.emplace(name, flag);\n    }\n\n    error_s update_flag(const std::string &name, const std::string &val)\n    {\n        auto it = _flags.find(name);\n        if (it == _flags.end()) {\n            return error_s::make(ERR_OBJECT_NOT_FOUND, fmt::format(\"{} is not found\", name));\n        }\n        return it->second.update(val);\n    }\n\n    void add_validator(const char *name, validator_fn &validator)\n    {\n        auto it = _flags.find(name);\n        dassert(it != _flags.end(), \"flag \\\"%s\\\" does not exist\", name);\n        flag_data &flag = it->second;\n        if (!flag.validator()) {\n            flag.set_validator(validator);\n        }\n    }\n\n    void add_group_validator(const char *name, group_validator_fn &validator)\n    {\n        auto it = _group_flag_validators.find(name);\n        dassert_f(\n            it == _group_flag_validators.end(), \"duplicate group flag validator \\\"{}\\\"\", name);\n        _group_flag_validators[name] = validator;\n    }\n\n    void load_from_config()\n    {\n        for (auto &kv : _flags) {\n            flag_data &flag = kv.second;\n            flag.load();\n        }\n\n        std::string total_message;\n        if (!run_group_validators(&total_message)) {\n            dassert_f(false, \"{}\", total_message);\n        }\n    }\n\n    void add_tag(const char *name, const flag_tag &tag)\n    {\n        auto it = _flags.find(name);\n        dassert(it != _flags.end(), \"flag \\\"%s\\\" does not exist\", name);\n        it->second.add_tag(tag);\n    }\n\n    bool has_tag(const std::string &name, const flag_tag &tag) const\n    {\n        auto it = _flags.find(name);\n        if (it == _flags.end()) {\n            return false;\n        }\n        return 
it->second.has_tag(tag);\n    }\n\n    error_with<std::string> get_flag_str(const std::string &name) const\n    {\n        const auto iter = _flags.find(name);\n        if (iter == _flags.end()) {\n            return error_s::make(ERR_OBJECT_NOT_FOUND, fmt::format(\"{} is not found\", name));\n        }\n\n        return iter->second.to_json();\n    }\n\n    std::string list_all_flags() const\n    {\n        utils::multi_table_printer mtp;\n        for (const auto &flag : _flags) {\n            utils::table_printer tp(flag.first);\n            flag.second.to_table_printer(tp);\n            mtp.add(std::move(tp));\n        }\n\n        std::ostringstream out;\n        mtp.output(out, utils::table_printer::output_format::kJsonCompact);\n        return out.str();\n    }\n\nprivate:\n    friend class utils::singleton<flag_registry>;\n    flag_registry() = default;\n    ~flag_registry() = default;\n\nprivate:\n    std::map<std::string, flag_data> _flags;\n    std::map<std::string, group_validator_fn> _group_flag_validators;\n};\n\n#define FLAG_REG_CONSTRUCTOR(type, type_enum)                                                      \\\n    flag_registerer::flag_registerer(                                                              \\\n        const char *section, const char *name, const char *desc, type *val)                        \\\n    {                                                                                              \\\n        flag_registry::instance().add_flag(name, flag_data(section, name, desc, type_enum, val));  \\\n    }\n\nFLAG_REG_CONSTRUCTOR(int32_t, FV_INT32);\nFLAG_REG_CONSTRUCTOR(uint32_t, FV_UINT32);\nFLAG_REG_CONSTRUCTOR(int64_t, FV_INT64);\nFLAG_REG_CONSTRUCTOR(uint64_t, FV_UINT64);\nFLAG_REG_CONSTRUCTOR(bool, FV_BOOL);\nFLAG_REG_CONSTRUCTOR(double, FV_DOUBLE);\nFLAG_REG_CONSTRUCTOR(const char *, FV_STRING);\n\nflag_validator::flag_validator(const char *name, validator_fn validator)\n{\n    flag_registry::instance().add_validator(name, 
validator);\n}\n\ngroup_flag_validator::group_flag_validator(const char *name, group_validator_fn validator)\n{\n    flag_registry::instance().add_group_validator(name, validator);\n}\n\nflag_tagger::flag_tagger(const char *name, const flag_tag &tag)\n{\n    flag_registry::instance().add_tag(name, tag);\n}\n\n/*extern*/ void flags_initialize() { flag_registry::instance().load_from_config(); }\n\n/*extern*/ error_s update_flag(const std::string &name, const std::string &val)\n{\n    return flag_registry::instance().update_flag(name, val);\n}\n\n/*extern*/ bool has_tag(const std::string &name, const flag_tag &tag)\n{\n    return flag_registry::instance().has_tag(name, tag);\n}\n\n/*extern*/ error_with<std::string> get_flag_str(const std::string &flag_name)\n{\n    return flag_registry::instance().get_flag_str(flag_name);\n}\n\n/*extern*/ std::string list_all_flags() { return flag_registry::instance().list_all_flags(); }\n\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/gpid.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#include <dsn/utility/fixed_size_buffer_pool.h>\n#include <dsn/tool-api/gpid.h>\n#include <cstring>\n\nnamespace dsn {\n\nbool gpid::parse_from(const char *str)\n{\n    return sscanf(str, \"%d.%d\", &_value.u.app_id, &_value.u.partition_index) == 2;\n}\n\nstatic __thread fixed_size_buffer_pool<8, 64> bf;\nconst char *gpid::to_string() const\n{\n    char *b = bf.next();\n    snprintf(b, bf.get_chunk_size(), \"%d.%d\", _value.u.app_id, _value.u.partition_index);\n    return b;\n}\n}\n"
  },
  {
    "path": "src/utils/latency_tracer.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utils/latency_tracer.h>\n#include <dsn/perf_counter/perf_counters.h>\n#include <dsn/service_api_c.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/config_api.h>\n#include <dsn/utility/flags.h>\n\n#include <utility>\n#include \"lockp.std.h\"\n#include \"shared_io_service.h\"\n\nnamespace dsn {\nnamespace utils {\n\nDSN_DEFINE_bool(\"replication\",\n                enable_latency_tracer,\n                false,\n                \"whether enable the global latency tracer\");\nDSN_TAG_VARIABLE(enable_latency_tracer, FT_MUTABLE);\n\nDSN_DEFINE_bool(\"replication\",\n                enable_latency_tracer_report,\n                false,\n                \"whether open the latency tracer report perf counter\");\nDSN_TAG_VARIABLE(enable_latency_tracer_report, FT_MUTABLE);\n\nDSN_DEFINE_string(\"replication\",\n                  latency_tracer_counter_name_prefix,\n                  \"trace_latency\",\n                  \"perf counter common name prefix\");\n\nutils::rw_lock_nr counter_lock; //{\nstd::unordered_map<std::string, perf_counter_ptr> counters_trace_latency;\n// }\n\nutils::rw_lock_nr task_code_lock; 
//{\nstd::unordered_map<std::string, bool> task_codes;\n// }\n\nperf_counter_ptr get_trace_counter(const std::string &name)\n{\n    {\n        utils::auto_read_lock read(counter_lock);\n        auto iter = counters_trace_latency.find(name);\n        if (iter != counters_trace_latency.end()) {\n            return iter->second;\n        }\n    }\n\n    utils::auto_write_lock write(counter_lock);\n    auto iter = counters_trace_latency.find(name);\n    if (iter != counters_trace_latency.end()) {\n        return iter->second;\n    }\n\n    auto perf_counter =\n        dsn::perf_counters::instance().get_app_counter(FLAGS_latency_tracer_counter_name_prefix,\n                                                       name.c_str(),\n                                                       COUNTER_TYPE_NUMBER_PERCENTILES,\n                                                       name.c_str(),\n                                                       true);\n\n    counters_trace_latency.emplace(name, perf_counter);\n    return perf_counter;\n}\n\nbool is_enable_trace(const dsn::task_code &code)\n{\n    if (!FLAGS_enable_latency_tracer) {\n        return false;\n    }\n\n    if (code == LPC_LATENCY_TRACE) {\n        return true;\n    }\n\n    std::string code_name(dsn::task_code(code).to_string());\n    {\n        utils::auto_read_lock read(task_code_lock);\n        auto iter = task_codes.find(code_name);\n        if (iter != task_codes.end()) {\n            return iter->second;\n        }\n    }\n\n    utils::auto_write_lock write(task_code_lock);\n    auto iter = task_codes.find(code_name);\n    if (iter != task_codes.end()) {\n        return iter->second;\n    }\n\n    std::string section_name = std::string(\"task.\") + code_name;\n    auto enable_trace = dsn_config_get_value_bool(\n        section_name.c_str(), \"enable_trace\", false, \"whether to enable trace this kind of task\");\n\n    task_codes.emplace(code_name, enable_trace);\n    return 
enable_trace;\n}\n\nlatency_tracer::latency_tracer(bool is_sub,\n                               std::string name,\n                               uint64_t threshold,\n                               const dsn::task_code &code)\n    : _is_sub(is_sub),\n      _name(std::move(name)),\n      _description(\"default\"),\n      _threshold(threshold),\n      _start_time(dsn_now_ns()),\n      _last_time(_start_time),\n      _task_code(code),\n      _enable_trace(is_enable_trace(code))\n{\n    append_point(fmt::format(\"{}:{}:{}\", __FILENAME__, __LINE__, __FUNCTION__), _start_time);\n}\n\nlatency_tracer::~latency_tracer()\n{\n    if (!_enable_trace || _is_sub) {\n        return;\n    }\n\n    std::string traces;\n    dump_trace_points(traces);\n}\n\nvoid latency_tracer::add_point(const std::string &stage_name)\n{\n    if (!_enable_trace) {\n        return;\n    }\n\n    uint64_t ts = dsn_now_ns();\n    utils::auto_write_lock write(_point_lock);\n    _points.emplace(ts, stage_name);\n    _last_time = ts;\n    _last_stage = stage_name;\n}\n\nvoid latency_tracer::append_point(const std::string &stage_name, uint64_t timestamp)\n{\n    if (!_enable_trace) {\n        return;\n    }\n\n    utils::auto_write_lock write(_point_lock);\n    uint64_t cur_ts = timestamp > _last_time ? 
timestamp : _last_time + 1;\n    _points.emplace(cur_ts, stage_name);\n    _last_time = cur_ts;\n    _last_stage = stage_name;\n}\n\nvoid latency_tracer::add_sub_tracer(const std::string &name)\n{\n    if (!_enable_trace) {\n        return;\n    }\n\n    auto sub_tracer = std::make_shared<dsn::utils::latency_tracer>(true, name, 0);\n    sub_tracer->set_parent_point_name(_last_stage);\n    sub_tracer->set_description(_description);\n    utils::auto_write_lock write(_sub_lock);\n    _sub_tracers.emplace(name, sub_tracer);\n}\n\nvoid latency_tracer::add_sub_tracer(const std::shared_ptr<latency_tracer> &tracer)\n{\n    if (!_enable_trace) {\n        return;\n    }\n\n    utils::auto_write_lock write(_sub_lock);\n    _sub_tracers.emplace(tracer->name(), tracer);\n}\n\nstd::shared_ptr<latency_tracer> latency_tracer::sub_tracer(const std::string &name)\n{\n    if (!_enable_trace) {\n        return nullptr;\n    }\n\n    utils::auto_read_lock read(_sub_lock);\n    auto iter = _sub_tracers.find(name);\n    if (iter != _sub_tracers.end()) {\n        return iter->second;\n    }\n    dwarn_f(\"can't find the [{}] sub tracer of {}\", name, _name);\n    return nullptr;\n}\n\nvoid latency_tracer::dump_trace_points(/*out*/ std::string &traces)\n{\n    if (!_enable_trace || _threshold < 0) {\n        return;\n    }\n\n    uint64_t total_time_used;\n    {\n        utils::auto_read_lock point_lock(_point_lock);\n        if (_points.empty()) {\n            return;\n        }\n\n        uint64_t start_time = _points.begin()->first;\n        total_time_used = _points.rbegin()->first - start_time;\n        std::string header_format = _is_sub ? 
\"          \" : \"***************\";\n        traces.append(fmt::format(\"\\t{}[TRACE:[{}.{}]{}]{}\\n\",\n                                  header_format,\n                                  _description,\n                                  dsn::task_code(_task_code).to_string(),\n                                  _name,\n                                  header_format));\n        uint64_t previous_point_ts = _points.begin()->first;\n        std::string previous_point_name = _points.begin()->second;\n        for (const auto &point : _points) {\n            if (point.first == start_time) {\n                continue;\n            }\n            auto cur_point_ts = point.first;\n            auto cur_point_name = point.second;\n            auto span_duration = point.first - previous_point_ts;\n            auto total_latency = point.first - start_time;\n\n            if (FLAGS_enable_latency_tracer_report) {\n                std::string counter_name =\n                    fmt::format(\"[{}]{}@{}\", _description, previous_point_name, cur_point_name);\n                report_trace_point(counter_name, span_duration);\n            }\n\n            if (total_time_used >= _threshold) {\n                std::string trace_format = _is_sub ? \" \" : \"\";\n                std::string trace_name =\n                    _is_sub ? 
fmt::format(\"{}.{}\", _parent_point_name, cur_point_name)\n                            : cur_point_name;\n                std::string trace_log =\n                    fmt::format(\"\\t{}TRACE:name={:<110}, span={:>20}, total={:>20}, \"\n                                \"ts={:<20}\\n\",\n                                trace_format,\n                                trace_name,\n                                span_duration,\n                                total_latency,\n                                cur_point_ts);\n                traces.append(trace_log);\n            }\n\n            previous_point_ts = cur_point_ts;\n            previous_point_name = cur_point_name;\n        }\n    }\n\n    {\n        utils::auto_read_lock tracer_lock(_sub_lock);\n        for (const auto &sub : _sub_tracers) {\n            sub.second->dump_trace_points(traces);\n        }\n    }\n\n    if (!_is_sub && total_time_used >= _threshold) {\n        dwarn_f(\"TRACE:the traces as fallow:\\n{}\", traces);\n        return;\n    }\n}\n\nvoid latency_tracer::report_trace_point(const std::string &name, uint64_t span)\n{\n    auto perf_counter = get_trace_counter(name);\n    if (perf_counter) {\n        perf_counter->set(span);\n    }\n}\n\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/lockp.std.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <dsn/utility/synchronize.h>\n#include \"utils/zlock_provider.h\"\n\nnamespace dsn {\nnamespace tools {\n\nclass std_lock_provider : public lock_provider\n{\npublic:\n    std_lock_provider(lock_provider *inner_provider) : lock_provider(inner_provider) {}\n    virtual ~std_lock_provider() {}\n    virtual void lock() { _lock.lock(); }\n    virtual bool try_lock() { return _lock.try_lock(); }\n    virtual void unlock() { _lock.unlock(); }\n\nprivate:\n    utils::ex_lock _lock;\n};\n\nclass std_lock_nr_provider : public 
lock_nr_provider\n{\npublic:\n    std_lock_nr_provider(lock_nr_provider *inner_provider) : lock_nr_provider(inner_provider) {}\n    virtual ~std_lock_nr_provider() {}\n    virtual void lock() { _lock.lock(); }\n    virtual bool try_lock() { return _lock.try_lock(); }\n    virtual void unlock() { _lock.unlock(); }\n\nprivate:\n    utils::ex_lock_nr _lock;\n};\n\nclass std_rwlock_nr_provider : public rwlock_nr_provider\n{\npublic:\n    std_rwlock_nr_provider(rwlock_nr_provider *inner_provider) : rwlock_nr_provider(inner_provider)\n    {\n    }\n    virtual ~std_rwlock_nr_provider() {}\n    virtual void lock_read() { _lock.lock_read(); }\n    virtual void unlock_read() { _lock.unlock_read(); }\n    virtual bool try_lock_read() { return _lock.try_lock_read(); }\n\n    virtual void lock_write() { _lock.lock_write(); }\n    virtual void unlock_write() { _lock.unlock_write(); }\n    virtual bool try_lock_write() { return _lock.try_lock_write(); }\n\nprivate:\n    utils::rw_lock_nr _lock;\n};\n\nclass std_semaphore_provider : public semaphore_provider\n{\npublic:\n    std_semaphore_provider(int initial_count, semaphore_provider *inner_provider)\n        : semaphore_provider(initial_count, inner_provider), _sema(initial_count)\n    {\n    }\n\npublic:\n    virtual ~std_semaphore_provider() {}\n    virtual void signal(int count) { _sema.signal(count); }\n    virtual bool wait(int timeout_milliseconds) { return _sema.wait(timeout_milliseconds); }\n\nprivate:\n    dsn::utils::semaphore _sema;\n};\n}\n} // end namespace dsn::tools\n"
  },
  {
    "path": "src/utils/logging.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/tool-api/command_manager.h>\n#include <dsn/tool-api/logging_provider.h>\n#include <dsn/tool-api/auto_codes.h>\n#include <dsn/utility/flags.h>\n#include <dsn/utility/smart_pointers.h>\n#include \"simple_logger.h\"\n\nDSN_API dsn_log_level_t dsn_log_start_level = dsn_log_level_t::LOG_LEVEL_INFORMATION;\nDSN_DEFINE_string(\"core\",\n                  logging_start_level,\n                  \"LOG_LEVEL_INFORMATION\",\n                  \"logs with level below this will not be logged\");\n\nDSN_DEFINE_bool(\"core\", logging_flush_on_exit, true, \"flush log when exit system\");\n\nnamespace dsn {\n\nusing namespace tools;\nDSN_REGISTER_COMPONENT_PROVIDER(screen_logger, 
\"dsn::tools::screen_logger\");\nDSN_REGISTER_COMPONENT_PROVIDER(simple_logger, \"dsn::tools::simple_logger\");\n\nstd::function<std::string()> log_prefixed_message_func = []() -> std::string { return \": \"; };\n\nvoid set_log_prefixed_message_func(std::function<std::string()> func)\n{\n    log_prefixed_message_func = func;\n}\n} // namespace dsn\n\nstatic void log_on_sys_exit(::dsn::sys_exit_type)\n{\n    dsn::logging_provider *logger = dsn::logging_provider::instance();\n    logger->flush();\n}\n\nvoid dsn_log_init(const std::string &logging_factory_name,\n                  const std::string &dir_log,\n                  std::function<std::string()> dsn_log_prefixed_message_func)\n{\n    dsn_log_start_level =\n        enum_from_string(FLAGS_logging_start_level, dsn_log_level_t::LOG_LEVEL_INVALID);\n\n    dassert(dsn_log_start_level != dsn_log_level_t::LOG_LEVEL_INVALID,\n            \"invalid [core] logging_start_level specified\");\n\n    // register log flush on exit\n    if (FLAGS_logging_flush_on_exit) {\n        ::dsn::tools::sys_exit.put_back(log_on_sys_exit, \"log.flush\");\n    }\n\n    dsn::logging_provider *logger = dsn::utils::factory_store<dsn::logging_provider>::create(\n        logging_factory_name.c_str(), dsn::PROVIDER_TYPE_MAIN, dir_log.c_str());\n    dsn::logging_provider::set_logger(logger);\n\n    // register command for logging\n    ::dsn::command_manager::instance().register_command(\n        {\"flush-log\"},\n        \"flush-log - flush log to stderr or log file\",\n        \"flush-log\",\n        [](const std::vector<std::string> &args) {\n            dsn::logging_provider *logger = dsn::logging_provider::instance();\n            logger->flush();\n            return \"Flush done.\";\n        });\n    ::dsn::command_manager::instance().register_command(\n        {\"reset-log-start-level\"},\n        \"reset-log-start-level - reset the log start level\",\n        \"reset-log-start-level [INFORMATION | DEBUG | WARNING | ERROR | FATAL]\",\n    
    [](const std::vector<std::string> &args) {\n            dsn_log_level_t start_level;\n            if (args.size() == 0) {\n                start_level =\n                    enum_from_string(FLAGS_logging_start_level, dsn_log_level_t::LOG_LEVEL_INVALID);\n            } else {\n                std::string level_str = \"LOG_LEVEL_\" + args[0];\n                start_level =\n                    enum_from_string(level_str.c_str(), dsn_log_level_t::LOG_LEVEL_INVALID);\n                if (start_level == dsn_log_level_t::LOG_LEVEL_INVALID) {\n                    return \"ERROR: invalid level '\" + args[0] + \"'\";\n                }\n            }\n            dsn_log_set_start_level(start_level);\n            return std::string(\"OK, current level is \") + enum_to_string(start_level);\n        });\n\n    if (dsn_log_prefixed_message_func != nullptr) {\n        dsn::set_log_prefixed_message_func(dsn_log_prefixed_message_func);\n    }\n}\n\nDSN_API dsn_log_level_t dsn_log_get_start_level() { return dsn_log_start_level; }\n\nDSN_API void dsn_log_set_start_level(dsn_log_level_t level) { dsn_log_start_level = level; }\n\nDSN_API void dsn_logv(const char *file,\n                      const char *function,\n                      const int line,\n                      dsn_log_level_t log_level,\n                      const char *fmt,\n                      va_list args)\n{\n    dsn::logging_provider *logger = dsn::logging_provider::instance();\n    logger->dsn_logv(file, function, line, log_level, fmt, args);\n}\n\nDSN_API void dsn_logf(const char *file,\n                      const char *function,\n                      const int line,\n                      dsn_log_level_t log_level,\n                      const char *fmt,\n                      ...)\n{\n    va_list ap;\n    va_start(ap, fmt);\n    dsn_logv(file, function, line, log_level, fmt, ap);\n    va_end(ap);\n}\n\nDSN_API void dsn_log(const char *file,\n                     const char *function,\n                 
    const int line,\n                     dsn_log_level_t log_level,\n                     const char *str)\n{\n    dsn::logging_provider *logger = dsn::logging_provider::instance();\n    logger->dsn_log(file, function, line, log_level, str);\n}\n\nnamespace dsn {\n\nstd::unique_ptr<logging_provider> logging_provider::_logger =\n    std::unique_ptr<logging_provider>(nullptr);\n\nlogging_provider *logging_provider::instance()\n{\n    static std::unique_ptr<logging_provider> default_logger =\n        std::unique_ptr<logging_provider>(create_default_instance());\n    return _logger ? _logger.get() : default_logger.get();\n}\n\nlogging_provider *logging_provider::create_default_instance()\n{\n    return new tools::screen_logger(true);\n}\n\nvoid logging_provider::set_logger(logging_provider *logger) { _logger.reset(logger); }\n\nnamespace tools {\nnamespace internal_use_only {\nbool register_component_provider(const char *name,\n                                 logging_provider::factory f,\n                                 ::dsn::provider_type type)\n{\n    return dsn::utils::factory_store<logging_provider>::register_factory(name, f, type);\n}\n} // namespace internal_use_only\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/long_adder.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/long_adder.h>\n\n#ifdef __aarch64__\n#define _mm_free(p) free(p)\n#define _mm_malloc(a, b) malloc(a)\n#else\n#include <mm_malloc.h>\n#endif //__aarch64__\n\n#include <unistd.h>\n\n#include <cstdlib>\n#include <new>\n#include <string>\n\n#include <dsn/c/api_utilities.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/process_utils.h>\n#include <dsn/utility/rand.h>\n#include <dsn/utility/safe_strerror_posix.h>\n\nnamespace dsn {\n\nnamespace {\n\nconst uint32_t kNumCpus = sysconf(_SC_NPROCESSORS_ONLN);\nuint32_t compute_num_cells()\n{\n    uint32_t n = 1;\n    // Calculate the size. 
Nearest power of two >= NCPU.\n    // Also handle a negative NCPU, can happen if sysconf name is unknown\n    while (kNumCpus > n) {\n        n <<= 1;\n    }\n    return n;\n}\nconst uint32_t kNumCells = compute_num_cells();\nconst uint32_t kCellMask = kNumCells - 1;\n\ncacheline_aligned_int64 *const kCellsLocked = reinterpret_cast<cacheline_aligned_int64 *>(-1L);\n\n} // anonymous namespace\n\n//\n// cacheline_aligned_int64\n//\n\n/* extern */ cacheline_aligned_int64_ptr new_cacheline_aligned_int64_array(uint32_t size)\n{\n    void *buffer = nullptr;\n    int err = posix_memalign(&buffer, CACHELINE_SIZE, sizeof(cacheline_aligned_int64) * size);\n\n    // Generally there are 2 possible errors for posix_memalign as below:\n    // [EINVAL]\n    //     The value of the alignment parameter is not a power of two multiple of sizeof(void *).\n    // [ENOMEM]\n    //     There is insufficient memory available with the requested alignment.\n    // Thus making an assertion here is enough.\n    dassert_f(err == 0, \"error calling posix_memalign: {}\", utils::safe_strerror(err).c_str());\n\n    cacheline_aligned_int64 *array = new (buffer) cacheline_aligned_int64[size];\n    for (uint32_t i = 0; i < size; ++i) {\n        cacheline_aligned_int64 *elem = &(array[i]);\n        dassert_f(\n            (reinterpret_cast<const uintptr_t>(elem) & (sizeof(cacheline_aligned_int64) - 1)) == 0,\n            \"unaligned cacheline_aligned_int64: array={}, index={}, elem={}, mask={}\",\n            fmt::ptr(array),\n            i,\n            fmt::ptr(elem),\n            sizeof(cacheline_aligned_int64) - 1);\n        array[i]._value.store(0);\n    }\n\n    return cacheline_aligned_int64_ptr(array, [](cacheline_aligned_int64 *array) { free(array); });\n}\n\n/* extern */ cacheline_aligned_int64_ptr new_cacheline_aligned_int64()\n{\n    return new_cacheline_aligned_int64_array(1);\n}\n\n//\n// striped64\n//\n__thread uint64_t striped64::_tls_hashcode = 0;\n\nuint64_t 
striped64::get_tls_hashcode()\n{\n    if (dsn_unlikely(_tls_hashcode == 0)) {\n        const uint64_t tid = static_cast<uint64_t>(utils::get_current_tid());\n        // Avoid zero to allow xorShift rehash, and because 0 indicates an unset\n        // hashcode above.\n        const uint64_t hash = (tid == 0) ? rand::next_u64() : tid;\n        _tls_hashcode = (hash == 0) ? 1 : hash;\n    }\n    return _tls_hashcode;\n}\n\ntemplate <class Updater>\nvoid striped64::retry_update(rehash to_rehash, Updater updater)\n{\n    uint64_t h = get_tls_hashcode();\n    // There are three operations in this loop.\n    //\n    // 1. Try to add to the cacheline_aligned_int64 hash table entry for the thread if the table\n    // exists. When there's contention, rehash to try a different cacheline_aligned_int64.\n    // 2. Try to initialize the hash table.\n    // 3. Try to update the base counter.\n    //\n    // These are predicated on successful CAS operations, which is why it's all wrapped in an\n    // infinite retry loop.\n    while (true) {\n        cacheline_aligned_int64 *cells = _cells.load(std::memory_order_acquire);\n        if (cells != nullptr && cells != kCellsLocked) {\n            if (to_rehash == kRehash) {\n                // CAS failed already, rehash before trying to increment.\n                to_rehash = kNoRehash;\n            } else {\n                cacheline_aligned_int64 *cell = &(_cells[h & kCellMask]);\n                int64_t v = cell->_value.load(std::memory_order_relaxed);\n                if (cell->compare_and_set(v, updater(v))) {\n                    // Successfully CAS'd the corresponding cell, done.\n                    break;\n                }\n            }\n            // Rehash since we failed to CAS, either previously or just now.\n            h ^= h << 13;\n            h ^= h >> 17;\n            h ^= h << 5;\n        } else if (cells == nullptr && _cells.compare_exchange_weak(cells, kCellsLocked)) {\n            _cells_holder = 
new_cacheline_aligned_int64_array(kNumCells);\n            cells = _cells_holder.get();\n            _cells.store(cells, std::memory_order_release);\n        } else {\n            // Fallback to adding to the base value.\n            // Means the table wasn't initialized or we failed to init it.\n            int64_t v = _base.load(std::memory_order_relaxed);\n            if (cas_base(v, updater(v))) {\n                break;\n            }\n        }\n    }\n\n    // Record index for next time\n    _tls_hashcode = h;\n}\n\nvoid striped64::internal_reset(int64_t initial_value)\n{\n    _base.store(initial_value);\n\n    cacheline_aligned_int64 *c;\n    do {\n        c = _cells.load(std::memory_order_acquire);\n    } while (c == kCellsLocked);\n\n    if (c != nullptr) {\n        for (uint32_t i = 0; i < kNumCells; ++i) {\n            c[i]._value.store(0);\n        }\n    }\n}\n\nvoid striped_long_adder::increment_by(int64_t x)\n{\n    // Use hash table if present. If that fails, call retry_update to rehash and retry.\n    // If no hash table, try to CAS the base counter. 
If that fails, retry_update to init the table.\n    cacheline_aligned_int64 *cells = _cells.load(std::memory_order_acquire);\n    if (cells != nullptr && cells != kCellsLocked) {\n        cacheline_aligned_int64 *cell = &(cells[get_tls_hashcode() & kCellMask]);\n        dassert_f(\n            (reinterpret_cast<const uintptr_t>(cell) & (sizeof(cacheline_aligned_int64) - 1)) == 0,\n            \"unaligned cacheline_aligned_int64 not allowed for striped64: cell={}, mask={}\",\n            fmt::ptr(cell),\n            sizeof(cacheline_aligned_int64) - 1);\n\n        const int64_t old = cell->_value.load(std::memory_order_relaxed);\n        if (!cell->compare_and_set(old, old + x)) {\n            // When we hit a hash table contention, signal retry_update to rehash.\n            retry_update(kRehash, [x](int64_t old) { return old + x; });\n        }\n    } else {\n        int64_t b = _base.load(std::memory_order_relaxed);\n        if (!cas_base(b, b + x)) {\n            // Attempt to initialize the table. 
No need to rehash since the contention was for the\n            // base counter, not the hash table.\n            retry_update(kNoRehash, [x](int64_t old) { return old + x; });\n        }\n    }\n}\n\n//\n// striped_long_adder\n//\n\nint64_t striped_long_adder::value() const\n{\n    int64_t sum = _base.load(std::memory_order_relaxed);\n\n    cacheline_aligned_int64 *c = _cells.load(std::memory_order_acquire);\n    if (c != nullptr && c != kCellsLocked) {\n        for (uint32_t i = 0; i < kNumCells; ++i) {\n            sum += c[i]._value.load(std::memory_order_relaxed);\n        }\n    }\n    return sum;\n}\n\nint64_t striped_long_adder::fetch_and_reset()\n{\n    int64_t sum = _base.exchange(0, std::memory_order_relaxed);\n\n    cacheline_aligned_int64 *c = _cells.load(std::memory_order_acquire);\n    if (c != nullptr && c != kCellsLocked) {\n        for (uint32_t i = 0; i < kNumCells; ++i) {\n            sum += c[i]._value.exchange(0, std::memory_order_relaxed);\n        }\n    }\n    return sum;\n}\n\n//\n// concurrent_long_adder\n//\n\nconcurrent_long_adder::concurrent_long_adder()\n    : _cells_holder(new_cacheline_aligned_int64_array(kNumCells)), _cells(_cells_holder.get())\n{\n}\n\nvoid concurrent_long_adder::increment_by(int64_t x)\n{\n    auto task_id = static_cast<uint32_t>(utils::get_current_tid());\n    _cells[task_id & kCellMask]._value.fetch_add(x, std::memory_order_relaxed);\n}\n\nint64_t concurrent_long_adder::value() const\n{\n    int64_t sum = 0;\n    for (uint32_t i = 0; i < kNumCells; ++i) {\n        sum += _cells[i]._value.load(std::memory_order_relaxed);\n    }\n    return sum;\n}\n\nvoid concurrent_long_adder::set(int64_t val)\n{\n    for (uint32_t i = 0; i < kNumCells; ++i) {\n        _cells[i]._value.store(0, std::memory_order_relaxed);\n    }\n    _cells[0]._value.store(val, std::memory_order_relaxed);\n}\n\nint64_t concurrent_long_adder::fetch_and_reset()\n{\n    int64_t sum = 0;\n    for (uint32_t i = 0; i < kNumCells; ++i) {\n        sum 
+= _cells[i]._value.exchange(0, std::memory_order_relaxed);\n    }\n    return sum;\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/long_adder_bench/CMakeLists.txt",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nset(MY_PROJ_NAME long_adder_bench)\nproject(${MY_PROJ_NAME} C CXX)\n\n# Source files under CURRENT project directory will be automatically included.\n# You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"\")\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS dsn_runtime dsn_utils)\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\n# Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_executable()\n\ndsn_install_executable()\n"
  },
  {
    "path": "src/utils/long_adder_bench/long_adder_bench.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <chrono>\n#include <cstdlib>\n#include <cstring>\n#include <thread>\n#include <vector>\n\n#include <fmt/ostream.h>\n\n#include <dsn/c/api_layer1.h>\n#include <dsn/utility/long_adder.h>\n#include <dsn/utility/process_utils.h>\n#include <dsn/utility/string_conv.h>\n\n// The simplest implementation of long adder: just wrap std::atomic<int64_t>.\nclass simple_long_adder\n{\npublic:\n    simple_long_adder() = default;\n\n    ~simple_long_adder() = default;\n\n    inline void increment_by(int64_t x) { _value.fetch_add(x, std::memory_order_relaxed); }\n\n    inline int64_t value() const { return _value.load(std::memory_order_relaxed); }\n\n    inline void reset() { set(0); }\n\n    inline int64_t fetch_and_reset() { return _value.exchange(0, std::memory_order_relaxed); }\n\nprivate:\n    inline void set(int64_t val) { _value.store(val, std::memory_order_relaxed); }\n\n    std::atomic<int64_t> _value{0};\n\n    DISALLOW_COPY_AND_ASSIGN(simple_long_adder);\n};\n\n// A modification of perf_counter_number_atomic from perf_counter.\n// This modification has removed virtual functions from original version, where main interfaces\n// has been 
implemented as virtual functions, however, which will slow down the execution.\n#define DIVIDE_CONTAINER 107\nclass divided_long_adder\n{\npublic:\n    divided_long_adder()\n    {\n        for (int i = 0; i < DIVIDE_CONTAINER; ++i) {\n            _value[i].store(0);\n        }\n    }\n\n    ~divided_long_adder() = default;\n\n    inline void increment_by(int64_t x)\n    {\n        auto task_id = static_cast<uint32_t>(dsn::utils::get_current_tid());\n        _value[task_id % DIVIDE_CONTAINER].fetch_add(x, std::memory_order_relaxed);\n    }\n\n    int64_t value() const\n    {\n        int64_t sum = 0;\n        for (int i = 0; i < DIVIDE_CONTAINER; ++i) {\n            sum += _value[i].load(std::memory_order_relaxed);\n        }\n        return sum;\n    }\n\n    inline void reset() { set(0); }\n\n    int64_t fetch_and_reset()\n    {\n        int64_t sum = 0;\n        for (int i = 0; i < DIVIDE_CONTAINER; ++i) {\n            sum += _value[i].exchange(0, std::memory_order_relaxed);\n        }\n        return sum;\n    }\n\nprivate:\n    void set(int64_t val)\n    {\n        for (int i = 0; i < DIVIDE_CONTAINER; ++i) {\n            _value[i].store(0, std::memory_order_relaxed);\n        }\n        _value[0].store(val, std::memory_order_relaxed);\n    }\n\n    std::atomic<int64_t> _value[DIVIDE_CONTAINER];\n\n    DISALLOW_COPY_AND_ASSIGN(divided_long_adder);\n};\n\nvoid print_usage(const char *cmd)\n{\n    fmt::print(stderr, \"USAGE: {} <num_operations> <num_threads> <long_adder_type>\\n\", cmd);\n    fmt::print(stderr, \"Run a simple benchmark that executes each sort of long adder.\\n\\n\");\n\n    fmt::print(\n        stderr,\n        \"    <num_operations>       the number of increment operations executed by each thread\\n\");\n    fmt::print(stderr, \"    <num_threads>          the number of threads\\n\");\n    fmt::print(stderr,\n               \"    <long_adder_type>      the type of long adder: simple_long_adder, \"\n               \"divided_long_adder, 
striped_long_adder, concurrent_long_adder\\n\");\n}\n\ntemplate <typename Adder>\nvoid run_bench(int64_t num_operations, int64_t num_threads, const char *name)\n{\n    dsn::long_adder_wrapper<Adder> adder;\n\n    std::vector<std::thread> threads;\n\n    uint64_t start = dsn_now_ns();\n    for (int64_t i = 0; i < num_threads; i++) {\n        threads.emplace_back([num_operations, &adder]() {\n            for (int64_t i = 0; i < num_operations; ++i) {\n                adder.increment();\n            }\n        });\n    }\n    for (auto &t : threads) {\n        t.join();\n    }\n    uint64_t end = dsn_now_ns();\n\n    auto duration_ns = static_cast<int64_t>(end - start);\n    std::chrono::nanoseconds nano(duration_ns);\n    auto duration_s = std::chrono::duration_cast<std::chrono::duration<double>>(nano).count();\n\n    fmt::print(stdout,\n               \"Running {} operations of {} with {} threads took {} seconds, result = {}.\\n\",\n               num_operations,\n               name,\n               num_threads,\n               duration_s,\n               adder.value());\n}\n\nint main(int argc, char **argv)\n{\n    if (argc < 4) {\n        print_usage(argv[0]);\n        ::exit(-1);\n    }\n\n    int64_t num_operations;\n    if (!dsn::buf2int64(argv[1], num_operations)) {\n        fmt::print(stderr, \"Invalid num_operations: {}\\n\\n\", argv[1]);\n\n        print_usage(argv[0]);\n        ::exit(-1);\n    }\n\n    int64_t num_threads;\n    if (!dsn::buf2int64(argv[2], num_threads)) {\n        fmt::print(stderr, \"Invalid num_threads: {}\\n\\n\", argv[2]);\n\n        print_usage(argv[0]);\n        ::exit(-1);\n    }\n\n    const char *long_adder_type = argv[3];\n    if (strcmp(long_adder_type, \"simple_long_adder\") == 0) {\n        run_bench<simple_long_adder>(num_operations, num_threads, long_adder_type);\n    } else if (strcmp(long_adder_type, \"divided_long_adder\") == 0) {\n        run_bench<divided_long_adder>(num_operations, num_threads, long_adder_type);\n    
} else if (strcmp(long_adder_type, \"striped_long_adder\") == 0) {\n        run_bench<dsn::striped_long_adder>(num_operations, num_threads, long_adder_type);\n    } else if (strcmp(long_adder_type, \"concurrent_long_adder\") == 0) {\n        run_bench<dsn::concurrent_long_adder>(num_operations, num_threads, long_adder_type);\n    } else {\n        fmt::print(stderr, \"Invalid long_adder_type: {}\\n\\n\", long_adder_type);\n\n        print_usage(argv[0]);\n        ::exit(-1);\n    }\n\n    return 0;\n}\n"
  },
  {
    "path": "src/utils/math.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <numeric>\n#include <algorithm>\n#include <math.h>\n#include <dsn/c/api_utilities.h>\n#include <dsn/utility/math.h>\n\nnamespace dsn {\nnamespace utils {\n\ndouble mean_stddev(const std::vector<uint32_t> &result_set, bool partial_sample)\n{\n    dassert(result_set.size() > 1, \"invalid sample data input for stddev\");\n\n    double sum = std::accumulate(result_set.begin(), result_set.end(), 0.0);\n    double mean = sum / result_set.size();\n\n    double accum = 0.0;\n    std::for_each(result_set.begin(), result_set.end(), [&](const double d) {\n        accum += (d - mean) * (d - mean);\n    });\n\n    double stddev;\n    if (partial_sample)\n        stddev = sqrt(accum / (result_set.size() - 1));\n    else\n        stddev = sqrt(accum / (result_set.size()));\n\n    stddev = ((double)((int)((stddev + 0.005) * 100))) / 100;\n    return stddev;\n}\n\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/memutil.h",
    "content": "// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//      http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <cstddef>\n\nnamespace dsn {\nnamespace strings_internal {\n\n// This is significantly faster for case-sensitive matches with very\n// few possible matches.  See unit test for benchmarks.\nconst char *memmatch(const char *phaystack, size_t haylen, const char *pneedle, size_t neelen);\n\n} // namespace strings_internal\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/metrics.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/metrics.h>\n\n#include <dsn/c/api_utilities.h>\n#include <dsn/utility/rand.h>\n\n#include \"shared_io_service.h\"\n\nnamespace dsn {\n\nmetric_entity::metric_entity(const std::string &id, attr_map &&attrs)\n    : _id(id), _attrs(std::move(attrs))\n{\n}\n\nmetric_entity::~metric_entity() {}\n\nmetric_entity::attr_map metric_entity::attributes() const\n{\n    std::lock_guard<std::mutex> guard(_mtx);\n    return _attrs;\n}\n\nmetric_entity::metric_map metric_entity::metrics() const\n{\n    std::lock_guard<std::mutex> guard(_mtx);\n    return _metrics;\n}\n\nvoid metric_entity::set_attributes(attr_map &&attrs)\n{\n    std::lock_guard<std::mutex> guard(_mtx);\n    _attrs = std::move(attrs);\n}\n\nmetric_entity_ptr metric_entity_prototype::instantiate(const std::string &id,\n                                                       metric_entity::attr_map attrs) const\n{\n    dassert_f(attrs.find(\"entity\") == attrs.end(), \"{}'s attribute \\\"entity\\\" is reserved\", id);\n\n    attrs[\"entity\"] = _name;\n    return metric_registry::instance().find_or_create_entity(id, std::move(attrs));\n}\n\nmetric_entity_ptr 
metric_entity_prototype::instantiate(const std::string &id) const\n{\n    return instantiate(id, {});\n}\n\nmetric_entity_prototype::metric_entity_prototype(const char *name) : _name(name) {}\n\nmetric_entity_prototype::~metric_entity_prototype() {}\n\nmetric_registry::metric_registry()\n{\n    // We should ensure that metric_registry is destructed before shared_io_service is destructed.\n    // Once shared_io_service is destructed before metric_registry is destructed,\n    // boost::asio::io_service needed by metrics in metric_registry such as percentile_timer will\n    // be released firstly, then will lead to heap-use-after-free error since percentiles in\n    // metric_registry are still running but the resources they needed have been released.\n    tools::shared_io_service::instance();\n}\n\nmetric_registry::~metric_registry() {}\n\nmetric_registry::entity_map metric_registry::entities() const\n{\n    std::lock_guard<std::mutex> guard(_mtx);\n\n    return _entities;\n}\n\nmetric_entity_ptr metric_registry::find_or_create_entity(const std::string &id,\n                                                         metric_entity::attr_map &&attrs)\n{\n    std::lock_guard<std::mutex> guard(_mtx);\n\n    entity_map::const_iterator iter = _entities.find(id);\n\n    metric_entity_ptr entity;\n    if (iter == _entities.end()) {\n        entity = new metric_entity(id, std::move(attrs));\n        _entities[id] = entity;\n    } else {\n        iter->second->set_attributes(std::move(attrs));\n        entity = iter->second;\n    }\n\n    return entity;\n}\n\nmetric_prototype::metric_prototype(const ctor_args &args) : _args(args) {}\n\nmetric_prototype::~metric_prototype() {}\n\nmetric::metric(const metric_prototype *prototype) : _prototype(prototype) {}\n\nuint64_t percentile_timer::generate_initial_delay_ms(uint64_t interval_ms)\n{\n    dcheck_gt(interval_ms, 0);\n\n    if (interval_ms < 1000) {\n        return rand::next_u64() % interval_ms + 50;\n    }\n\n    uint64_t 
interval_seconds = interval_ms / 1000;\n    return (rand::next_u64() % interval_seconds + 1) * 1000 + rand::next_u64() % 1000;\n}\n\npercentile_timer::percentile_timer(uint64_t interval_ms, exec_fn exec)\n    : _initial_delay_ms(generate_initial_delay_ms(interval_ms)),\n      _interval_ms(interval_ms),\n      _exec(exec),\n      _timer(new boost::asio::deadline_timer(tools::shared_io_service::instance().ios))\n{\n    _timer->expires_from_now(boost::posix_time::milliseconds(_initial_delay_ms));\n    _timer->async_wait(std::bind(&percentile_timer::on_timer, this, std::placeholders::_1));\n}\n\nvoid percentile_timer::on_timer(const boost::system::error_code &ec)\n{\n    if (dsn_unlikely(!!ec)) {\n        dassert_f(ec == boost::system::errc::operation_canceled,\n                  \"failed to exec on_timer with an error that cannot be handled: {}\",\n                  ec.message());\n        return;\n    }\n\n    _exec();\n\n    _timer->expires_from_now(boost::posix_time::milliseconds(_interval_ms));\n    _timer->async_wait(std::bind(&percentile_timer::on_timer, this, std::placeholders::_1));\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/output_utils.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"dsn/utility/output_utils.h\"\n\n#include <dsn/c/api_utilities.h>\n\nnamespace dsn {\nnamespace utils {\n\ntemplate <typename Writer>\nvoid json_encode(Writer &writer, const table_printer &tp)\n{\n    if (tp._matrix_data.empty()) {\n        return;\n    }\n    if (!tp._name.empty()) {\n        json::json_encode(writer, tp._name); // table_printer name\n        writer.StartObject();\n    }\n    if (tp._mode == table_printer::data_mode::kMultiColumns) {\n        // The 1st row elements are column names, skip it.\n        for (size_t row = 1; row < tp._matrix_data.size(); ++row) {\n            dsn::json::json_encode(writer, tp._matrix_data[row][0]); // row name\n            writer.StartObject();\n            for (int col = 0; col < tp._matrix_data[row].size(); col++) {\n                dsn::json::json_encode(writer, tp._matrix_data[0][col]);   // column name\n                dsn::json::json_encode(writer, tp._matrix_data[row][col]); // column data\n            }\n            writer.EndObject();\n        }\n    } else if (tp._mode == table_printer::data_mode::kSingleColumn) {\n        for (size_t row = 0; row < tp._matrix_data.size(); 
++row) {\n            dsn::json::json_encode(writer, tp._matrix_data[row][0]); // row name\n            dsn::json::json_encode(writer, tp._matrix_data[row][1]); // row data\n        }\n    } else {\n        dassert(false, \"Unknown mode\");\n    }\n    if (!tp._name.empty()) {\n        writer.EndObject();\n    }\n}\n\nvoid table_printer::add_title(const std::string &title, alignment align)\n{\n    check_mode(data_mode::kMultiColumns);\n    dassert(_matrix_data.empty() && _max_col_width.empty(), \"`add_title` must be called only once\");\n    _max_col_width.push_back(title.length());\n    _align_left.push_back(align == alignment::kLeft);\n    add_row(title);\n}\n\nvoid table_printer::add_column(const std::string &col_name, alignment align)\n{\n    check_mode(data_mode::kMultiColumns);\n    dassert(_matrix_data.size() == 1, \"`add_column` must be called before real data appendding\");\n    _max_col_width.push_back(col_name.length());\n    _align_left.push_back(align == alignment::kLeft);\n    append_data(col_name);\n}\n\nvoid table_printer::add_row_name_and_string_data(const std::string &row_name,\n                                                 const std::string &data)\n{\n    // The first row added to the table.\n    if (_max_col_width.empty()) {\n        _max_col_width.push_back(row_name.length());\n        _align_left.push_back(true);\n        _max_col_width.push_back(data.length());\n        _align_left.push_back(true);\n    }\n\n    _matrix_data.emplace_back(std::vector<std::string>());\n    append_string_data(row_name);\n    append_string_data(data);\n}\n\nvoid table_printer::output(std::ostream &out, output_format format) const\n{\n    switch (format) {\n    case output_format::kTabular:\n        output_in_tabular(out);\n        break;\n    case output_format::kJsonCompact:\n        output_in_json<dsn::json::JsonWriter>(out);\n        break;\n    case output_format::kJsonPretty:\n        output_in_json<dsn::json::PrettyJsonWriter>(out);\n        break;\n    
default:\n        dassert(false, \"Unknown format\");\n    }\n}\n\nvoid table_printer::output_in_tabular(std::ostream &out) const\n{\n    if (_max_col_width.empty()) {\n        return;\n    }\n\n    std::string separator;\n    if (_mode == data_mode::kSingleColumn) {\n        separator = \": \";\n    } else {\n        dassert(_mode == data_mode::kMultiColumns, \"Unknown mode\");\n    }\n\n    if (!_name.empty()) {\n        out << \"[\" << _name << \"]\" << std::endl;\n    }\n    int i = 0;\n    for (const auto &row : _matrix_data) {\n        for (size_t col = 0; col < row.size(); ++col) {\n            auto data = (col == 0 ? \"\" : separator) + row[col];\n            out << std::setw(_max_col_width[col] + _tabular_width)\n                << (_align_left[col] ? std::left : std::right) << data;\n        }\n        out << std::endl;\n    }\n}\n\nvoid table_printer::append_string_data(const std::string &data)\n{\n    _matrix_data.rbegin()->emplace_back(data);\n    int last_index = _matrix_data.rbegin()->size() - 1;\n    dassert(last_index <= _max_col_width.size(), \"column data exceed\");\n\n    // update column max length\n    int &cur_len = _max_col_width[last_index];\n    if (cur_len < data.size()) {\n        cur_len = data.size();\n    }\n}\n\nvoid table_printer::check_mode(data_mode mode)\n{\n    if (_mode == data_mode::kUninitialized) {\n        _mode = mode;\n        return;\n    }\n    dassert(_mode == mode, \"\");\n}\n\nvoid multi_table_printer::add(table_printer &&tp) { _tps.emplace_back(std::move(tp)); }\n\nvoid multi_table_printer::output(std::ostream &out,\n                                 table_printer::table_printer::output_format format) const\n{\n    switch (format) {\n    case table_printer::output_format::kTabular:\n        output_in_tabular(out);\n        break;\n    case table_printer::output_format::kJsonCompact:\n        output_in_json<dsn::json::JsonWriter>(out);\n        break;\n    case table_printer::output_format::kJsonPretty:\n        
output_in_json<dsn::json::PrettyJsonWriter>(out);\n        break;\n    default:\n        dassert(false, \"Unknown format\");\n    }\n}\n\nvoid multi_table_printer::output_in_tabular(std::ostream &out) const\n{\n    for (const auto &tp : _tps) {\n        tp.output_in_tabular(out);\n        out << std::endl;\n    }\n}\n\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/process_utils.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <array>\n#include <iostream>\n#include <fstream>\n#include <unistd.h>\n#include <sys/syscall.h>\n#include <dsn/utility/utils.h>\n#include <dsn/utility/preloadable.h>\n#include <dsn/utility/process_utils.h>\n#include <dsn/utils/time_utils.h>\n\nnamespace dsn {\nnamespace utils {\n\n__thread tls_tid s_tid;\nint get_current_tid_internal() { return static_cast<int>(syscall(SYS_gettid)); }\n\nint pipe_execute(const char *command, std::ostream &output)\n{\n    std::array<char, 256> buffer;\n    int retcode = 0;\n\n    {\n        std::shared_ptr<FILE> command_pipe(popen(command, \"r\"),\n                                           [&retcode](FILE *p) { retcode = pclose(p); });\n        while (!feof(command_pipe.get())) {\n            if (fgets(buffer.data(), 256, command_pipe.get()) != NULL)\n                output << buffer.data();\n        }\n    }\n    return retcode;\n}\n\nvoid process_mem_usage(double &vm_usage, double &resident_set)\n{\n    using std::ios_base;\n    using std::ifstream;\n    using std::string;\n\n    vm_usage = 0.0;\n    resident_set = 0.0;\n\n    // 'file' stat seems to give the most reliable results\n    //\n    
ifstream stat_stream(\"/proc/self/stat\", ios_base::in);\n\n    // dummy vars for leading entries in stat that we don't care about\n    //\n    string pid, comm, state, ppid, pgrp, session, tty_nr;\n    string tpgid, flags, minflt, cminflt, majflt, cmajflt;\n    string utime, stime, cutime, cstime, priority, nice;\n    string O, itrealvalue, starttime;\n\n    // the two fields we want\n    //\n    unsigned long vsize;\n    long rss;\n\n    stat_stream >> pid >> comm >> state >> ppid >> pgrp >> session >> tty_nr >> tpgid >> flags >>\n        minflt >> cminflt >> majflt >> cmajflt >> utime >> stime >> cutime >> cstime >> priority >>\n        nice >> O >> itrealvalue >> starttime >> vsize >> rss; // don't care about the rest\n\n    stat_stream.close();\n\n    static long page_size_kb =\n        sysconf(_SC_PAGE_SIZE) / 1024; // in case x86-64 is configured to use 2MB pages\n    vm_usage = vsize / 1024.0;\n    resident_set = rss * page_size_kb;\n}\n\nclass record_process_start_time : public preloadable<record_process_start_time>\n{\npublic:\n    record_process_start_time()\n    {\n        mills = get_current_physical_time_ns() / 1000000;\n        time_ms_to_string(mills, date_time_mills);\n    }\n    uint64_t mills;\n    char date_time_mills[64];\n};\n\n//\n// if you call these functions before \"main\" function,\n// the memory space for these variables have been allocated,\n// but the values aren't initialized as the constructor\n// of \"static_module\" may not been called yet.\n//\nuint64_t process_start_millis() { return record_process_start_time::s_instance.mills; }\nconst char *process_start_date_time_mills()\n{\n    return record_process_start_time::s_instance.date_time_mills;\n}\n}\n}\n"
  },
  {
    "path": "src/utils/rand.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/rand.h>\n#include <random>\n\nnamespace dsn {\nnamespace rand {\n\nthread_local std::ranlux48_base g_thread_local_rng(std::random_device{}());\n\n/*extern*/ uint64_t next_u64(uint64_t min, uint64_t max)\n{\n    return std::uniform_int_distribution<uint64_t>(min, max)(g_thread_local_rng);\n}\n\n/*extern*/ void reseed_thread_local_rng(uint64_t seed) { g_thread_local_rng.seed(seed); }\n\n} // namespace rand\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/rpc_address.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <sys/socket.h>\n#include <netdb.h>\n#include <ifaddrs.h>\n#include <netinet/in.h>\n#include <arpa/inet.h>\n\n#include <dsn/utility/ports.h>\n#include <dsn/utility/string_view.h>\n#include <dsn/utility/fixed_size_buffer_pool.h>\n\n#include <dsn/c/api_utilities.h>\n\n#include <dsn/tool-api/rpc_address.h>\n#include <dsn/tool-api/group_address.h>\n\nnamespace dsn {\n\nconst rpc_address rpc_address::s_invalid_address;\n\n/*static*/\nuint32_t rpc_address::ipv4_from_host(const char *name)\n{\n    sockaddr_in addr;\n    memset(&addr, 0, sizeof(addr));\n\n    addr.sin_family = AF_INET;\n    if ((addr.sin_addr.s_addr = inet_addr(name)) == (unsigned int)(-1)) {\n        hostent *hp = ::gethostbyname(name);\n        int err = 
h_errno;\n\n        if (hp == nullptr) {\n            derror(\"gethostbyname failed, name = %s, err = %d.\", name, err);\n            return 0;\n        } else {\n            memcpy((void *)&(addr.sin_addr.s_addr), (const void *)hp->h_addr, (size_t)hp->h_length);\n        }\n    }\n\n    // converts from network byte order to host byte order\n    return (uint32_t)ntohl(addr.sin_addr.s_addr);\n}\n\n/*static*/\nbool rpc_address::is_site_local_address(uint32_t ip_net)\n{\n    uint32_t iphost = ntohl(ip_net);\n    return (iphost >= 0x0A000000 && iphost <= 0x0AFFFFFF) || // 10.0.0.0-10.255.255.255\n           (iphost >= 0xAC100000 && iphost <= 0xAC1FFFFF) || // 172.16.0.0-172.31.255.255\n           (iphost >= 0xC0A80000 && iphost <= 0xC0A8FFFF) || // 192.168.0.0-192.168.255.255\n           false;\n}\n\n/*static*/\nbool rpc_address::is_docker_netcard(const char *netcard_interface, uint32_t ip_net)\n{\n    if (dsn::string_view(netcard_interface).find(\"docker\") != dsn::string_view::npos)\n        return true;\n    uint32_t iphost = ntohl(ip_net);\n    return iphost == 0xAC112A01; // 172.17.42.1\n}\n\n/*static*/\nuint32_t rpc_address::ipv4_from_network_interface(const char *network_interface)\n{\n    uint32_t ret = 0;\n\n    struct ifaddrs *ifa = nullptr;\n    if (getifaddrs(&ifa) == 0) {\n        struct ifaddrs *i = ifa;\n        while (i != nullptr) {\n            if (i->ifa_name != nullptr && i->ifa_addr != nullptr &&\n                i->ifa_addr->sa_family == AF_INET) {\n                uint32_t ip_val = ((struct sockaddr_in *)i->ifa_addr)->sin_addr.s_addr;\n                if (strcmp(i->ifa_name, network_interface) == 0 ||\n                    (network_interface[0] == '\\0' && !is_docker_netcard(i->ifa_name, ip_val) &&\n                     is_site_local_address(ip_val))) {\n                    ret = (uint32_t)ntohl(ip_val);\n                    break;\n                } else {\n                    dinfo(\"skip interface(%s), address(%s)\",\n                          
i->ifa_name,\n                          rpc_address(ip_val, 0).ipv4_str());\n                }\n            }\n            i = i->ifa_next;\n        }\n\n        if (i == nullptr) {\n            derror(\"get local ip from network interfaces failed, network_interface = %s\",\n                   network_interface);\n        } else {\n            ddebug(\"get ip address from network interface(%s), addr(%s), input interface(\\\"%s\\\")\",\n                   i->ifa_name,\n                   rpc_address(ret, 0).ipv4_str(),\n                   network_interface);\n        }\n\n        if (ifa != nullptr) {\n            // remember to free it\n            freeifaddrs(ifa);\n        }\n    }\n\n    return ret;\n}\n\nrpc_address::~rpc_address() { set_invalid(); }\n\nrpc_address::rpc_address(const rpc_address &another) { *this = another; }\n\nrpc_address &rpc_address::operator=(const rpc_address &another)\n{\n    if (this == &another) {\n        // avoid memory leak\n        return *this;\n    }\n    set_invalid();\n    _addr = another._addr;\n    switch (another.type()) {\n    case HOST_TYPE_GROUP:\n        group_address()->add_ref();\n        break;\n    default:\n        break;\n    }\n    return *this;\n}\n\nvoid rpc_address::assign_group(const char *name)\n{\n    set_invalid();\n    _addr.group.type = HOST_TYPE_GROUP;\n    dsn::rpc_group_address *addr = new dsn::rpc_group_address(name);\n    // take the lifetime of rpc_uri_address, release_ref when change value or call destructor\n    addr->add_ref();\n    _addr.group.group = (uint64_t)addr;\n}\n\nvoid rpc_address::set_invalid()\n{\n    switch (type()) {\n    case HOST_TYPE_GROUP:\n        group_address()->release_ref();\n        break;\n    default:\n        break;\n    }\n    _addr.value = 0;\n}\n\nstatic __thread fixed_size_buffer_pool<8, 256> bf;\n\nconst char *rpc_address::ipv4_str() const\n{\n    char *p = bf.next();\n    auto sz = bf.get_chunk_size();\n    struct in_addr net_addr;\n\n    if (_addr.v4.type == 
HOST_TYPE_IPV4) {\n        net_addr.s_addr = htonl(ip());\n        inet_ntop(AF_INET, &net_addr, p, sz);\n    } else {\n        p = (char *)\"invalid_ipv4\";\n    }\n    return p;\n}\n\nconst char *rpc_address::to_string() const\n{\n    char *p = bf.next();\n    auto sz = bf.get_chunk_size();\n    struct in_addr net_addr;\n    int ip_len;\n\n    switch (_addr.v4.type) {\n    case HOST_TYPE_IPV4:\n        net_addr.s_addr = htonl(ip());\n        inet_ntop(AF_INET, &net_addr, p, sz);\n        ip_len = strlen(p);\n        snprintf_p(p + ip_len, sz - ip_len, \":%hu\", port());\n        break;\n    case HOST_TYPE_GROUP:\n        p = (char *)group_address()->name();\n        break;\n    default:\n        p = (char *)\"invalid address\";\n        break;\n    }\n\n    return (const char *)p;\n}\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/safe_strerror_posix.cpp",
    "content": "// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style license that can be\n// found in the LICENSE file.\n\n#include <dsn/utility/safe_strerror_posix.h>\n\n#include <errno.h>\n#include <stdio.h>\n#include <string.h>\n\nnamespace dsn {\nnamespace utils {\n#if defined(__GLIBC__) || defined(OS_NACL)\n#define USE_HISTORICAL_STRERRO_R 1\n#else\n#define USE_HISTORICAL_STRERRO_R 0\n#endif\n\n#if USE_HISTORICAL_STRERRO_R && defined(__GNUC__)\n// GCC will complain about the unused second wrap function unless we tell it\n// that we meant for them to be potentially unused, which is exactly what this\n// attribute is for.\n#define POSSIBLY_UNUSED __attribute__((unused))\n#else\n#define POSSIBLY_UNUSED\n#endif\n\n#if USE_HISTORICAL_STRERRO_R\n// glibc has two strerror_r functions: a historical GNU-specific one that\n// returns type char *, and a POSIX.1-2001 compliant one available since 2.3.4\n// that returns int. This wraps the GNU-specific one.\nstatic void POSSIBLY_UNUSED wrap_posix_strerror_r(char *(*strerror_r_ptr)(int, char *, size_t),\n                                                  int err,\n                                                  char *buf,\n                                                  size_t len)\n{\n    // GNU version.\n    char *rc = (*strerror_r_ptr)(err, buf, len);\n    if (rc != buf) {\n        // glibc did not use buf and returned a static string instead. Copy it\n        // into buf.\n        buf[0] = '\\0';\n        strncat(buf, rc, len - 1);\n    }\n    // The GNU version never fails. Unknown errors get an \"unknown error\" message.\n    // The result is always null terminated.\n}\n#endif // USE_HISTORICAL_STRERRO_R\n\n// Wrapper for strerror_r functions that implement the POSIX interface. POSIX\n// does not define the behaviour for some of the edge cases, so we wrap it to\n// guarantee that they are handled. 
This is compiled on all POSIX platforms, but\n// it will only be used on Linux if the POSIX strerror_r implementation is\n// being used (see below).\nstatic void POSSIBLY_UNUSED wrap_posix_strerror_r(int (*strerror_r_ptr)(int, char *, size_t),\n                                                  int err,\n                                                  char *buf,\n                                                  size_t len)\n{\n    int old_errno = errno;\n    // Have to cast since otherwise we get an error if this is the GNU version\n    // (but in such a scenario this function is never called). Sadly we can't use\n    // C++-style casts because the appropriate one is reinterpret_cast but it's\n    // considered illegal to reinterpret_cast a type to itself, so we get an\n    // error in the opposite case.\n    int result = (*strerror_r_ptr)(err, buf, len);\n    if (result == 0) {\n        // POSIX is vague about whether the string will be terminated, although\n        // it indirectly implies that typically ERANGE will be returned, instead\n        // of truncating the string. We play it safe by always terminating the\n        // string explicitly.\n        buf[len - 1] = '\\0';\n    } else {\n        // Error. POSIX is vague about whether the return value is itself a system\n        // error code or something else. On Linux currently it is -1 and errno is\n        // set. 
On BSD-derived systems it is a system error and errno is unchanged.\n        // We try and detect which case it is so as to put as much useful info as\n        // we can into our message.\n        int strerror_error; // The error encountered in strerror\n        int new_errno = errno;\n        if (new_errno != old_errno) {\n            // errno was changed, so probably the return value is just -1 or something\n            // else that doesn't provide any info, and errno is the error.\n            strerror_error = new_errno;\n        } else {\n            // Either the error from strerror_r was the same as the previous value, or\n            // errno wasn't used. Assume the latter.\n            strerror_error = result;\n        }\n        // snprintf truncates and always null-terminates.\n        snprintf(buf, len, \"Error %d while retrieving error %d\", strerror_error, err);\n    }\n    errno = old_errno;\n}\n\nvoid safe_strerror_r(int err, char *buf, size_t len)\n{\n    if (buf == NULL || len <= 0) {\n        return;\n    }\n    // If using glibc (i.e., Linux), the compiler will automatically select the\n    // appropriate overloaded function based on the function type of strerror_r.\n    // The other one will be elided from the translation unit since both are\n    // static.\n    wrap_posix_strerror_r(&strerror_r, err, buf, len);\n}\n\nstd::string safe_strerror(int err)\n{\n    const int buffer_size = 256;\n    char buf[buffer_size];\n    safe_strerror_r(err, buf, sizeof(buf));\n    return std::string(buf);\n}\n}\n}\n"
  },
  {
    "path": "src/utils/shared_io_service.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"shared_io_service.h\"\n\n#include <dsn/utility/flags.h>\n\nnamespace dsn {\nnamespace tools {\n\nconst uint32_t kMinTimerServiceWorkerCount = 3;\nDSN_DEFINE_uint32(\"core\",\n                  timer_service_worker_count,\n                  kMinTimerServiceWorkerCount,\n                  \"the number of threads for timer service\");\nDSN_DEFINE_validator(timer_service_worker_count, [](uint32_t worker_count) -> bool {\n    if (worker_count < kMinTimerServiceWorkerCount) {\n        derror(\"timer_service_worker_count should be at least 3, where one thread is used to \"\n               \"collect all metrics from registery for monitoring systems, and another two threads \"\n               \"are used to compute percentiles.\");\n        return false;\n    }\n    return true;\n});\n\nshared_io_service::shared_io_service()\n{\n    _workers.reserve(FLAGS_timer_service_worker_count);\n    for (uint32_t i = 0; i < FLAGS_timer_service_worker_count; ++i) {\n        _workers.emplace_back([this]() {\n            boost::asio::io_service::work work(ios);\n            ios.run();\n        });\n    }\n}\n\nshared_io_service::~shared_io_service()\n{\n  
  ios.stop();\n    for (auto &worker : _workers) {\n        worker.join();\n    }\n}\n\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/shared_io_service.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#pragma once\n\n#include <thread>\n#include <vector>\n\n#include <boost/asio.hpp>\n\n#include <dsn/utility/singleton.h>\n\nnamespace dsn {\nnamespace tools {\n\n// TODO: seperate this into per-node service, so we can use\n// task::get_current_node for faster access to the nodes in all tasks\n// coz tasks may run in io-threads when [task.xxx] allow_inline is true\nclass shared_io_service : public utils::singleton<shared_io_service>\n{\npublic:\n    boost::asio::io_service ios;\n\nprivate:\n    friend class utils::singleton<shared_io_service>;\n\n    
shared_io_service();\n    ~shared_io_service();\n\n    std::vector<std::thread> _workers;\n};\n\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/simple_logger.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"simple_logger.h\"\n#include <sstream>\n#include <dsn/utility/filesystem.h>\n#include <dsn/utility/flags.h>\n#include <dsn/utils/time_utils.h>\n#include <fmt/format.h>\n\nnamespace dsn {\nnamespace tools {\n\nDSN_DEFINE_bool(\"tools.simple_logger\", fast_flush, false, \"whether to flush immediately\");\n\nDSN_DEFINE_bool(\"tools.simple_logger\",\n                short_header,\n                true,\n                \"whether to use short header (excluding file/function etc.)\");\n\nDSN_DEFINE_uint64(\"tools.simple_logger\",\n                  max_number_of_log_files_on_disk,\n                  20,\n                  \"max number of log files reserved on disk, older logs are auto 
deleted\");\n\nDSN_DEFINE_string(\"tools.simple_logger\",\n                  stderr_start_level,\n                  \"LOG_LEVEL_WARNING\",\n                  \"copy log messages at or above this level to stderr in addition to logfiles\");\nDSN_DEFINE_validator(stderr_start_level, [](const char *level) -> bool {\n    return strcmp(level, \"LOG_LEVEL_INVALID\") != 0;\n});\n\nstatic void print_header(FILE *fp, dsn_log_level_t log_level)\n{\n    static char s_level_char[] = \"IDWEF\";\n\n    uint64_t ts = dsn_now_ns();\n    std::string time_str;\n    dsn::utils::time_ms_to_string(ts / 1000000, time_str);\n\n    int tid = dsn::utils::get_current_tid();\n    fmt::print(fp,\n               \"{}{} ({} {}) {}\",\n               s_level_char[log_level],\n               time_str,\n               ts,\n               tid,\n               log_prefixed_message_func().c_str());\n}\n\nscreen_logger::screen_logger(bool short_header) : logging_provider(\"./\")\n{\n    _short_header = short_header;\n}\n\nscreen_logger::screen_logger(const char *log_dir) : logging_provider(log_dir)\n{\n    _short_header =\n        dsn_config_get_value_bool(\"tools.screen_logger\",\n                                  \"short_header\",\n                                  true,\n                                  \"whether to use short header (excluding file/function etc.)\");\n}\n\nscreen_logger::~screen_logger(void) {}\n\nvoid screen_logger::dsn_logv(const char *file,\n                             const char *function,\n                             const int line,\n                             dsn_log_level_t log_level,\n                             const char *fmt,\n                             va_list args)\n{\n    utils::auto_lock<::dsn::utils::ex_lock_nr> l(_lock);\n\n    print_header(stdout, log_level);\n    if (!_short_header) {\n        printf(\"%s:%d:%s(): \", file, line, function);\n    }\n    vprintf(fmt, args);\n    printf(\"\\n\");\n}\n\nvoid screen_logger::flush() { ::fflush(stdout); 
}\n\nsimple_logger::simple_logger(const char *log_dir) : logging_provider(log_dir)\n{\n    _log_dir = std::string(log_dir);\n    // we assume all valid entries are positive\n    _start_index = 0;\n    _index = 1;\n    _lines = 0;\n    _log = nullptr;\n    _stderr_start_level = enum_from_string(FLAGS_stderr_start_level, LOG_LEVEL_INVALID);\n\n    // check existing log files\n    std::vector<std::string> sub_list;\n    if (!dsn::utils::filesystem::get_subfiles(_log_dir, sub_list, false)) {\n        dassert(false, \"Fail to get subfiles in %s.\", _log_dir.c_str());\n    }\n    for (auto &fpath : sub_list) {\n        auto &&name = dsn::utils::filesystem::get_file_name(fpath);\n        if (name.length() <= 8 || name.substr(0, 4) != \"log.\")\n            continue;\n\n        int index;\n        if (1 != sscanf(name.c_str(), \"log.%d.txt\", &index) || index <= 0)\n            continue;\n\n        if (index > _index)\n            _index = index;\n\n        if (_start_index == 0 || index < _start_index)\n            _start_index = index;\n    }\n    sub_list.clear();\n\n    if (_start_index == 0)\n        _start_index = _index;\n    else\n        ++_index;\n\n    create_log_file();\n}\n\nvoid simple_logger::create_log_file()\n{\n    if (_log != nullptr)\n        ::fclose(_log);\n\n    _lines = 0;\n\n    std::stringstream str;\n    str << _log_dir << \"/log.\" << _index++ << \".txt\";\n    _log = ::fopen(str.str().c_str(), \"w+\");\n\n    // TODO: move gc out of critical path\n    while (_index - _start_index > FLAGS_max_number_of_log_files_on_disk) {\n        std::stringstream str2;\n        str2 << \"log.\" << _start_index++ << \".txt\";\n        auto dp = utils::filesystem::path_combine(_log_dir, str2.str());\n        if (utils::filesystem::file_exists(dp)) {\n            if (::remove(dp.c_str()) != 0) {\n                // if remove failed, just print log and ignore it.\n                printf(\"Failed to remove garbage log file %s\\n\", dp.c_str());\n            }\n   
     }\n    }\n}\n\nsimple_logger::~simple_logger(void)\n{\n    utils::auto_lock<::dsn::utils::ex_lock> l(_lock);\n    ::fclose(_log);\n}\n\nvoid simple_logger::flush()\n{\n    utils::auto_lock<::dsn::utils::ex_lock> l(_lock);\n    ::fflush(_log);\n    ::fflush(stdout);\n}\n\nvoid simple_logger::dsn_logv(const char *file,\n                             const char *function,\n                             const int line,\n                             dsn_log_level_t log_level,\n                             const char *fmt,\n                             va_list args)\n{\n    va_list args2;\n    if (log_level >= _stderr_start_level) {\n        va_copy(args2, args);\n    }\n\n    utils::auto_lock<::dsn::utils::ex_lock> l(_lock);\n\n    print_header(_log, log_level);\n    if (!FLAGS_short_header) {\n        fprintf(_log, \"%s:%d:%s(): \", file, line, function);\n    }\n    vfprintf(_log, fmt, args);\n    fprintf(_log, \"\\n\");\n    if (FLAGS_fast_flush || log_level >= LOG_LEVEL_ERROR) {\n        ::fflush(_log);\n    }\n\n    if (log_level >= _stderr_start_level) {\n        print_header(stdout, log_level);\n        if (!FLAGS_short_header) {\n            printf(\"%s:%d:%s(): \", file, line, function);\n        }\n        vprintf(fmt, args2);\n        printf(\"\\n\");\n    }\n\n    if (++_lines >= 200000) {\n        create_log_file();\n    }\n}\n\nvoid simple_logger::dsn_log(const char *file,\n                            const char *function,\n                            const int line,\n                            dsn_log_level_t log_level,\n                            const char *str)\n{\n    utils::auto_lock<::dsn::utils::ex_lock> l(_lock);\n\n    print_header(_log, log_level);\n    if (!FLAGS_short_header) {\n        fprintf(_log, \"%s:%d:%s(): \", file, line, function);\n    }\n    fprintf(_log, \"%s\\n\", str);\n    if (FLAGS_fast_flush || log_level >= LOG_LEVEL_ERROR) {\n        ::fflush(_log);\n    }\n\n    if (log_level >= _stderr_start_level) {\n        
print_header(stdout, log_level);\n        if (!FLAGS_short_header) {\n            printf(\"%s:%d:%s(): \", file, line, function);\n        }\n        printf(\"%s\\n\", str);\n    }\n\n    if (++_lines >= 200000) {\n        create_log_file();\n    }\n}\n\n} // namespace tools\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/simple_logger.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/tool_api.h>\n#include <thread>\n#include <cstdio>\n\nnamespace dsn {\nnamespace tools {\n\n/*\n * screen_logger provides a logger which writes to terminal.\n */\nclass screen_logger : public logging_provider\n{\npublic:\n    screen_logger(bool short_header);\n    screen_logger(const char *log_dir);\n    virtual ~screen_logger(void);\n\n    virtual void dsn_logv(const char *file,\n                          const char *function,\n                          const int line,\n                          dsn_log_level_t log_level,\n                          const char *fmt,\n                          va_list args);\n\n    virtual void dsn_log(const char *file,\n                         const char *function,\n     
                    const int line,\n                         dsn_log_level_t log_level,\n                         const char *str){};\n\n    virtual void flush();\n\nprivate:\n    ::dsn::utils::ex_lock_nr _lock;\n    bool _short_header;\n};\n\n/*\n * simple_logger provides a logger which writes to file.\n * The max number of lines in a logger file is 200000.\n */\nclass simple_logger : public logging_provider\n{\npublic:\n    simple_logger(const char *log_dir);\n    virtual ~simple_logger(void);\n\n    virtual void dsn_logv(const char *file,\n                          const char *function,\n                          const int line,\n                          dsn_log_level_t log_level,\n                          const char *fmt,\n                          va_list args);\n\n    virtual void dsn_log(const char *file,\n                         const char *function,\n                         const int line,\n                         dsn_log_level_t log_level,\n                         const char *str);\n\n    virtual void flush();\n\nprivate:\n    void create_log_file();\n\nprivate:\n    std::string _log_dir;\n    ::dsn::utils::ex_lock _lock; // use recursive lock to avoid dead lock when flush() is called\n                                 // in signal handler if cored for bad logging format reason.\n    FILE *_log;\n    int _start_index;\n    int _index;\n    int _lines;\n    dsn_log_level_t _stderr_start_level;\n};\n}\n}\n"
  },
  {
    "path": "src/utils/string_view.cpp",
    "content": "//\n// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//      http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <dsn/utility/string_view.h>\n#include <ostream>\n\n#include \"memutil.h\"\n\nnamespace dsn {\n\nnamespace {\nvoid WritePadding(std::ostream &o, size_t pad)\n{\n    char fill_buf[32];\n    memset(fill_buf, o.fill(), sizeof(fill_buf));\n    while (pad) {\n        size_t n = std::min(pad, sizeof(fill_buf));\n        o.write(fill_buf, n);\n        pad -= n;\n    }\n}\n} // namespace\n\nstd::ostream &operator<<(std::ostream &o, string_view piece)\n{\n    std::ostream::sentry sentry(o);\n    if (sentry) {\n        size_t lpad = 0;\n        size_t rpad = 0;\n        if (static_cast<size_t>(o.width()) > piece.size()) {\n            size_t pad = o.width() - piece.size();\n            if ((o.flags() & o.adjustfield) == o.left) {\n                rpad = pad;\n            } else {\n                lpad = pad;\n            }\n        }\n        if (lpad)\n            WritePadding(o, lpad);\n        o.write(piece.data(), piece.size());\n        if (rpad)\n            WritePadding(o, rpad);\n        o.width(0);\n    }\n    return o;\n}\n\nnamespace strings_internal {\n\n// This is significantly faster for case-sensitive matches with very\n// few possible matches.  
See unit test for benchmarks.\nconst char *memmatch(const char *phaystack, size_t haylen, const char *pneedle, size_t neelen)\n{\n    if (0 == neelen) {\n        return phaystack; // even if haylen is 0\n    }\n    if (haylen < neelen)\n        return nullptr;\n\n    const char *match;\n    const char *hayend = phaystack + haylen - neelen + 1;\n    // A static cast is used here to work around the fact that memchr returns\n    // a void* on Posix-compliant systems and const void* on Windows.\n    while ((match = static_cast<const char *>(memchr(phaystack, pneedle[0], hayend - phaystack)))) {\n        if (memcmp(match, pneedle, neelen) == 0)\n            return match;\n        else\n            phaystack = match + 1;\n    }\n    return nullptr;\n}\n\n} // namespace strings_internal\n\nstring_view::size_type string_view::find(string_view s, size_type pos) const noexcept\n{\n    if (empty() || pos > length_) {\n        if (empty() && pos == 0 && s.empty())\n            return 0;\n        return npos;\n    }\n    const char *result = strings_internal::memmatch(ptr_ + pos, length_ - pos, s.ptr_, s.length_);\n    return result ? result - ptr_ : npos;\n}\n\nconstexpr string_view::size_type string_view::npos;\n\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/strings.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#include <cstring>\n#include <sstream>\n#include <openssl/md5.h>\n#include <dsn/utility/strings.h>\n\nnamespace dsn {\nnamespace utils {\n\nstd::string get_last_component(const std::string &input, const char splitters[])\n{\n    int index = -1;\n    const char *s = splitters;\n\n    while (*s != 0) {\n        auto pos = input.find_last_of(*s);\n        if (pos != std::string::npos && (static_cast<int>(pos) > index))\n            index = static_cast<int>(pos);\n        s++;\n    }\n\n    if (index != -1)\n        return input.substr(index + 1);\n    else\n        return input;\n}\n\nvoid split_args(const char *args,\n                /*out*/ std::vector<std::string> &sargs,\n                char splitter,\n                bool 
keep_place_holder)\n{\n    sargs.clear();\n    std::string v(args);\n    uint64_t last_pos = 0;\n    while (true) {\n        auto pos = v.find(splitter, last_pos);\n        if (pos != std::string::npos) {\n            std::string s = trim_string((char *)v.substr(last_pos, pos - last_pos).c_str());\n            if (!s.empty()) {\n                sargs.push_back(s);\n            } else if (keep_place_holder) {\n                sargs.emplace_back(\"\");\n            }\n            last_pos = pos + 1;\n        } else {\n            std::string s = trim_string((char *)v.substr(last_pos).c_str());\n            if (!s.empty()) {\n                sargs.push_back(s);\n            } else if (keep_place_holder) {\n                sargs.emplace_back(\"\");\n            }\n            break;\n        }\n    }\n}\n\nvoid split_args(const char *args,\n                /*out*/ std::unordered_set<std::string> &sargs,\n                char splitter,\n                bool keep_place_holder)\n{\n    std::vector<std::string> sargs_vec;\n    split_args(args, sargs_vec, splitter, keep_place_holder);\n    sargs.insert(sargs_vec.begin(), sargs_vec.end());\n}\n\nvoid split_args(const char *args, /*out*/ std::list<std::string> &sargs, char splitter)\n{\n    sargs.clear();\n\n    std::string v(args);\n\n    int lastPos = 0;\n    while (true) {\n        auto pos = v.find(splitter, lastPos);\n        if (pos != std::string::npos) {\n            std::string s = v.substr(lastPos, pos - lastPos);\n            if (s.length() > 0) {\n                std::string s2 = trim_string((char *)s.c_str());\n                if (s2.length() > 0)\n                    sargs.push_back(s2);\n            }\n            lastPos = static_cast<int>(pos + 1);\n        } else {\n            std::string s = v.substr(lastPos);\n            if (s.length() > 0) {\n                std::string s2 = trim_string((char *)s.c_str());\n                if (s2.length() > 0)\n                    sargs.push_back(s2);\n            }\n   
         break;\n        }\n    }\n}\n\nbool parse_kv_map(const char *args,\n                  /*out*/ std::map<std::string, std::string> &kv_map,\n                  char item_splitter,\n                  char kv_splitter,\n                  bool allow_dup_key)\n{\n    kv_map.clear();\n    std::vector<std::string> splits;\n    split_args(args, splits, item_splitter);\n    for (std::string &i : splits) {\n        if (i.empty())\n            continue;\n        size_t pos = i.find(kv_splitter);\n        if (pos == std::string::npos) {\n            return false;\n        }\n        std::string key = i.substr(0, pos);\n        std::string value = i.substr(pos + 1);\n        if (!allow_dup_key && kv_map.find(key) != kv_map.end()) {\n            return false;\n        }\n        kv_map[key] = value;\n    }\n    return true;\n}\n\nvoid kv_map_to_stream(const std::map<std::string, std::string> &kv_map,\n                      /*out*/ std::ostream &oss,\n                      char item_splitter,\n                      char kv_splitter)\n{\n    int i = 0;\n    for (auto &kv : kv_map) {\n        if (i > 0)\n            oss << item_splitter;\n        oss << kv.first << kv_splitter << kv.second;\n        i++;\n    }\n}\n\nstd::string kv_map_to_string(const std::map<std::string, std::string> &kv_map,\n                             char item_splitter,\n                             char kv_splitter)\n{\n    std::ostringstream oss;\n    kv_map_to_stream(kv_map, oss, item_splitter, kv_splitter);\n    return oss.str();\n}\n\nstd::string\nreplace_string(std::string subject, const std::string &search, const std::string &replace)\n{\n    size_t pos = 0;\n    while ((pos = subject.find(search, pos)) != std::string::npos) {\n        subject.replace(pos, search.length(), replace);\n        pos += replace.length();\n    }\n    return subject;\n}\n\nchar *trim_string(char *s)\n{\n    while (*s != '\\0' && (*s == ' ' || *s == '\\t')) {\n        s++;\n    }\n    char *r = s;\n    s += 
strlen(s);\n    while (s >= r && (*s == '\\0' || *s == ' ' || *s == '\\t' || *s == '\\r' || *s == '\\n')) {\n        *s = '\\0';\n        s--;\n    }\n    return r;\n}\n\nstd::string string_md5(const char *buffer, unsigned length)\n{\n    unsigned char out[MD5_DIGEST_LENGTH];\n    MD5_CTX c;\n    MD5_Init(&c);\n\n    int offset = 0;\n    while (offset < length) {\n        int block = length - offset;\n        if (block > 4096)\n            block = 4096;\n        MD5_Update(&c, buffer, block);\n        offset += block;\n        buffer += block;\n    }\n    MD5_Final(out, &c);\n\n    char str[MD5_DIGEST_LENGTH * 2 + 1];\n    str[MD5_DIGEST_LENGTH * 2] = 0;\n    for (int n = 0; n < MD5_DIGEST_LENGTH; n++)\n        sprintf(str + n + n, \"%02x\", out[n]);\n\n    std::string result;\n    result.assign(str);\n\n    return result;\n}\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn_utils_tests)\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS dsn_runtime\n                 dsn_utils\n                 gtest\n                 )\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\n# Extra files that will be installed\nset(MY_BINPLACES \"${CMAKE_CURRENT_SOURCE_DIR}/config-bad-section.ini\"\n        \"${CMAKE_CURRENT_SOURCE_DIR}/config-dup-key.ini\"\n        \"${CMAKE_CURRENT_SOURCE_DIR}/config-dup-section.ini\"\n        \"${CMAKE_CURRENT_SOURCE_DIR}/config-empty.ini\"\n        \"${CMAKE_CURRENT_SOURCE_DIR}/config-no-key.ini\"\n        \"${CMAKE_CURRENT_SOURCE_DIR}/config-no-section.ini\"\n        \"${CMAKE_CURRENT_SOURCE_DIR}/config-null-section.ini\"\n        \"${CMAKE_CURRENT_SOURCE_DIR}/config-sample.ini\"\n        \"${CMAKE_CURRENT_SOURCE_DIR}/config-unmatch-section.ini\"\n        \"${CMAKE_CURRENT_SOURCE_DIR}/run.sh\"\n        \"${CMAKE_CURRENT_SOURCE_DIR}/clear.sh\"\n        )\nadd_subdirectory(nth_element_bench)\nadd_definitions(-Wno-dangling-else)\ndsn_add_test()\n"
  },
  {
    "path": "src/utils/test/TokenBucketTest.cpp",
    "content": "/*\n * Copyright (c) Facebook, Inc. and its affiliates.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#include \"TokenBucketTest.h\"\n#include <gtest/gtest.h>\n#include <dsn/utility/TokenBucket.h>\n\nusing namespace folly;\n\nTEST(TokenBucket, ReverseTime)\n{\n    const double rate = 1000;\n    TokenBucket tokenBucket(rate, rate * 0.01 + 1e-6, 0);\n    size_t count = 0;\n    while (tokenBucket.consume(1, 0.1)) {\n        count += 1;\n    }\n    EXPECT_EQ(10, count);\n    // Going backwards in time has no affect on the toke count (this protects\n    // against different threads providing out of order timestamps).\n    double tokensBefore = tokenBucket.available();\n    EXPECT_FALSE(tokenBucket.consume(1, 0.09999999));\n    EXPECT_EQ(tokensBefore, tokenBucket.available());\n}\n\nTEST_P(TokenBucketTest, sanity)\n{\n    std::pair<double, double> params = GetParam();\n    double rate = params.first;\n    double consumeSize = params.second;\n\n    const double tenMillisecondBurst = rate * 0.010;\n    // Select a burst size of 10 milliseconds at the max rate or the consume size\n    // if 10 ms at rate is too small.\n    const double burstSize = std::max(consumeSize, tenMillisecondBurst);\n    TokenBucket tokenBucket(rate, burstSize, 0);\n    double tokenCounter = 0;\n    double currentTime = 0;\n    // Simulate time advancing 10 seconds\n    for (; currentTime <= 10.0; currentTime += 0.001) {\n        
EXPECT_FALSE(tokenBucket.consume(burstSize + 1, currentTime));\n        while (tokenBucket.consume(consumeSize, currentTime)) {\n            tokenCounter += consumeSize;\n        }\n        // Tokens consumed should exceed some lower bound based on rate.\n        // Note: The token bucket implementation is not precise, so the lower bound\n        // is somewhat fudged. The upper bound is accurate however.\n        EXPECT_LE(rate * currentTime * 0.9 - 1, tokenCounter);\n        // Tokens consumed should not exceed some upper bound based on rate.\n        EXPECT_GE(rate * currentTime + 1e-6, tokenCounter);\n    }\n}\n\nstatic std::vector<std::pair<double, double>> rateToConsumeSize = {\n    {100, 1}, {1000, 1}, {10000, 1}, {10000, 5},\n};\n\nINSTANTIATE_TEST_CASE_P(TokenBucket, TokenBucketTest, ::testing::ValuesIn(rateToConsumeSize));\n\nTEST(TokenBucket, drainOnFail)\n{\n    DynamicTokenBucket tokenBucket;\n\n    // Almost empty the bucket\n    EXPECT_TRUE(tokenBucket.consume(9, 10, 10, 1));\n\n    // Request more tokens than available\n    EXPECT_FALSE(tokenBucket.consume(5, 10, 10, 1));\n    EXPECT_DOUBLE_EQ(1.0, tokenBucket.available(10, 10, 1));\n\n    // Again request more tokens than available, but ask to drain\n    EXPECT_DOUBLE_EQ(1.0, tokenBucket.consumeOrDrain(5, 10, 10, 1));\n    EXPECT_DOUBLE_EQ(0.0, tokenBucket.consumeOrDrain(1, 10, 10, 1));\n}\n\nTEST(TokenBucket, returnTokensTest)\n{\n    DynamicTokenBucket tokenBucket;\n\n    // Empty the bucket.\n    EXPECT_TRUE(tokenBucket.consume(10, 10, 10, 5));\n    // consume should fail now.\n    EXPECT_FALSE(tokenBucket.consume(1, 10, 10, 5));\n    EXPECT_DOUBLE_EQ(0.0, tokenBucket.consumeOrDrain(1, 10, 10, 5));\n\n    // Return tokens. 
Return 40 'excess' tokens but they won't be available to\n    // later callers.\n    tokenBucket.returnTokens(50, 10);\n    // Should be able to allocate 10 tokens again but the extra 40 returned in\n    // previous call are gone.\n    EXPECT_TRUE(tokenBucket.consume(10, 10, 10, 5));\n    EXPECT_FALSE(tokenBucket.consume(1, 10, 10, 5));\n}\n\nTEST(TokenBucket, consumeOrBorrowTest)\n{\n    DynamicTokenBucket tokenBucket;\n\n    // Empty the bucket.\n    EXPECT_TRUE(tokenBucket.consume(10, 10, 10, 1));\n    // consume should fail now.\n    EXPECT_FALSE(tokenBucket.consume(1, 10, 10, 1));\n    // Now borrow from future allocations. Each call is asking for 1s worth of\n    // allocations so it should return (i+1)*1s in the ith iteration as the time\n    // caller needs to wait.\n    for (int i = 0; i < 10; ++i) {\n        auto waitTime = tokenBucket.consumeWithBorrowNonBlocking(10, 10, 10, 1);\n        EXPECT_TRUE(waitTime.is_initialized());\n        EXPECT_DOUBLE_EQ((i + 1) * 1.0, *waitTime);\n    }\n\n    // No allocation will succeed until nowInSeconds goes higher than 11s.\n    EXPECT_FALSE(tokenBucket.consume(1, 10, 10, 11));\n}\n"
  },
  {
    "path": "src/utils/test/TokenBucketTest.h",
    "content": "/*\n * Copyright (c) Facebook, Inc. and its affiliates.\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n *     http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\n#pragma once\n\n#include <gtest/gtest.h>\n#include <dsn/utility/TokenBucket.h>\n\nnamespace folly {\n\nstruct TokenBucketTest : public ::testing::TestWithParam<std::pair<double, double>>\n{\n};\n\n} // namespace folly\n"
  },
  {
    "path": "src/utils/test/address.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Unit-test for rpc_address.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/tool-api/rpc_address.h>\n#include <dsn/tool-api/group_address.h>\n#include <gtest/gtest.h>\n\nusing namespace ::dsn;\n\nstatic inline uint32_t host_ipv4(uint8_t sec1, uint8_t sec2, uint8_t sec3, uint8_t sec4)\n{\n    uint32_t ip = 0;\n    ip |= (uint32_t)sec1 << 24;\n    ip |= (uint32_t)sec2 << 16;\n    ip |= (uint32_t)sec3 << 8;\n    ip |= (uint32_t)sec4;\n    return ip;\n}\n\nTEST(core, rpc_address_ipv4_from_host)\n{\n    // localhost --> 127.0.0.1\n    // on some systems \"localhost\" could be \"127.0.1.1\" (debian)\n    
ASSERT_TRUE(host_ipv4(127, 0, 0, 1) == rpc_address::ipv4_from_host(\"localhost\") ||\n                host_ipv4(127, 0, 1, 1) == rpc_address::ipv4_from_host(\"localhost\"));\n\n    // 127.0.0.1 --> 127.0.0.1\n    ASSERT_EQ(host_ipv4(127, 0, 0, 1), rpc_address::ipv4_from_host(\"127.0.0.1\"));\n}\n\nTEST(core, rpc_address_ipv4_from_network_interface)\n{\n    ASSERT_EQ(host_ipv4(127, 0, 0, 1), rpc_address::ipv4_from_network_interface(\"lo\"));\n    ASSERT_EQ(host_ipv4(0, 0, 0, 0),\n              rpc_address::ipv4_from_network_interface(\"not_exist_interface\"));\n}\n\nTEST(core, is_site_local_address)\n{\n    ASSERT_FALSE(rpc_address::is_site_local_address(htonl(host_ipv4(1, 2, 3, 4))));\n    ASSERT_TRUE(rpc_address::is_site_local_address(htonl(host_ipv4(10, 235, 111, 111))));\n    ASSERT_FALSE(rpc_address::is_site_local_address(htonl(host_ipv4(171, 11, 11, 11))));\n    ASSERT_TRUE(rpc_address::is_site_local_address(htonl(host_ipv4(172, 16, 2, 2))));\n    ASSERT_TRUE(rpc_address::is_site_local_address(htonl(host_ipv4(172, 31, 234, 255))));\n    ASSERT_FALSE(rpc_address::is_site_local_address(htonl(host_ipv4(191, 128, 1, 2))));\n    ASSERT_TRUE(rpc_address::is_site_local_address(htonl(host_ipv4(192, 168, 3, 45))));\n    ASSERT_FALSE(rpc_address::is_site_local_address(htonl(host_ipv4(201, 201, 201, 201))));\n}\n\nTEST(core, is_docker_netcard)\n{\n    ASSERT_TRUE(rpc_address::is_docker_netcard(\"docker0\", htonl(host_ipv4(1, 2, 3, 4))));\n    ASSERT_TRUE(rpc_address::is_docker_netcard(\"10docker5\", htonl(host_ipv4(4, 5, 6, 8))));\n    ASSERT_FALSE(rpc_address::is_docker_netcard(\"eth0\", htonl(host_ipv4(192, 168, 123, 123))));\n    ASSERT_TRUE(rpc_address::is_docker_netcard(\"eth0\", htonl(host_ipv4(172, 17, 42, 1))));\n}\n\nTEST(core, rpc_address_to_string)\n{\n    {\n        dsn::rpc_address addr;\n        addr.assign_ipv4(host_ipv4(127, 0, 0, 1), 8080);\n        ASSERT_EQ(std::string(\"127.0.0.1:8080\"), addr.to_std_string());\n    }\n\n    {\n        const char 
*name = \"test_group\";\n        dsn::rpc_address addr;\n        addr.assign_group(name);\n        ASSERT_EQ(std::string(name), addr.to_std_string());\n    }\n\n    {\n        dsn::rpc_address addr;\n        ASSERT_EQ(std::string(\"invalid address\"), addr.to_std_string());\n    }\n}\n\nTEST(core, dsn_address_build)\n{\n    {\n        dsn::rpc_address addr;\n        addr.assign_ipv4(host_ipv4(127, 0, 0, 1), 8080);\n        ASSERT_EQ(HOST_TYPE_IPV4, addr.type());\n        ASSERT_EQ(host_ipv4(127, 0, 0, 1), addr.ip());\n        ASSERT_EQ(8080, addr.port());\n\n        ASSERT_TRUE(dsn::rpc_address(\"127.0.0.1\", 8080) == dsn::rpc_address(\"localhost\", 8080) ||\n                    dsn::rpc_address(\"127.0.1.1\", 8080) == dsn::rpc_address(\"localhost\", 8080));\n        ASSERT_EQ(addr, dsn::rpc_address(\"127.0.0.1\", 8080));\n        ASSERT_EQ(addr, dsn::rpc_address(host_ipv4(127, 0, 0, 1), 8080));\n    }\n\n    {\n        const char *name = \"test_group\";\n        dsn::rpc_address addr;\n        addr.assign_group(name);\n\n        ASSERT_EQ(HOST_TYPE_GROUP, addr.type());\n        ASSERT_STREQ(name, addr.group_address()->name());\n        ASSERT_EQ(1, addr.group_address()->get_count());\n    }\n}\n\nTEST(core, rpc_group_address)\n{\n    rpc_address addr(\"127.0.0.1\", 8080);\n    rpc_address invalid_addr;\n    rpc_address addr2(\"127.0.0.1\", 8081);\n\n    rpc_address t;\n    t.assign_group(\"test_group\");\n    ASSERT_EQ(HOST_TYPE_GROUP, t.type());\n    rpc_group_address *g = t.group_address();\n    ASSERT_EQ(std::string(\"test_group\"), g->name());\n    ASSERT_EQ(1, g->get_count());\n\n    // { }\n    ASSERT_FALSE(g->remove(addr));\n    ASSERT_FALSE(g->contains(addr));\n    ASSERT_EQ(0u, g->members().size());\n    ASSERT_EQ(invalid_addr, g->random_member());\n    ASSERT_EQ(invalid_addr, g->next(addr));\n    ASSERT_EQ(invalid_addr, g->leader());\n    ASSERT_EQ(invalid_addr, g->possible_leader());\n\n    // { addr }\n    ASSERT_TRUE(g->add(addr));\n    
ASSERT_FALSE(g->add(addr));\n    ASSERT_TRUE(g->contains(addr));\n    ASSERT_EQ(1u, g->members().size());\n    ASSERT_EQ(addr, g->members().at(0));\n    ASSERT_EQ(addr, g->random_member());\n    ASSERT_EQ(addr, g->next(addr));\n    ASSERT_EQ(addr, g->next(invalid_addr));\n    ASSERT_EQ(addr, g->next(addr2));\n    ASSERT_EQ(invalid_addr, g->leader());\n    ASSERT_EQ(addr, g->possible_leader());\n\n    // { addr* }\n    g->set_leader(addr);\n    ASSERT_TRUE(g->contains(addr));\n    ASSERT_EQ(1u, g->members().size());\n    ASSERT_EQ(addr, g->members().at(0));\n    ASSERT_EQ(addr, g->leader());\n    ASSERT_EQ(addr, g->possible_leader());\n\n    // { addr, addr2* }\n    g->set_leader(addr2);\n    ASSERT_TRUE(g->contains(addr));\n    ASSERT_TRUE(g->contains(addr2));\n    ASSERT_EQ(2u, g->members().size());\n    ASSERT_EQ(addr, g->members().at(0));\n    ASSERT_EQ(addr2, g->members().at(1));\n    ASSERT_EQ(addr2, g->leader());\n    ASSERT_EQ(addr2, g->possible_leader());\n    ASSERT_EQ(addr, g->next(addr2));\n    ASSERT_EQ(addr2, g->next(addr));\n\n    // { addr, addr2 }\n    g->set_leader(invalid_addr);\n    ASSERT_TRUE(g->contains(addr));\n    ASSERT_TRUE(g->contains(addr2));\n    ASSERT_EQ(2u, g->members().size());\n    ASSERT_EQ(addr, g->members().at(0));\n    ASSERT_EQ(addr2, g->members().at(1));\n    ASSERT_EQ(invalid_addr, g->leader());\n\n    // { addr*, addr2 }\n    g->set_leader(addr);\n    ASSERT_TRUE(g->contains(addr));\n    ASSERT_TRUE(g->contains(addr2));\n    ASSERT_EQ(2u, g->members().size());\n    ASSERT_EQ(addr, g->members().at(0));\n    ASSERT_EQ(addr2, g->members().at(1));\n    ASSERT_EQ(addr, g->leader());\n\n    // { addr2 }\n    ASSERT_TRUE(g->remove(addr));\n    ASSERT_FALSE(g->contains(addr));\n    ASSERT_TRUE(g->contains(addr2));\n    ASSERT_EQ(1u, g->members().size());\n    ASSERT_EQ(addr2, g->members().at(0));\n    ASSERT_EQ(invalid_addr, g->leader());\n\n    // { }\n    ASSERT_TRUE(g->remove(addr2));\n    ASSERT_FALSE(g->contains(addr2));\n  
  ASSERT_EQ(0u, g->members().size());\n    ASSERT_EQ(invalid_addr, g->leader());\n}\n"
  },
  {
    "path": "src/utils/test/autoref_ptr_test.cpp",
    "content": "// Copyright (c) 2012 The Chromium Authors. All rights reserved.\n// Use of this source code is governed by a BSD-style license that can be\n// found in the LICENSE file.\n\n// this test is copy from\n// https://github.com/chromium/chromium/blob/07eea964c3f60f501782d8eb51f62ca75ddf3908/base/memory/ref_counted_unittest.cc\n\n#include <type_traits>\n#include <utility>\n\n#include <gtest/gtest.h>\n#include <dsn/utility/autoref_ptr.h>\n\nnamespace {\n\nclass SelfAssign : public dsn::ref_counter\n{\nprotected:\n    virtual ~SelfAssign() = default;\n\nprivate:\n    friend class dsn::ref_counter;\n};\n\nclass Derived : public SelfAssign\n{\nprotected:\n    ~Derived() override = default;\n\nprivate:\n    friend class dsn::ref_counter;\n};\n\nclass ScopedRefPtrToSelf : public dsn::ref_counter\n{\npublic:\n    ScopedRefPtrToSelf() : self_ptr_(this) {}\n\n    static bool was_destroyed() { return was_destroyed_; }\n\n    static void reset_was_destroyed() { was_destroyed_ = false; }\n\n    dsn::ref_ptr<ScopedRefPtrToSelf> self_ptr_;\n\nprivate:\n    friend class dsn::ref_counter;\n    ~ScopedRefPtrToSelf() { was_destroyed_ = true; }\n\n    static bool was_destroyed_;\n};\n\nbool ScopedRefPtrToSelf::was_destroyed_ = false;\n\nclass ScopedRefPtrCountBase : public dsn::ref_counter\n{\npublic:\n    ScopedRefPtrCountBase() { ++constructor_count_; }\n\n    static int constructor_count() { return constructor_count_; }\n\n    static int destructor_count() { return destructor_count_; }\n\n    static void reset_count()\n    {\n        constructor_count_ = 0;\n        destructor_count_ = 0;\n    }\n\nprotected:\n    virtual ~ScopedRefPtrCountBase() { ++destructor_count_; }\n\nprivate:\n    friend class dsn::ref_counter;\n\n    static int constructor_count_;\n    static int destructor_count_;\n};\n\nint ScopedRefPtrCountBase::constructor_count_ = 0;\nint ScopedRefPtrCountBase::destructor_count_ = 0;\n\nclass ScopedRefPtrCountDerived : public 
ScopedRefPtrCountBase\n{\npublic:\n    ScopedRefPtrCountDerived() { ++constructor_count_; }\n\n    static int constructor_count() { return constructor_count_; }\n\n    static int destructor_count() { return destructor_count_; }\n\n    static void reset_count()\n    {\n        constructor_count_ = 0;\n        destructor_count_ = 0;\n    }\n\nprotected:\n    ~ScopedRefPtrCountDerived() override { ++destructor_count_; }\n\nprivate:\n    friend class dsn::ref_counter;\n\n    static int constructor_count_;\n    static int destructor_count_;\n};\n\nint ScopedRefPtrCountDerived::constructor_count_ = 0;\nint ScopedRefPtrCountDerived::destructor_count_ = 0;\n\nclass Other : public dsn::ref_counter\n{\nprivate:\n    friend class dsn::ref_counter;\n\n    ~Other() = default;\n};\n\ndsn::ref_ptr<Other> Overloaded(dsn::ref_ptr<Other> other) { return other; }\n\ndsn::ref_ptr<SelfAssign> Overloaded(dsn::ref_ptr<SelfAssign> self_assign) { return self_assign; }\n\nclass InitialRefCountIsOne : public dsn::ref_counter\n{\npublic:\n    InitialRefCountIsOne() = default;\n\nprivate:\n    friend class dsn::ref_counter;\n    ~InitialRefCountIsOne() = default;\n};\n\n} // end namespace\n\nTEST(RefCountedUnitTest, TestSelfAssignment)\n{\n    SelfAssign *p = new SelfAssign;\n    dsn::ref_ptr<SelfAssign> var(p);\n    // var = var;\n    EXPECT_EQ(var.get(), p);\n    // comment the following two lines because clang compiler would complain with \"-Wself-move\"\n    // var = std::move(var);\n    // EXPECT_EQ(var.get(), p);\n\n    // please uncomment these lines when swap are supported in ref_ptr\n    // var.swap(var);\n    // EXPECT_EQ(var.get(), p);\n    // swap(var, var);\n    // EXPECT_EQ(var.get(), p);\n}\n\nTEST(RefCountedUnitTest, ScopedRefPtrToSelfPointerAssignment)\n{\n    ScopedRefPtrToSelf::reset_was_destroyed();\n\n    ScopedRefPtrToSelf *check = new ScopedRefPtrToSelf();\n    EXPECT_FALSE(ScopedRefPtrToSelf::was_destroyed());\n    check->self_ptr_ = nullptr;\n    
EXPECT_TRUE(ScopedRefPtrToSelf::was_destroyed());\n}\n\nTEST(RefCountedUnitTest, ScopedRefPtrToSelfMoveAssignment)\n{\n    ScopedRefPtrToSelf::reset_was_destroyed();\n\n    ScopedRefPtrToSelf *check = new ScopedRefPtrToSelf();\n    EXPECT_FALSE(ScopedRefPtrToSelf::was_destroyed());\n    // Releasing |check->self_ptr_| will delete |check|.\n    // The move assignment operator must assign |check->self_ptr_| first then\n    // release |check->self_ptr_|.\n    check->self_ptr_ = dsn::ref_ptr<ScopedRefPtrToSelf>();\n    EXPECT_TRUE(ScopedRefPtrToSelf::was_destroyed());\n}\n\nTEST(RefCountedUnitTest, BooleanTesting)\n{\n    dsn::ref_ptr<SelfAssign> ptr_to_an_instance = new SelfAssign;\n    EXPECT_TRUE(ptr_to_an_instance);\n    EXPECT_FALSE(!ptr_to_an_instance);\n\n    if (ptr_to_an_instance) {\n    } else {\n        ADD_FAILURE() << \"Pointer to an instance should result in true.\";\n    }\n\n    if (!ptr_to_an_instance) { // check for operator!().\n        ADD_FAILURE() << \"Pointer to an instance should result in !x being false.\";\n    }\n\n    dsn::ref_ptr<SelfAssign> null_ptr;\n    EXPECT_FALSE(null_ptr);\n    EXPECT_TRUE(!null_ptr);\n\n    if (null_ptr) {\n        ADD_FAILURE() << \"Null pointer should result in false.\";\n    }\n\n    if (!null_ptr) { // check for operator!().\n    } else {\n        ADD_FAILURE() << \"Null pointer should result in !x being true.\";\n    }\n}\n\nTEST(RefCountedUnitTest, Equality)\n{\n    dsn::ref_ptr<SelfAssign> p1(new SelfAssign);\n    dsn::ref_ptr<SelfAssign> p2(new SelfAssign);\n\n    EXPECT_EQ(p1, p1);\n    EXPECT_EQ(p2, p2);\n\n    EXPECT_NE(p1, p2);\n    EXPECT_NE(p2, p1);\n}\n\nTEST(RefCountedUnitTest, NullptrEquality)\n{\n    dsn::ref_ptr<SelfAssign> ptr_to_an_instance(new SelfAssign);\n    dsn::ref_ptr<SelfAssign> ptr_to_nullptr;\n\n    EXPECT_NE(nullptr, ptr_to_an_instance);\n    EXPECT_NE(ptr_to_an_instance, nullptr);\n    EXPECT_EQ(nullptr, ptr_to_nullptr);\n    EXPECT_EQ(ptr_to_nullptr, 
nullptr);\n}\n\nTEST(RefCountedUnitTest, ConvertibleEquality)\n{\n    dsn::ref_ptr<Derived> p1(new Derived);\n    dsn::ref_ptr<SelfAssign> p2;\n\n    EXPECT_NE(p1, p2);\n    EXPECT_NE(p2, p1);\n\n    p2 = p1;\n\n    EXPECT_EQ(p1, p2);\n    EXPECT_EQ(p2, p1);\n}\n\nTEST(RefCountedUnitTest, MoveAssignment1)\n{\n    ScopedRefPtrCountBase::reset_count();\n\n    {\n        ScopedRefPtrCountBase *raw = new ScopedRefPtrCountBase();\n        dsn::ref_ptr<ScopedRefPtrCountBase> p1(raw);\n        EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n\n        {\n            dsn::ref_ptr<ScopedRefPtrCountBase> p2;\n\n            p2 = std::move(p1);\n            EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n            EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n            EXPECT_EQ(nullptr, p1.get());\n            EXPECT_EQ(raw, p2.get());\n\n            // p2 goes out of scope.\n        }\n        EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(1, ScopedRefPtrCountBase::destructor_count());\n\n        // p1 goes out of scope.\n    }\n    EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n    EXPECT_EQ(1, ScopedRefPtrCountBase::destructor_count());\n}\n\nTEST(RefCountedUnitTest, MoveAssignment2)\n{\n    ScopedRefPtrCountBase::reset_count();\n\n    {\n        ScopedRefPtrCountBase *raw = new ScopedRefPtrCountBase();\n        dsn::ref_ptr<ScopedRefPtrCountBase> p1;\n        EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n\n        {\n            dsn::ref_ptr<ScopedRefPtrCountBase> p2(raw);\n            EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n            EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n\n            p1 = std::move(p2);\n            EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n            EXPECT_EQ(0, 
ScopedRefPtrCountBase::destructor_count());\n            EXPECT_EQ(raw, p1.get());\n            EXPECT_EQ(nullptr, p2.get());\n\n            // p2 goes out of scope.\n        }\n        EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n\n        // p1 goes out of scope.\n    }\n    EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n    EXPECT_EQ(1, ScopedRefPtrCountBase::destructor_count());\n}\n\nTEST(RefCountedUnitTest, MoveAssignmentSameInstance1)\n{\n    ScopedRefPtrCountBase::reset_count();\n\n    {\n        ScopedRefPtrCountBase *raw = new ScopedRefPtrCountBase();\n        dsn::ref_ptr<ScopedRefPtrCountBase> p1(raw);\n        EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n\n        {\n            dsn::ref_ptr<ScopedRefPtrCountBase> p2(p1);\n            EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n            EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n\n            p1 = std::move(p2);\n            EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n            EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n            EXPECT_EQ(raw, p1.get());\n            EXPECT_EQ(nullptr, p2.get());\n\n            // p2 goes out of scope.\n        }\n        EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n\n        // p1 goes out of scope.\n    }\n    EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n    EXPECT_EQ(1, ScopedRefPtrCountBase::destructor_count());\n}\n\nTEST(RefCountedUnitTest, MoveAssignmentSameInstance2)\n{\n    ScopedRefPtrCountBase::reset_count();\n\n    {\n        ScopedRefPtrCountBase *raw = new ScopedRefPtrCountBase();\n        dsn::ref_ptr<ScopedRefPtrCountBase> p1(raw);\n        EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(0, 
ScopedRefPtrCountBase::destructor_count());\n\n        {\n            dsn::ref_ptr<ScopedRefPtrCountBase> p2(p1);\n            EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n            EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n\n            p2 = std::move(p1);\n            EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n            EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n            EXPECT_EQ(nullptr, p1.get());\n            EXPECT_EQ(raw, p2.get());\n\n            // p2 goes out of scope.\n        }\n        EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(1, ScopedRefPtrCountBase::destructor_count());\n\n        // p1 goes out of scope.\n    }\n    EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n    EXPECT_EQ(1, ScopedRefPtrCountBase::destructor_count());\n}\n\nTEST(RefCountedUnitTest, MoveAssignmentDifferentInstances)\n{\n    ScopedRefPtrCountBase::reset_count();\n\n    {\n        ScopedRefPtrCountBase *raw1 = new ScopedRefPtrCountBase();\n        dsn::ref_ptr<ScopedRefPtrCountBase> p1(raw1);\n        EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n\n        {\n            ScopedRefPtrCountBase *raw2 = new ScopedRefPtrCountBase();\n            dsn::ref_ptr<ScopedRefPtrCountBase> p2(raw2);\n            EXPECT_EQ(2, ScopedRefPtrCountBase::constructor_count());\n            EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n\n            p1 = std::move(p2);\n            EXPECT_EQ(2, ScopedRefPtrCountBase::constructor_count());\n            EXPECT_EQ(1, ScopedRefPtrCountBase::destructor_count());\n            EXPECT_EQ(raw2, p1.get());\n            EXPECT_EQ(nullptr, p2.get());\n\n            // p2 goes out of scope.\n        }\n        EXPECT_EQ(2, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(1, ScopedRefPtrCountBase::destructor_count());\n\n        // p1 goes out of 
scope.\n    }\n    EXPECT_EQ(2, ScopedRefPtrCountBase::constructor_count());\n    EXPECT_EQ(2, ScopedRefPtrCountBase::destructor_count());\n}\n\nTEST(RefCountedUnitTest, MoveAssignmentSelfMove)\n{\n    ScopedRefPtrCountBase::reset_count();\n\n    {\n        ScopedRefPtrCountBase *raw = new ScopedRefPtrCountBase;\n        dsn::ref_ptr<ScopedRefPtrCountBase> p1(raw);\n        dsn::ref_ptr<ScopedRefPtrCountBase> &p1_ref = p1;\n\n        EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n\n        p1 = std::move(p1_ref);\n\n        // |p1| is \"valid but unspecified\", so don't bother inspecting its\n        // contents, just ensure that we don't crash.\n    }\n\n    EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n    EXPECT_EQ(1, ScopedRefPtrCountBase::destructor_count());\n}\n\nTEST(RefCountedUnitTest, MoveAssignmentDerived)\n{\n    ScopedRefPtrCountBase::reset_count();\n    ScopedRefPtrCountDerived::reset_count();\n\n    {\n        ScopedRefPtrCountBase *raw1 = new ScopedRefPtrCountBase();\n        dsn::ref_ptr<ScopedRefPtrCountBase> p1(raw1);\n        EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n        EXPECT_EQ(0, ScopedRefPtrCountDerived::constructor_count());\n        EXPECT_EQ(0, ScopedRefPtrCountDerived::destructor_count());\n\n        {\n            ScopedRefPtrCountDerived *raw2 = new ScopedRefPtrCountDerived();\n            dsn::ref_ptr<ScopedRefPtrCountDerived> p2(raw2);\n            EXPECT_EQ(2, ScopedRefPtrCountBase::constructor_count());\n            EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n            EXPECT_EQ(1, ScopedRefPtrCountDerived::constructor_count());\n            EXPECT_EQ(0, ScopedRefPtrCountDerived::destructor_count());\n\n            p1 = std::move(p2);\n            EXPECT_EQ(2, ScopedRefPtrCountBase::constructor_count());\n            EXPECT_EQ(1, 
ScopedRefPtrCountBase::destructor_count());\n            EXPECT_EQ(1, ScopedRefPtrCountDerived::constructor_count());\n            EXPECT_EQ(0, ScopedRefPtrCountDerived::destructor_count());\n            EXPECT_EQ(raw2, p1.get());\n            EXPECT_EQ(nullptr, p2.get());\n\n            // p2 goes out of scope.\n        }\n        EXPECT_EQ(2, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(1, ScopedRefPtrCountBase::destructor_count());\n        EXPECT_EQ(1, ScopedRefPtrCountDerived::constructor_count());\n        EXPECT_EQ(0, ScopedRefPtrCountDerived::destructor_count());\n\n        // p1 goes out of scope.\n    }\n    EXPECT_EQ(2, ScopedRefPtrCountBase::constructor_count());\n    EXPECT_EQ(2, ScopedRefPtrCountBase::destructor_count());\n    EXPECT_EQ(1, ScopedRefPtrCountDerived::constructor_count());\n    EXPECT_EQ(1, ScopedRefPtrCountDerived::destructor_count());\n}\n\nTEST(RefCountedUnitTest, MoveConstructor)\n{\n    ScopedRefPtrCountBase::reset_count();\n\n    {\n        ScopedRefPtrCountBase *raw = new ScopedRefPtrCountBase();\n        dsn::ref_ptr<ScopedRefPtrCountBase> p1(raw);\n        EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n\n        {\n            dsn::ref_ptr<ScopedRefPtrCountBase> p2(std::move(p1));\n            EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n            EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n            EXPECT_EQ(nullptr, p1.get());\n            EXPECT_EQ(raw, p2.get());\n\n            // p2 goes out of scope.\n        }\n        EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(1, ScopedRefPtrCountBase::destructor_count());\n\n        // p1 goes out of scope.\n    }\n    EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n    EXPECT_EQ(1, ScopedRefPtrCountBase::destructor_count());\n}\n\nTEST(RefCountedUnitTest, MoveConstructorDerived)\n{\n    
ScopedRefPtrCountBase::reset_count();\n    ScopedRefPtrCountDerived::reset_count();\n\n    {\n        ScopedRefPtrCountDerived *raw1 = new ScopedRefPtrCountDerived();\n        dsn::ref_ptr<ScopedRefPtrCountDerived> p1(raw1);\n        EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n        EXPECT_EQ(1, ScopedRefPtrCountDerived::constructor_count());\n        EXPECT_EQ(0, ScopedRefPtrCountDerived::destructor_count());\n\n        {\n            dsn::ref_ptr<ScopedRefPtrCountBase> p2(std::move(p1));\n            EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n            EXPECT_EQ(0, ScopedRefPtrCountBase::destructor_count());\n            EXPECT_EQ(1, ScopedRefPtrCountDerived::constructor_count());\n            EXPECT_EQ(0, ScopedRefPtrCountDerived::destructor_count());\n            EXPECT_EQ(nullptr, p1.get());\n            EXPECT_EQ(raw1, p2.get());\n\n            // p2 goes out of scope.\n        }\n        EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n        EXPECT_EQ(1, ScopedRefPtrCountBase::destructor_count());\n        EXPECT_EQ(1, ScopedRefPtrCountDerived::constructor_count());\n        EXPECT_EQ(1, ScopedRefPtrCountDerived::destructor_count());\n\n        // p1 goes out of scope.\n    }\n    EXPECT_EQ(1, ScopedRefPtrCountBase::constructor_count());\n    EXPECT_EQ(1, ScopedRefPtrCountBase::destructor_count());\n    EXPECT_EQ(1, ScopedRefPtrCountDerived::constructor_count());\n    EXPECT_EQ(1, ScopedRefPtrCountDerived::destructor_count());\n}\n\nTEST(RefCountedUnitTest, TestOverloadResolutionCopy)\n{\n    const dsn::ref_ptr<Derived> derived(new Derived);\n    const dsn::ref_ptr<SelfAssign> expected(derived);\n    EXPECT_EQ(expected, Overloaded((dsn::ref_ptr<SelfAssign>)(derived)));\n\n    const dsn::ref_ptr<Other> other(new Other);\n    EXPECT_EQ(other, Overloaded((dsn::ref_ptr<Other>)other));\n}\n\nTEST(RefCountedUnitTest, TestOverloadResolutionMove)\n{\n    
dsn::ref_ptr<Derived> derived(new Derived);\n    const dsn::ref_ptr<SelfAssign> expected(derived);\n    EXPECT_EQ(expected, Overloaded((dsn::ref_ptr<SelfAssign>)(std::move(derived))));\n\n    dsn::ref_ptr<Other> other(new Other);\n    const dsn::ref_ptr<Other> other2(other);\n    EXPECT_EQ(other2, Overloaded((dsn::ref_ptr<Other>)(std::move(other))));\n}\n\nTEST(RefCountedUnitTest, TestMakeRefCounted)\n{\n    dsn::ref_ptr<Derived> derived = new Derived;\n    EXPECT_TRUE(derived->get_count() == 1);\n    derived = nullptr;\n\n    dsn::ref_ptr<Derived> derived2(new Derived());\n    EXPECT_TRUE(derived2->get_count() == 1);\n    derived2 = nullptr;\n}\n\nTEST(RefCountedUnitTest, TestInitialRefCountIsOne)\n{\n    dsn::ref_ptr<InitialRefCountIsOne> obj(new InitialRefCountIsOne());\n    EXPECT_TRUE(obj->get_count() == 1);\n    obj = nullptr;\n\n    dsn::ref_ptr<InitialRefCountIsOne> obj2(new InitialRefCountIsOne);\n    EXPECT_TRUE(obj2->get_count() == 1);\n    obj2 = nullptr;\n\n    dsn::ref_ptr<Other> obj3(new Other());\n    EXPECT_TRUE(obj3->get_count() == 1);\n    obj3 = nullptr;\n}\n"
  },
  {
    "path": "src/utils/test/binary_reader_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/binary_reader.h>\n#include <gtest/gtest.h>\n#include <dsn/utility/defer.h>\n\nnamespace dsn {\n\nTEST(binary_reader_test, inner_read)\n{\n\n    {\n        blob input = blob::create_from_bytes(std::string(\"test10086\"));\n        binary_reader reader(input);\n\n        blob output;\n        int size = 4;\n        auto res = reader.inner_read(output, size);\n        ASSERT_EQ(res, size + sizeof(size));\n        ASSERT_EQ(output.to_string(), \"test\");\n    }\n\n    {\n        blob input = blob::create_from_bytes(std::string(\"test10086\"));\n        binary_reader reader(input);\n\n        blob output;\n        int size = 10;\n        auto res = reader.inner_read(output, size);\n        ASSERT_EQ(res, -1);\n    }\n\n    {\n\n        blob input = blob::create_from_bytes(std::string(\"test10086\"));\n        binary_reader reader(input);\n\n        int size = 4;\n        char *output_str = new char[size + 1];\n        auto cleanup = dsn::defer([&output_str]() { delete[] output_str; });\n        auto res = reader.inner_read(output_str, size);\n        output_str[size] = '\\0';\n        ASSERT_EQ(res, size);\n        
ASSERT_EQ(std::string(output_str), \"test\");\n    }\n\n    {\n        blob input = blob::create_from_bytes(std::string(\"test10086\"));\n        binary_reader reader(input);\n\n        int size = 10;\n        char *output_str = new char[size];\n        auto cleanup = dsn::defer([&output_str]() { delete[] output_str; });\n        auto res = reader.inner_read(output_str, size);\n        ASSERT_EQ(res, -1);\n    }\n}\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/clear.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nrm -rf dsn.utils.tests.xml log*.txt\n"
  },
  {
    "path": "src/utils/test/command_manager.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Unit-test for command_manager.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/tool-api/command_manager.h>\n#include <gtest/gtest.h>\n\nusing namespace ::dsn;\n\nvoid command_manager_module_init()\n{\n    dsn::command_manager::instance().register_command(\n        {\"test-cmd\"},\n        \"test-cmd - just for command_manager unit-test\",\n        \"test-cmd arg1 arg2 ...\",\n        [](const std::vector<std::string> &args) {\n            std::stringstream ss;\n            ss << \"test-cmd response: [\";\n            for (size_t i = 0; i < args.size(); ++i) {\n                if (i != 0)\n               
     ss << \" \";\n                ss << args[i];\n            }\n            ss << \"]\";\n            return ss.str();\n        });\n}\n\nTEST(command_manager, exist_command)\n{\n    const std::string cmd = \"test-cmd\";\n    const std::vector<std::string> cmd_args{\"this\", \"is\", \"test\", \"argument\"};\n    std::string output;\n    dsn::command_manager::instance().run_command(cmd, cmd_args, output);\n\n    std::string expect_output = \"test-cmd response: [this is test argument]\";\n    ASSERT_EQ(output, expect_output);\n}\n\nTEST(command_manager, not_exist_command)\n{\n    const std::string cmd = \"not-exist-cmd\";\n    const std::vector<std::string> cmd_args{\"arg1\", \"arg2\"};\n    std::string output;\n    dsn::command_manager::instance().run_command(cmd, cmd_args, output);\n\n    std::string expect_output = std::string(\"unknown command '\") + cmd + \"'\";\n    ASSERT_EQ(output, expect_output);\n}\n"
  },
  {
    "path": "src/utils/test/config-bad-section.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[core] = 1\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n"
  },
  {
    "path": "src/utils/test/config-dup-key.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[core]\nrun = true\ncount = 1\ncount = 2\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n"
  },
  {
    "path": "src/utils/test/config-dup-section.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[core]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n[core]\ncount = 2\n\n"
  },
  {
    "path": "src/utils/test/config-empty.ini",
    "content": ""
  },
  {
    "path": "src/utils/test/config-no-key.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[core]\n = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n"
  },
  {
    "path": "src/utils/test/config-no-section.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n"
  },
  {
    "path": "src/utils/test/config-null-section.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[ ]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n"
  },
  {
    "path": "src/utils/test/config-sample.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n#network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n[apps.client]\narguments = localhost 20101\nports =\ndelay_seconds = 1\n\n# bool\nrun = true\nRUN = TRUE\nRun = True\nnotrun = false\n\n# string\ntype = test\n\n# list\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER\n\n# double\ncount1 = 1\ncount2 = 1.2345678\n\n# int64_t\nint64_t_ok1 = 123\nint64_t_ok2 = 0xdeadbeef\nint64_t_ok3 = 9223372036854775807         #  2^63-1  max\nint64_t_ok4 = -9223372036854775808        # -2^63    min\nint64_t_ok5 = 0x7fffffffffffffff          #  2^63-1  
max\n\nint64_t_bad1 = 9223372036854775808        #  2^63    max+1\nint64_t_bad2 = -9223372036854775809       # -2^63-1  min-1\nint64_t_bad3 = 1234abc\nint64_t_bad4 =\nint64_t_bad5\n\n# uint64_t\nuint64_t_ok1 = 123\nuint64_t_ok2 = 0xdeadbeef\nuint64_t_ok3 = 18446744073709551615        # 2^64-1  max\nuint64_t_ok4 = 0                           # 0       min\nuint64_t_ok5 = 0xffffffffffffffff          # 2^64-1  max\nuint64_t_ok6 = 0x0000000000000000          # 0       min\n\nuint64_t_bad1 = 18446744073709551616       # 2^64    max+1\nuint64_t_bad2 = -1                         # -1      min-1\nuint64_t_bad3 = 1234abc\nuint64_t_bad4 =\nuint64_t_bad5\n\n[apps.server]\ntype = test\narguments =\nports = 20101\nrun = true\ncount = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_TEST_SERVER\nreplace_data = %replace%\nshift_data = head^#middle^;tail\nhex_data = 0xdeadbeef\njust_key_data\n\n[test]\n\n"
  },
  {
    "path": "src/utils/test/config-unmatch-section.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[core\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n"
  },
  {
    "path": "src/utils/test/configuration.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <algorithm>\n#include <fstream>\n#include <gtest/gtest.h>\n\n#include <dsn/utility/configuration.h>\n\nusing namespace ::dsn;\n\nTEST(configuration, load)\n{\n    std::shared_ptr<configuration> c;\n\n    c.reset(new configuration());\n    ASSERT_FALSE(c->load(\"not_exist_config_file\"));\n\n    c.reset(new configuration());\n    ASSERT_FALSE(c->load(\"config-empty.ini\"));\n\n    c.reset(new configuration());\n    ASSERT_FALSE(c->load(\"config-sample.ini\", \"a=\"));\n\n    c.reset(new configuration());\n    ASSERT_FALSE(c->load(\"config-no-section.ini\"));\n\n    c.reset(new configuration());\n    ASSERT_FALSE(c->load(\"config-null-section.ini\"));\n\n    c.reset(new configuration());\n    
ASSERT_FALSE(c->load(\"config-dup-section.ini\"));\n\n    c.reset(new configuration());\n    ASSERT_FALSE(c->load(\"config-unmatch-section.ini\"));\n\n    c.reset(new configuration());\n    ASSERT_FALSE(c->load(\"config-bad-section.ini\"));\n\n    c.reset(new configuration());\n    ASSERT_FALSE(c->load(\"config-no-key.ini\"));\n\n    c.reset(new configuration());\n    ASSERT_TRUE(c->load(\"config-dup-key.ini\"));\n\n    c.reset(new configuration());\n    ASSERT_TRUE(c->load(\"config-sample.ini\"));\n}\n\nTEST(configuration, tool)\n{\n    std::shared_ptr<configuration> c(new configuration());\n    ASSERT_TRUE(c->load(\"config-sample.ini\"));\n\n    bool old = c->set_warning(true);\n    ASSERT_FALSE(old);\n\n    ASSERT_STREQ(\"config-sample.ini\", c->get_file_name());\n}\n\nTEST(configuration, special_char)\n{\n    std::shared_ptr<configuration> c(new configuration());\n    ASSERT_TRUE(c->load(\"config-sample.ini\", \"replace=replace_value\"));\n\n    // %xx%\n    auto v = c->get_string_value(\"apps.server\", \"replace_data\", \"unknown\", \"for test replace\");\n    ASSERT_STREQ(\"replace_value\", v);\n\n    // ^x\n    v = c->get_string_value(\"apps.server\", \"shift_data\", \"unknown\", \"for test shift\");\n    ASSERT_STREQ(\"head#middle;tail\", v);\n}\n\nTEST(configuration, get_section_and_key)\n{\n    std::shared_ptr<configuration> c(new configuration());\n    ASSERT_TRUE(c->load(\"config-sample.ini\"));\n\n    std::vector<std::string> sections;\n    c->get_all_sections(sections);\n    ASSERT_EQ(4u, sections.size());\n    std::sort(sections.begin(), sections.end());\n    ASSERT_EQ(\"apps..default\", sections[0]);\n    ASSERT_EQ(\"apps.client\", sections[1]);\n    ASSERT_EQ(\"apps.server\", sections[2]);\n    ASSERT_EQ(\"test\", sections[3]);\n\n    std::vector<const char *> keys;\n    c->get_all_keys(\"apps..default\", keys);\n    ASSERT_EQ(2u, keys.size());\n    std::sort(\n        keys.begin(), keys.end(), [](const char *l, const char *r) { return strcmp(l, r) 
< 0; });\n    ASSERT_STREQ(\"count\", keys[0]);\n    ASSERT_STREQ(\"run\", keys[1]);\n\n    c->get_all_keys(\"test\", keys);\n    ASSERT_EQ(0u, keys.size());\n}\n\nTEST(configuration, add_section_and_key)\n{\n    std::shared_ptr<configuration> c(new configuration());\n    ASSERT_TRUE(c->load(\"config-sample.ini\"));\n\n    // add [my_section] my_key\n    auto v = c->get_string_value(\"my_section\", \"my_key\", \"my_value\", \"my key and value\");\n    ASSERT_STREQ(\"my_value\", v);\n\n    std::vector<std::string> sections;\n    c->get_all_sections(sections);\n    ASSERT_EQ(5u, sections.size());\n    std::sort(sections.begin(), sections.end());\n    ASSERT_EQ(\"apps..default\", sections[0]);\n    ASSERT_EQ(\"apps.client\", sections[1]);\n    ASSERT_EQ(\"apps.server\", sections[2]);\n    ASSERT_EQ(\"my_section\", sections[3]);\n    ASSERT_EQ(\"test\", sections[4]);\n\n    std::vector<const char *> keys;\n    c->get_all_keys(\"my_section\", keys);\n    ASSERT_EQ(1u, keys.size());\n    ASSERT_STREQ(\"my_key\", keys[0]);\n\n    // add [apps..default] my_key\n    v = c->get_string_value(\"apps..default\", \"my_key\", \"my_value\", \"my key and value\");\n    ASSERT_STREQ(\"my_value\", v);\n    v = c->get_string_value(\"apps..default\", \"my_key\", \"my_value\", \"my key and value again\");\n    ASSERT_STREQ(\"my_value\", v);\n\n    c->get_all_keys(\"apps..default\", keys);\n    ASSERT_EQ(3u, keys.size());\n    std::sort(\n        keys.begin(), keys.end(), [](const char *l, const char *r) { return strcmp(l, r) < 0; });\n    ASSERT_STREQ(\"count\", keys[0]);\n    ASSERT_STREQ(\"my_key\", keys[1]);\n    ASSERT_STREQ(\"run\", keys[2]);\n}\n\nTEST(configuration, has_section_and_key)\n{\n    std::shared_ptr<configuration> c(new configuration());\n    ASSERT_TRUE(c->load(\"config-sample.ini\"));\n\n    ASSERT_TRUE(c->has_section(\"test\"));\n    ASSERT_FALSE(c->has_section(\"unexist_section\"));\n\n    ASSERT_TRUE(c->has_key(\"apps..default\", \"run\"));\n    
ASSERT_FALSE(c->has_key(\"apps..default\", \"unexist_key\"));\n    ASSERT_FALSE(c->has_key(\"unexist_section\", \"unexist_key\"));\n}\n\nTEST(configuration, bool_value)\n{\n    std::shared_ptr<configuration> c(new configuration());\n    ASSERT_TRUE(c->load(\"config-sample.ini\"));\n\n    ASSERT_TRUE(c->get_value<bool>(\"apps.client\", \"run\", false, \"client run\"));\n    ASSERT_TRUE(c->get_value<bool>(\"apps.client\", \"RUN\", false, \"client run\"));\n    ASSERT_TRUE(c->get_value<bool>(\"apps.client\", \"Run\", false, \"client run\"));\n    ASSERT_FALSE(c->get_value<bool>(\"apps.client\", \"notrun\", true, \"client not run\"));\n    ASSERT_FALSE(c->get_value<bool>(\"apps.client\", \"unexist_bool_key\", false, \"\"));\n}\n\nTEST(configuration, string_value)\n{\n    std::shared_ptr<configuration> c(new configuration());\n    ASSERT_TRUE(c->load(\"config-sample.ini\"));\n\n    ASSERT_EQ(\"test\", c->get_value<std::string>(\"apps.client\", \"type\", \"\", \"\"));\n    ASSERT_EQ(\"unexist_value\",\n              c->get_value<std::string>(\"apps.client\", \"unexist_key\", \"unexist_value\", \"\"));\n}\n\nTEST(configuration, list_value)\n{\n    std::shared_ptr<configuration> c(new configuration());\n    ASSERT_TRUE(c->load(\"config-sample.ini\"));\n\n    std::list<std::string> l =\n        c->get_string_value_list(\"apps.client\", \"pools\", ',', \"thread pools\");\n    ASSERT_EQ(2u, l.size());\n    ASSERT_STREQ(\"THREAD_POOL_DEFAULT\", l.begin()->c_str());\n    ASSERT_STREQ(\"THREAD_POOL_TEST_SERVER\", (++l.begin())->c_str());\n\n    l = c->get_string_value_list(\"apps.client\", \"my_list\", ',', \"my list\");\n    ASSERT_EQ(0u, l.size());\n}\n\nTEST(configuration, double_value)\n{\n    std::shared_ptr<configuration> c(new configuration());\n    ASSERT_TRUE(c->load(\"config-sample.ini\"));\n\n    ASSERT_EQ(1.0, c->get_value<double>(\"apps.client\", \"count1\", 2.0, \"\"));\n    ASSERT_EQ(1.2345678, c->get_value<double>(\"apps.client\", \"count2\", 2.0, \"\"));\n    
ASSERT_EQ(2.0, c->get_value<double>(\"apps.client\", \"unexist_double_key\", 2.0, \"\"));\n}\n\nTEST(configuration, int64_value)\n{\n    std::shared_ptr<configuration> c(new configuration());\n    ASSERT_TRUE(c->load(\"config-sample.ini\"));\n\n    int64_t dft = 1LL << 60;\n    ASSERT_EQ(123, c->get_value<int64_t>(\"apps.client\", \"int64_t_ok1\", dft, \"\"));\n    ASSERT_EQ(0xdeadbeef, c->get_value<int64_t>(\"apps.client\", \"int64_t_ok2\", dft, \"\"));\n    ASSERT_EQ(std::numeric_limits<int64_t>::max(),\n              c->get_value<int64_t>(\"apps.client\", \"int64_t_ok3\", dft, \"\"));\n    ASSERT_EQ(std::numeric_limits<int64_t>::min(),\n              c->get_value<int64_t>(\"apps.client\", \"int64_t_ok4\", dft, \"\"));\n    ASSERT_EQ(std::numeric_limits<int64_t>::max(),\n              c->get_value<int64_t>(\"apps.client\", \"int64_t_ok5\", dft, \"\"));\n\n    ASSERT_EQ(dft, c->get_value<int64_t>(\"apps.client\", \"unexist_int64_t_key\", dft, \"\"));\n    ASSERT_EQ(dft, c->get_value<int64_t>(\"apps.client\", \"int64_t_bad1\", dft, \"\"));\n    ASSERT_EQ(dft, c->get_value<int64_t>(\"apps.client\", \"int64_t_bad2\", dft, \"\"));\n    ASSERT_EQ(dft, c->get_value<int64_t>(\"apps.client\", \"int64_t_bad3\", dft, \"\"));\n    ASSERT_EQ(dft, c->get_value<int64_t>(\"apps.client\", \"int64_t_bad4\", dft, \"\"));\n    ASSERT_EQ(dft, c->get_value<int64_t>(\"apps.client\", \"int64_t_bad5\", dft, \"\"));\n}\n\nTEST(configuration, uint64_value)\n{\n    std::shared_ptr<configuration> c(new configuration());\n    ASSERT_TRUE(c->load(\"config-sample.ini\"));\n\n    uint64_t dft = 1ULL << 60;\n    ASSERT_EQ(123, c->get_value<uint64_t>(\"apps.client\", \"uint64_t_ok1\", dft, \"\"));\n    ASSERT_EQ(0xdeadbeef, c->get_value<uint64_t>(\"apps.client\", \"uint64_t_ok2\", dft, \"\"));\n    ASSERT_EQ(std::numeric_limits<uint64_t>::max(),\n              c->get_value<uint64_t>(\"apps.client\", \"uint64_t_ok3\", dft, \"\"));\n    ASSERT_EQ(std::numeric_limits<uint64_t>::min(),\n              
c->get_value<uint64_t>(\"apps.client\", \"uint64_t_ok4\", dft, \"\"));\n    ASSERT_EQ(std::numeric_limits<uint64_t>::max(),\n              c->get_value<uint64_t>(\"apps.client\", \"uint64_t_ok5\", dft, \"\"));\n    ASSERT_EQ(std::numeric_limits<uint64_t>::min(),\n              c->get_value<uint64_t>(\"apps.client\", \"uint64_t_ok6\", dft, \"\"));\n\n    ASSERT_EQ(dft, c->get_value<uint64_t>(\"apps.client\", \"unexist_uint64_t_key\", dft, \"\"));\n    ASSERT_EQ(dft, c->get_value<uint64_t>(\"apps.client\", \"uint64_t_bad1\", dft, \"\"));\n    ASSERT_EQ(dft, c->get_value<uint64_t>(\"apps.client\", \"uint64_t_bad2\", dft, \"\"));\n    ASSERT_EQ(dft, c->get_value<uint64_t>(\"apps.client\", \"uint64_t_bad3\", dft, \"\"));\n    ASSERT_EQ(dft, c->get_value<uint64_t>(\"apps.client\", \"uint64_t_bad4\", dft, \"\"));\n    ASSERT_EQ(dft, c->get_value<uint64_t>(\"apps.client\", \"uint64_t_bad5\", dft, \"\"));\n}\n\nTEST(configuration, dump)\n{\n    // load old config\n    std::shared_ptr<configuration> c(new configuration());\n    ASSERT_TRUE(c->load(\"config-sample.ini\"));\n\n    // add [my_section] my_key\n    auto v = c->get_string_value(\"my_section\", \"my_key\", \"my_value\", \"my key and value\");\n    ASSERT_STREQ(\"my_value\", v);\n\n    // add [apps..default] my_key\n    v = c->get_string_value(\"apps..default\", \"my_key\", \"my_value\", \"my key and value\");\n    ASSERT_STREQ(\"my_value\", v);\n\n    // dump\n    std::fstream out;\n    out.open(\"config-sample-dump.ini\", std::ios::out);\n    c->dump(out);\n    out.close();\n\n    // load new config\n    c.reset(new configuration());\n    ASSERT_TRUE(c->load(\"config-sample-dump.ini\"));\n\n    std::vector<std::string> sections;\n    c->get_all_sections(sections);\n    ASSERT_EQ(5u, sections.size());\n    std::sort(sections.begin(), sections.end());\n    ASSERT_EQ(\"apps..default\", sections[0]);\n    ASSERT_EQ(\"apps.client\", sections[1]);\n    ASSERT_EQ(\"apps.server\", sections[2]);\n    
ASSERT_EQ(\"my_section\", sections[3]);\n    ASSERT_EQ(\"test\", sections[4]);\n\n    ASSERT_TRUE(!c->has_key(\"not-exsit\", \"not-exsit\"));\n    c->set(\"not-exsit\", \"not-exsit\", \"exsit\", \"kaka\");\n    ASSERT_EQ(std::string(\"exsit\"),\n              std::string(c->get_string_value(\"not-exsit\", \"not-exsit\", \"\", \"\")));\n    c->set(\"not-exsit\", \"not-exsit\", \"exsit2\", \"kaka\");\n    ASSERT_EQ(std::string(\"exsit2\"),\n              std::string(c->get_string_value(\"not-exsit\", \"not-exsit\", \"\", \"\")));\n}\n"
  },
  {
    "path": "src/utils/test/endian_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/endians.h>\n\n#include <gtest/gtest.h>\n\nusing namespace dsn;\n\nTEST(endian, conversion)\n{\n    ASSERT_EQ(100, endian::ntoh(endian::hton(uint16_t(100))));\n    ASSERT_EQ(100, endian::ntoh(endian::hton(uint32_t(100))));\n    ASSERT_EQ(100, endian::ntoh(endian::hton(uint64_t(100))));\n}\n\nTEST(endian, write_and_read)\n{\n    {\n        std::string data;\n        data.resize(4);\n        data_output(data).write_u32(100);\n        ASSERT_EQ(100, data_input(data).read_u32());\n    }\n\n    {\n        std::string data;\n        data.resize(1);\n        data_output(data).write_u8(100);\n        ASSERT_EQ(100, data_input(data).read_u8());\n    }\n\n    {\n        std::string data;\n        data.resize(1000 * 8);\n\n        data_output output(data);\n        for (uint32_t value = 1; value < 1000000; value += 1000) {\n            if (value < std::numeric_limits<uint16_t>::max()) {\n                auto val_16 = static_cast<uint16_t>(value);\n                output.write_u16(val_16);\n            } else {\n                output.write_u32(value);\n            }\n        }\n\n        data_input input(data);\n        for 
(uint32_t value = 1; value < 1000000; value += 1000) {\n            if (value < std::numeric_limits<uint16_t>::max()) {\n                ASSERT_EQ(value, input.read_u16());\n            } else {\n                ASSERT_EQ(value, input.read_u32());\n            }\n        }\n    }\n}\n"
  },
  {
    "path": "src/utils/test/env.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Unit-test for rpc_address.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <gtest/gtest.h>\n#include <dsn/utility/rand.h>\n\nusing namespace ::dsn;\n\nTEST(core, env)\n{\n    uint64_t xs[] = {0, std::numeric_limits<uint64_t>::max() - 1, 0xdeadbeef};\n\n    for (auto &x : xs) {\n        auto r = rand::next_u64(x, x);\n        EXPECT_EQ(r, x);\n\n        r = rand::next_u64(x, x + 1);\n        EXPECT_TRUE(r == x || r == (x + 1));\n    }\n}\n"
  },
  {
    "path": "src/utils/test/fail_point_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n// Copyright 2017 PingCAP, Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//     http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include \"utils/fail_point_impl.h\"\n\n#include <gtest/gtest.h>\n\nnamespace dsn {\nnamespace fail {\n\nTEST(fail_point, off)\n{\n    fail_point p;\n    p.set_action(\"off\");\n    ASSERT_EQ(p.eval(), nullptr);\n}\n\nTEST(fail_point, return_test)\n{\n    fail_point p;\n    p.set_action(\"return()\");\n    ASSERT_EQ(*p.eval(), \"\");\n\n    p.set_action(\"return(test)\");\n    ASSERT_EQ(*p.eval(), \"test\");\n}\n\nTEST(fail_point, print)\n{\n    fail_point p;\n    p.set_action(\"print(test)\");\n    ASSERT_EQ(p.eval(), nullptr);\n}\n\nTEST(fail_point, frequency_and_count)\n{\n    fail_point p;\n    
p.set_action(\"80%10000*return()\");\n\n    int cnt = 0;\n    double times = 0;\n    while (cnt < 10000) {\n        if (p.eval() != nullptr) {\n            cnt++;\n        }\n        times++;\n    }\n    ASSERT_TRUE(10000 / 0.9 < times);\n    ASSERT_TRUE(10000 / 0.7 > times);\n\n    for (int i = 0; i < times; i++) {\n        ASSERT_EQ(p.eval(), nullptr);\n    }\n}\n\nTEST(fail_point, parse)\n{\n    fail_point p;\n\n    p.set_action(\"return(64)\");\n    ASSERT_EQ(p, fail_point(fail_point::Return, \"64\", 100, -1));\n\n    p = fail_point();\n    p.set_action(\"5*return\");\n    ASSERT_EQ(p, fail_point(fail_point::Return, \"\", 100, 5));\n\n    p = fail_point();\n    p.set_action(\"125%2*return\");\n    ASSERT_EQ(p, fail_point(fail_point::Return, \"\", 125, 2));\n\n    p = fail_point();\n    p.set_action(\"return(2%5)\");\n    ASSERT_EQ(p, fail_point(fail_point::Return, \"2%5\", 100, -1));\n\n    p = fail_point();\n    p.set_action(\"125%2*off\");\n    ASSERT_EQ(p, fail_point(fail_point::Off, \"\", 125, 2));\n\n    p = fail_point();\n    p.set_action(\"125%2*print\");\n    ASSERT_EQ(p, fail_point(fail_point::Print, \"\", 125, 2));\n\n    ASSERT_FALSE(p.parse_from_string(\"delay\"));\n    ASSERT_FALSE(p.parse_from_string(\"ab%return\"));\n    ASSERT_FALSE(p.parse_from_string(\"ab*return\"));\n    ASSERT_FALSE(p.parse_from_string(\"return(msg\"));\n    ASSERT_FALSE(p.parse_from_string(\"unknown\"));\n}\n\nint test_func()\n{\n    FAIL_POINT_INJECT_F(\"test_1\", [](string_view str) -> int {\n        EXPECT_EQ(str, \"1\");\n        return 1;\n    });\n\n    FAIL_POINT_INJECT_F(\"test_2\", [](string_view str) -> int {\n        EXPECT_EQ(str, \"2\");\n        return 2;\n    });\n\n    return 0;\n}\nTEST(fail_point, macro_use)\n{\n    setup();\n\n    cfg(\"test_1\", \"1*return(1)\");\n    ASSERT_EQ(test_func(), 1);\n\n    cfg(\"test_2\", \"1*return(2)\");\n    ASSERT_EQ(test_func(), 2);\n\n    ASSERT_EQ(test_func(), 0);\n\n    teardown();\n}\n\nvoid test_func_return_void(int 
&a)\n{\n    FAIL_POINT_INJECT_F(\"test_1\", [](string_view str) {});\n    a++;\n}\nTEST(fail_point, return_void)\n{\n    setup();\n\n    int a = 0;\n    cfg(\"test_1\", \"1*return()\");\n    test_func_return_void(a);\n    ASSERT_EQ(a, 0);\n\n    cfg(\"test_1\", \"off\");\n    test_func_return_void(a);\n    ASSERT_EQ(a, 1);\n\n    teardown();\n}\n\n} // namespace fail\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/file_system_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/filesystem.h>\n#include <gtest/gtest.h>\n\nnamespace dsn {\nnamespace utils {\nnamespace filesystem {\n\nTEST(verify_file, verify_file_test)\n{\n    const std::string &fname = \"test_file\";\n    std::string expected_md5;\n    int64_t expected_fsize;\n    create_file(fname);\n    md5sum(fname, expected_md5);\n    file_size(fname, expected_fsize);\n\n    ASSERT_TRUE(verify_file(fname, expected_md5, expected_fsize));\n    ASSERT_FALSE(verify_file(fname, \"wrong_md5\", 10086));\n    ASSERT_FALSE(verify_file(\"file_not_exists\", \"wrong_md5\", 10086));\n\n    remove_path(fname);\n}\n\n} // namespace filesystem\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/file_utils.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <gtest/gtest.h>\n#include <dsn/service_api_cpp.h>\n#include <dsn/utility/utils.h>\n#include <dsn/utility/filesystem.h>\n#include <fstream>\n\nstatic void file_utils_test_setup()\n{\n    std::string path;\n    bool ret;\n\n    path = \"./file_utils_temp.txt\";\n    ret = dsn::utils::filesystem::remove_path(path);\n    EXPECT_TRUE(ret);\n    ret = dsn::utils::filesystem::file_exists(path);\n    EXPECT_FALSE(ret);\n\n    path = \"./file_utils_temp\";\n    ret = dsn::utils::filesystem::remove_path(path);\n    EXPECT_TRUE(ret);\n    ret = dsn::utils::filesystem::directory_exists(path);\n    EXPECT_FALSE(ret);\n}\n\nstatic void file_utils_test_get_process_image_path()\n{\n    std::string path;\n    std::string imagepath;\n    
dsn::error_code ret;\n    // int pid;\n\n    if (!dsn::utils::filesystem::get_current_directory(imagepath)) {\n        EXPECT_TRUE(false);\n    }\n    imagepath = dsn::utils::filesystem::path_combine(imagepath, \"dsn_utils_tests\");\n\n    ret = dsn::utils::filesystem::get_current_process_image_path(path);\n    EXPECT_TRUE(ret == dsn::ERR_OK);\n    // TODO: not always true when running dir is not where the test resides\n    // EXPECT_TRUE(path == imagepath); // e: vs E:\n}\n\nstatic void file_utils_test_get_normalized_path()\n{\n    int ret;\n    std::string path;\n    std::string npath;\n\n    path = \"\\\\\\\\?\\\\\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n    EXPECT_TRUE(npath == path);\n\n    path = \"c:\\\\\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n    EXPECT_TRUE(npath == path);\n\n    path = \"c:\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n    EXPECT_TRUE(npath == path);\n\n    path = \"\\\\\\\\?\\\\c:\\\\\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n    EXPECT_TRUE(npath == path);\n\n    path = \"\\\\\\\\?\\\\c:\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n    EXPECT_TRUE(npath == path);\n\n    path = \"c:\\\\a\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n    EXPECT_TRUE(npath == path);\n\n    path = \"c:\\\\\\\\a\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"c:\\\\a\");\n#else\n    EXPECT_TRUE(npath == path);\n#endif\n\n    path = \"c:\\\\\\\\a\\\\\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"c:\\\\a\");\n#else\n    
EXPECT_TRUE(npath == path);\n#endif\n\n    path = \"c:\\\\\\\\a\\\\\\\\\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"c:\\\\a\");\n#else\n    EXPECT_TRUE(npath == path);\n#endif\n\n    path = \"\\\\\\\\?\\\\c:\\\\a\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n    EXPECT_TRUE(npath == path);\n\n    path = \"\\\\\\\\?\\\\c:\\\\\\\\a\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"\\\\\\\\?\\\\c:\\\\a\");\n#else\n    EXPECT_TRUE(npath == path);\n#endif\n\n    path = \"\\\\\\\\?\\\\c:\\\\\\\\a\\\\\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"\\\\\\\\?\\\\c:\\\\a\");\n#else\n    EXPECT_TRUE(npath == path);\n#endif\n\n    path = \"\\\\\\\\?\\\\c:\\\\\\\\a\\\\\\\\\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"\\\\\\\\?\\\\c:\\\\a\");\n#else\n    EXPECT_TRUE(npath == path);\n#endif\n\n    path = \"\\\\\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n    EXPECT_TRUE(npath == path);\n\n    path = \"\\\\\\\\\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n    EXPECT_TRUE(npath == path);\n\n    path = \"\\\\\\\\\\\\\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"\\\\\\\\\");\n#else\n    EXPECT_TRUE(npath == path);\n#endif\n\n    path = \"\\\\\\\\a\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n    EXPECT_TRUE(npath == path);\n\n    path = \"\\\\\\\\\\\\a\";\n    ret = 
dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"\\\\\\\\a\");\n#else\n    EXPECT_TRUE(npath == path);\n#endif\n\n    path = \"\\\\\\\\a\\\\\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"\\\\\\\\a\");\n#else\n    EXPECT_TRUE(npath == path);\n#endif\n\n    path = \"\\\\\\\\\\\\a\\\\\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"\\\\\\\\a\");\n#else\n    EXPECT_TRUE(npath == path);\n#endif\n\n    path = \"\\\\\\\\\\\\a\\\\\\\\\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"\\\\\\\\a\");\n#else\n    EXPECT_TRUE(npath == path);\n#endif\n\n    path = \"/\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"\\\\\");\n#else\n    EXPECT_TRUE(npath == path);\n#endif\n\n    path = \"//\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"\\\\\\\\\");\n#else\n    EXPECT_TRUE(npath == \"/\");\n#endif\n\n    path = \"//?/\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"\\\\\\\\?\\\\\");\n#else\n    EXPECT_TRUE(npath == \"/?\");\n#endif\n\n    path = \"//a\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"\\\\\\\\a\");\n#else\n    EXPECT_TRUE(npath == \"/a\");\n#endif\n\n    path = \"//a/\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"\\\\\\\\a\");\n#else\n    
EXPECT_TRUE(npath == \"/a\");\n#endif\n\n    path = \"//a//\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"\\\\\\\\a\");\n#else\n    EXPECT_TRUE(npath == \"/a\");\n#endif\n\n    path = \"c:/\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"c:\\\\\");\n#else\n    EXPECT_TRUE(npath == \"c:\");\n#endif\n\n    path = \"c://\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"c:\\\\\");\n#else\n    EXPECT_TRUE(npath == \"c:\");\n#endif\n\n    path = \"c:/a\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"c:\\\\a\");\n#else\n    EXPECT_TRUE(npath == path);\n#endif\n\n    path = \"c:/a/\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"c:\\\\a\");\n#else\n    EXPECT_TRUE(npath == \"c:/a\");\n#endif\n\n    path = \"c://a/\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"c:\\\\a\");\n#else\n    EXPECT_TRUE(npath == \"c:/a\");\n#endif\n\n    path = \"c://a//\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"c:\\\\a\");\n#else\n    EXPECT_TRUE(npath == \"c:/a\");\n#endif\n\n    path = \"/////////////////////////////////////////////////////////////////\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"\\\\\\\\\");\n#else\n    EXPECT_TRUE(npath == \"/\");\n#endif\n\n    path = 
\"/////////////////////////////////////////////////////////////////a/////////////////b///\"\n           \"////////\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"\\\\\\\\a\\\\b\");\n#else\n    EXPECT_TRUE(npath == \"/a/b\");\n#endif\n\n    path = \".\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n    EXPECT_TRUE(npath == path);\n\n    path = \"./\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n    EXPECT_TRUE(npath == \".\");\n\n    path = \"./a\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \".\\\\a\");\n#else\n    EXPECT_TRUE(npath == path);\n#endif\n\n    path = \".//a\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \".\\\\a\");\n#else\n    EXPECT_TRUE(npath == \"./a\");\n#endif\n\n    path = \"./a/\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \".\\\\a\");\n#else\n    EXPECT_TRUE(npath == \"./a\");\n#endif\n\n    path = \"./a/b\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \".\\\\a\\\\b\");\n#else\n    EXPECT_TRUE(npath == path);\n#endif\n\n    path = \"./a/b/\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \".\\\\a\\\\b\");\n#else\n    EXPECT_TRUE(npath == \"./a/b\");\n#endif\n\n    path = \".///a////b///\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \".\\\\a\\\\b\");\n#else\n    EXPECT_TRUE(npath == 
\"./a/b\");\n#endif\n\n    path = \"..\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n    EXPECT_TRUE(npath == path);\n\n    path = \"../\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n    EXPECT_TRUE(npath == \"..\");\n\n    path = \"../a\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"..\\\\a\");\n#else\n    EXPECT_TRUE(npath == path);\n#endif\n\n    path = \"..//a\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"..\\\\a\");\n#else\n    EXPECT_TRUE(npath == \"../a\");\n#endif\n\n    path = \"../a/\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"..\\\\a\");\n#else\n    EXPECT_TRUE(npath == \"../a\");\n#endif\n\n    path = \"../a/b\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"..\\\\a\\\\b\");\n#else\n    EXPECT_TRUE(npath == path);\n#endif\n\n    path = \"../a/b/\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"..\\\\a\\\\b\");\n#else\n    EXPECT_TRUE(npath == \"../a/b\");\n#endif\n\n    path = \"..///a////b///\";\n    ret = dsn::utils::filesystem::get_normalized_path(path, npath);\n    EXPECT_TRUE(ret == 0);\n#ifdef _WIN32\n    EXPECT_TRUE(npath == \"..\\\\a\\\\b\");\n#else\n    EXPECT_TRUE(npath == \"../a/b\");\n#endif\n}\n\nstatic void file_utils_test_get_current_directory()\n{\n    std::string path;\n    bool ret;\n\n    path = \"\";\n    ret = dsn::utils::filesystem::get_current_directory(path);\n    EXPECT_TRUE(ret);\n    EXPECT_TRUE(!path.empty());\n}\n\nstatic void 
file_utils_test_path_combine()\n{\n    std::string path;\n    std::string path1;\n    std::string path2;\n\n    path1 = \"\";\n    path2 = \"\";\n    path = dsn::utils::filesystem::path_combine(path1, path2);\n    EXPECT_TRUE(path == \"\");\n\n    path1 = \"c:\";\n    path2 = \"Windows\\\\explorer.exe\";\n    path = dsn::utils::filesystem::path_combine(path1, path2);\n#ifdef _WIN32\n    EXPECT_TRUE(path == \"c:Windows\\\\explorer.exe\");\n#else\n    EXPECT_TRUE(path == \"c:/Windows\\\\explorer.exe\");\n#endif\n\n    path1 = \"c:\";\n    path2 = \"\\\\Windows\\\\explorer.exe\";\n    path = dsn::utils::filesystem::path_combine(path1, path2);\n#ifdef _WIN32\n    EXPECT_TRUE(path == \"c:\\\\Windows\\\\explorer.exe\");\n#else\n    EXPECT_TRUE(path == \"c:/Windows\\\\explorer.exe\");\n#endif\n\n    path1 = \"c:\\\\\";\n    path2 = \"\\\\Windows\\\\explorer.exe\";\n    path = dsn::utils::filesystem::path_combine(path1, path2);\n#ifdef _WIN32\n    EXPECT_TRUE(path == \"c:\\\\Windows\\\\explorer.exe\");\n#else\n    EXPECT_TRUE(path == \"c:\\\\/Windows\\\\explorer.exe\");\n#endif\n\n    path1 = \"/bin\";\n    path2 = \"ls\";\n    path = dsn::utils::filesystem::path_combine(path1, path2);\n#ifdef _WIN32\n    EXPECT_TRUE(path == \"\\\\bin\\\\ls\");\n#else\n    EXPECT_TRUE(path == \"/bin/ls\");\n#endif\n\n    path1 = \"/bin/\";\n    path2 = \"ls\";\n    path = dsn::utils::filesystem::path_combine(path1, path2);\n#ifdef _WIN32\n    EXPECT_TRUE(path == \"\\\\bin\\\\ls\");\n#else\n    EXPECT_TRUE(path == \"/bin/ls\");\n#endif\n\n    path1 = \"/bin\";\n    path2 = \"/ls\";\n    path = dsn::utils::filesystem::path_combine(path1, path2);\n#ifdef _WIN32\n    EXPECT_TRUE(path == \"\\\\bin\\\\ls\");\n#else\n    EXPECT_TRUE(path == \"/bin/ls\");\n#endif\n\n    path1 = \"/bin/\";\n    path2 = \"/ls\";\n    path = dsn::utils::filesystem::path_combine(path1, path2);\n#ifdef _WIN32\n    EXPECT_TRUE(path == \"\\\\bin\\\\ls\");\n#else\n    EXPECT_TRUE(path == \"/bin/ls\");\n#endif\n}\n\nstatic 
void file_utils_test_get_file_name()\n{\n    std::string path1;\n    std::string path2;\n\n    path1 = \"\";\n    path2 = dsn::utils::filesystem::get_file_name(path1);\n    EXPECT_TRUE(path2 == \"\");\n\n    path1 = \"c:\";\n    path2 = dsn::utils::filesystem::get_file_name(path1);\n#ifdef _WIN32\n    EXPECT_TRUE(path2 == \"\");\n#else\n    EXPECT_TRUE(path2 == \"c:\");\n#endif\n\n    path1 = \"c:\\\\\";\n    path2 = dsn::utils::filesystem::get_file_name(path1);\n    EXPECT_TRUE(path2 == \"\");\n\n    path1 = \"c:1.txt\";\n    path2 = dsn::utils::filesystem::get_file_name(path1);\n#ifdef _WIN32\n    EXPECT_TRUE(path2 == \"1.txt\");\n#else\n    EXPECT_TRUE(path2 == \"c:1.txt\");\n#endif\n\n    path1 = \"c:\\\\1.txt\";\n    path2 = dsn::utils::filesystem::get_file_name(path1);\n    EXPECT_TRUE(path2 == \"1.txt\");\n\n    path1 = \"c:\\\\Windows\\\\1.txt\";\n    path2 = dsn::utils::filesystem::get_file_name(path1);\n    EXPECT_TRUE(path2 == \"1.txt\");\n\n    path1 = \"/bin/\";\n    path2 = dsn::utils::filesystem::get_file_name(path1);\n    EXPECT_TRUE(path2 == \"\");\n\n    path1 = \"/bin/ls\";\n    path2 = dsn::utils::filesystem::get_file_name(path1);\n    EXPECT_TRUE(path2 == \"ls\");\n}\n\nstatic void file_utils_test_create()\n{\n    std::string path;\n    bool ret;\n\n    path = \"./file_utils_temp.txt\";\n    ret = dsn::utils::filesystem::create_file(path);\n    EXPECT_TRUE(ret);\n    ret = dsn::utils::filesystem::file_exists(path);\n    EXPECT_TRUE(ret);\n\n    time_t current_time = ::time(nullptr);\n    EXPECT_TRUE(current_time != 1);\n\n    std::ofstream myfile(path.c_str(), std::ios::out | std::ios::app | std::ios::binary);\n    EXPECT_TRUE(myfile.is_open());\n    myfile << \"Hello world!\";\n    myfile.close();\n\n    time_t last_write_time;\n    ret = dsn::utils::filesystem::last_write_time(path, last_write_time);\n    EXPECT_TRUE(ret);\n    EXPECT_TRUE((last_write_time != -1) && (last_write_time >= current_time));\n\n    path = \"./file_utils_temp\";\n    
ret = dsn::utils::filesystem::create_directory(path);\n    EXPECT_TRUE(ret);\n    ret = dsn::utils::filesystem::directory_exists(path);\n    EXPECT_TRUE(ret);\n\n    path = \"./file_utils_temp/a/b/c/d//\";\n    ret = dsn::utils::filesystem::create_directory(path);\n    EXPECT_TRUE(ret);\n    ret = dsn::utils::filesystem::directory_exists(path);\n    EXPECT_TRUE(ret);\n\n    path = \"./file_utils_temp/a/1.txt\";\n    ret = dsn::utils::filesystem::create_file(path);\n    EXPECT_TRUE(ret);\n    ret = dsn::utils::filesystem::file_exists(path);\n    EXPECT_TRUE(ret);\n\n    path = \"./file_utils_temp/a/1.txt\";\n    ret = dsn::utils::filesystem::create_file(path);\n    EXPECT_TRUE(ret);\n    ret = dsn::utils::filesystem::file_exists(path);\n    EXPECT_TRUE(ret);\n\n    path = \"./file_utils_temp/a/2.txt\";\n    ret = dsn::utils::filesystem::create_file(path);\n    EXPECT_TRUE(ret);\n    ret = dsn::utils::filesystem::file_exists(path);\n    EXPECT_TRUE(ret);\n\n    path = \"./file_utils_temp/b/c/d/1.txt\";\n    ret = dsn::utils::filesystem::create_file(path);\n    EXPECT_TRUE(ret);\n    ret = dsn::utils::filesystem::file_exists(path);\n    EXPECT_TRUE(ret);\n}\n\nstatic void file_utils_test_file_size()\n{\n    std::string path;\n    int64_t sz;\n    bool ret;\n\n    path = \"./file_utils_temp.txt\";\n    ret = dsn::utils::filesystem::file_size(path, sz);\n    EXPECT_TRUE(ret);\n    EXPECT_TRUE(sz == 12);\n\n    path = \"./file_utils_temp2.txt\";\n    ret = dsn::utils::filesystem::file_size(path, sz);\n    EXPECT_FALSE(ret);\n}\n\nstatic void file_utils_test_path_exists()\n{\n    std::string path;\n    bool ret;\n\n    path = \"c:\\\\\";\n    ret = dsn::utils::filesystem::path_exists(path);\n#ifdef _WIN32\n    EXPECT_TRUE(ret);\n#else\n    EXPECT_FALSE(ret);\n#endif\n\n    path = \"c:\\\\\";\n    ret = dsn::utils::filesystem::directory_exists(path);\n#ifdef _WIN32\n    EXPECT_TRUE(ret);\n#else\n    EXPECT_FALSE(ret);\n#endif\n\n    path = \"c:\\\\\";\n    ret = 
dsn::utils::filesystem::file_exists(path);\n#ifdef _WIN32\n    EXPECT_FALSE(ret);\n#else\n    EXPECT_FALSE(ret);\n#endif\n\n    path = \"/\";\n    ret = dsn::utils::filesystem::path_exists(path);\n    EXPECT_TRUE(ret);\n\n    path = \"/\";\n    ret = dsn::utils::filesystem::directory_exists(path);\n    EXPECT_TRUE(ret);\n\n    path = \"/\";\n    ret = dsn::utils::filesystem::file_exists(path);\n    EXPECT_FALSE(ret);\n\n    path = \"./not_exists_not_exists\";\n    ret = dsn::utils::filesystem::path_exists(path);\n    EXPECT_FALSE(ret);\n\n    path = \"c:\\\\Windows\\\\System32\\\\notepad.exe\";\n    ret = dsn::utils::filesystem::path_exists(path);\n#ifdef _WIN32\n    EXPECT_TRUE(ret);\n#else\n    EXPECT_FALSE(ret);\n#endif\n\n    path = \"c:\\\\Windows\\\\System32\\\\notepad.exe\";\n    ret = dsn::utils::filesystem::directory_exists(path);\n    EXPECT_FALSE(ret);\n\n    path = \"c:\\\\Windows\\\\System32\\\\notepad.exe\";\n    ret = dsn::utils::filesystem::file_exists(path);\n#ifdef _WIN32\n    EXPECT_TRUE(ret);\n#else\n    EXPECT_FALSE(ret);\n#endif\n\n    path = \"/bin/ls\";\n    ret = dsn::utils::filesystem::path_exists(path);\n#ifdef _WIN32\n    EXPECT_FALSE(ret);\n#else\n    EXPECT_TRUE(ret);\n#endif\n\n    path = \"/bin/ls\";\n    ret = dsn::utils::filesystem::directory_exists(path);\n    EXPECT_FALSE(ret);\n\n    path = \"/bin/ls\";\n    ret = dsn::utils::filesystem::file_exists(path);\n#ifdef _WIN32\n    EXPECT_FALSE(ret);\n#else\n    EXPECT_TRUE(ret);\n#endif\n}\n\nstatic void file_utils_test_get_paths()\n{\n    std::string path;\n    bool ret;\n    std::vector<std::string> file_list;\n\n    path = \".\";\n    ret = dsn::utils::filesystem::get_subfiles(path, file_list, false);\n    EXPECT_TRUE(ret);\n#ifdef _WIN32\n    EXPECT_TRUE(file_list.size() >= 3);\n#else\n    EXPECT_TRUE(file_list.size() >= 2);\n#endif\n    file_list.clear();\n\n    path = \".\";\n    ret = dsn::utils::filesystem::get_subfiles(path, file_list, true);\n    EXPECT_TRUE(ret);\n    
EXPECT_TRUE(file_list.size() >= 3);\n    file_list.clear();\n\n    path = \"../../\";\n    ret = dsn::utils::filesystem::get_subfiles(path, file_list, true);\n    EXPECT_TRUE(ret);\n    EXPECT_TRUE(file_list.size() >= 3);\n    file_list.clear();\n\n    path = \"./file_utils_temp/\";\n    ret = dsn::utils::filesystem::get_subfiles(path, file_list, true);\n    EXPECT_TRUE(ret);\n    EXPECT_TRUE(file_list.size() == 3);\n    file_list.clear();\n\n    path = \"./file_utils_temp/\";\n    ret = dsn::utils::filesystem::get_subdirectories(path, file_list, true);\n    EXPECT_TRUE(ret);\n    EXPECT_TRUE(file_list.size() == 7);\n    file_list.clear();\n\n    path = \"./file_utils_temp/\";\n    ret = dsn::utils::filesystem::get_subdirectories(path, file_list, false);\n    EXPECT_TRUE(ret);\n    EXPECT_TRUE(file_list.size() == 2);\n    file_list.clear();\n\n    path = \"./file_utils_temp/\";\n    ret = dsn::utils::filesystem::get_subpaths(path, file_list, true);\n    EXPECT_TRUE(ret);\n    EXPECT_TRUE(file_list.size() == 10);\n    file_list.clear();\n\n    path = \"./file_utils_temp/\";\n    ret = dsn::utils::filesystem::get_subpaths(path, file_list, false);\n    EXPECT_TRUE(ret);\n    EXPECT_TRUE(file_list.size() == 2);\n    file_list.clear();\n\n    path = \"./file_utils_temp/a/\";\n    ret = dsn::utils::filesystem::get_subfiles(path, file_list, false);\n    EXPECT_TRUE(ret);\n    EXPECT_TRUE(file_list.size() == 2);\n    file_list.clear();\n\n    path = \"./file_utils_temp/a/\";\n    ret = dsn::utils::filesystem::get_subpaths(path, file_list, false);\n    EXPECT_TRUE(ret);\n    EXPECT_TRUE(file_list.size() == 3);\n    file_list.clear();\n}\n\nstatic void file_utils_test_rename()\n{\n    std::string path;\n    std::string path2;\n    bool ret;\n\n    path = \"./file_utils_temp/b/c/d/1.txt\";\n    path2 = \"./file_utils_temp/b/c/d/2.txt\";\n    ret = dsn::utils::filesystem::rename_path(path, path2);\n    EXPECT_TRUE(ret);\n    ret = dsn::utils::filesystem::file_exists(path);\n   
 EXPECT_FALSE(ret);\n    ret = dsn::utils::filesystem::file_exists(path2);\n    EXPECT_TRUE(ret);\n    ret = dsn::utils::filesystem::rename_path(path, path2);\n    EXPECT_FALSE(ret);\n}\n\nstatic void file_utils_test_remove()\n{\n    std::string path;\n    std::vector<std::string> file_list;\n    bool ret;\n\n    path = \"./file_utils_temp.txt\";\n    ret = dsn::utils::filesystem::remove_path(path);\n    EXPECT_TRUE(ret);\n    ret = dsn::utils::filesystem::file_exists(path);\n    EXPECT_FALSE(ret);\n\n    path = \"./file_utils_temp/a/2.txt\";\n    ret = dsn::utils::filesystem::remove_path(path);\n    EXPECT_TRUE(ret);\n    ret = dsn::utils::filesystem::remove_path(path);\n    EXPECT_TRUE(ret);\n\n    path = \"./file_utils_temp/\";\n    ret = dsn::utils::filesystem::remove_path(path);\n    EXPECT_TRUE(ret);\n    ret = dsn::utils::filesystem::directory_exists(path);\n    EXPECT_FALSE(ret);\n}\n\nstatic void file_utils_test_cleanup() {}\n\nTEST(core, file_utils)\n{\n    file_utils_test_setup();\n    file_utils_test_get_process_image_path();\n    file_utils_test_get_normalized_path();\n    file_utils_test_get_current_directory();\n    file_utils_test_path_combine();\n    file_utils_test_get_file_name();\n    file_utils_test_create();\n    file_utils_test_file_size();\n    file_utils_test_path_exists();\n    file_utils_test_get_paths();\n    file_utils_test_rename();\n    file_utils_test_remove();\n    file_utils_test_cleanup();\n}\n"
  },
  {
    "path": "src/utils/test/flag_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include <dsn/utility/flags.h>\n#include <fmt/format.h>\n\nnamespace dsn {\nnamespace utils {\n\nDSN_DEFINE_int32(\"flag_test\", test_int32, 5, \"\");\nDSN_TAG_VARIABLE(test_int32, FT_MUTABLE);\n\nDSN_DEFINE_uint32(\"flag_test\", test_uint32, 5, \"\");\nDSN_TAG_VARIABLE(test_uint32, FT_MUTABLE);\n\nDSN_DEFINE_int64(\"flag_test\", test_int64, 5, \"\");\nDSN_TAG_VARIABLE(test_int64, FT_MUTABLE);\n\nDSN_DEFINE_uint64(\"flag_test\", test_uint64, 5, \"\");\nDSN_TAG_VARIABLE(test_uint64, FT_MUTABLE);\n\nDSN_DEFINE_double(\"flag_test\", test_double, 5.0, \"\");\nDSN_TAG_VARIABLE(test_double, FT_MUTABLE);\n\nDSN_DEFINE_bool(\"flag_test\", test_bool, true, \"\");\nDSN_TAG_VARIABLE(test_bool, FT_MUTABLE);\n\nDSN_DEFINE_string(\"flag_test\", test_string_immutable, \"immutable_string\", \"\");\n\nDSN_DEFINE_int32(\"flag_test\", test_validator, 10, \"\");\nDSN_TAG_VARIABLE(test_validator, FT_MUTABLE);\nDSN_DEFINE_validator(test_validator, [](int32_t test_validator) -> bool {\n    if (test_validator < 0) {\n        return false;\n    }\n    return true;\n});\n\nDSN_DEFINE_bool(\"flag_test\", condition_a, false, 
\"\");\nDSN_TAG_VARIABLE(condition_a, FT_MUTABLE);\n\nDSN_DEFINE_bool(\"flag_test\", condition_b, false, \"\");\nDSN_TAG_VARIABLE(condition_b, FT_MUTABLE);\n\nDSN_DEFINE_group_validator(inconsistent_conditions, [](std::string &message) -> bool {\n    return !FLAGS_condition_a || !FLAGS_condition_b;\n});\n\nDSN_DEFINE_int32(\"flag_test\", min_value, 1, \"\");\nDSN_TAG_VARIABLE(min_value, FT_MUTABLE);\nDSN_DEFINE_validator(min_value, [](int32_t value) -> bool { return value > 0; });\n\nDSN_DEFINE_int32(\"flag_test\", max_value, 5, \"\");\nDSN_TAG_VARIABLE(max_value, FT_MUTABLE);\nDSN_DEFINE_validator(max_value, [](int32_t value) -> bool { return value <= 10; });\n\nDSN_DEFINE_group_validator(min_max, [](std::string &message) -> bool {\n    if (FLAGS_min_value > FLAGS_max_value) {\n        message = fmt::format(\"min({}) should be <= max({})\", FLAGS_min_value, FLAGS_max_value);\n        return false;\n    }\n    return true;\n});\n\nDSN_DEFINE_int32(\"flag_test\", small_value, 0, \"\");\nDSN_TAG_VARIABLE(small_value, FT_MUTABLE);\n\nDSN_DEFINE_int32(\"flag_test\", medium_value, 5, \"\");\nDSN_TAG_VARIABLE(medium_value, FT_MUTABLE);\n\nDSN_DEFINE_int32(\"flag_test\", large_value, 10, \"\");\nDSN_TAG_VARIABLE(large_value, FT_MUTABLE);\n\nDSN_DEFINE_group_validator(small_medium_large, [](std::string &message) -> bool {\n    if (FLAGS_small_value >= FLAGS_medium_value) {\n        message =\n            fmt::format(\"small({}) should be < medium({})\", FLAGS_small_value, FLAGS_medium_value);\n        return false;\n    }\n\n    if (FLAGS_medium_value >= FLAGS_large_value) {\n        message =\n            fmt::format(\"medium({}) should be < large({})\", FLAGS_medium_value, FLAGS_large_value);\n        return false;\n    }\n\n    return true;\n});\n\nDSN_DEFINE_int32(\"flag_test\", lesser, 0, \"\");\nDSN_TAG_VARIABLE(lesser, FT_MUTABLE);\n\nDSN_DEFINE_int32(\"flag_test\", greater_0, 5, \"\");\nDSN_TAG_VARIABLE(greater_0, FT_MUTABLE);\n\nDSN_DEFINE_int32(\"flag_test\", 
greater_1, 10, \"\");\nDSN_TAG_VARIABLE(greater_1, FT_MUTABLE);\n\nDSN_DEFINE_group_validator(lesser_greater_0, [](std::string &message) -> bool {\n    if (FLAGS_lesser >= FLAGS_greater_0) {\n        message =\n            fmt::format(\"lesser({}) should be < greater_0({})\", FLAGS_lesser, FLAGS_greater_0);\n        return false;\n    }\n\n    return true;\n});\n\nDSN_DEFINE_group_validator(lesser_greater_1, [](std::string &message) -> bool {\n    if (FLAGS_lesser >= FLAGS_greater_1) {\n        message =\n            fmt::format(\"lesser({}) should be < greater_1({})\", FLAGS_lesser, FLAGS_greater_1);\n        return false;\n    }\n\n    return true;\n});\n\nTEST(flag_test, update_config)\n{\n    auto res = update_flag(\"test_int32\", \"3\");\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(FLAGS_test_int32, 3);\n\n    res = update_flag(\"test_uint32\", \"3\");\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(FLAGS_test_uint32, 3);\n\n    res = update_flag(\"test_int64\", \"3\");\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(FLAGS_test_int64, 3);\n\n    res = update_flag(\"test_uint64\", \"3\");\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(FLAGS_test_uint64, 3);\n\n    res = update_flag(\"test_double\", \"3.0\");\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(FLAGS_test_double, 3.0);\n\n    res = update_flag(\"test_bool\", \"false\");\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_FALSE(FLAGS_test_bool);\n\n    // string modifications are not supported\n    res = update_flag(\"test_string_immutable\", \"update_string\");\n    ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS);\n    ASSERT_EQ(strcmp(FLAGS_test_string_immutable, \"immutable_string\"), 0);\n\n    // test that the flag does not exist\n    res = update_flag(\"test_not_exist\", \"test_string\");\n    ASSERT_EQ(res.code(), ERR_OBJECT_NOT_FOUND);\n\n    // test updating with an invalid value\n    res = update_flag(\"test_int32\", \"3ab\");\n    ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS);\n    ASSERT_EQ(FLAGS_test_int32, 3);\n\n    // 
validation succeed\n    res = update_flag(\"test_validator\", \"5\");\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(FLAGS_test_validator, 5);\n\n    // validation failed\n    res = update_flag(\"test_validator\", \"-1\");\n    ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS);\n    ASSERT_EQ(FLAGS_test_validator, 5);\n\n    // successful detection with consistent conditions\n    res = update_flag(\"condition_a\", \"true\");\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(FLAGS_condition_a, true);\n\n    // failed detection with mutually exclusive conditions\n    res = update_flag(\"condition_b\", \"true\");\n    ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS);\n    ASSERT_EQ(FLAGS_condition_b, false);\n    std::cout << res.description() << std::endl;\n\n    // successful detection between 2 flags\n    res = update_flag(\"max_value\", \"6\");\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(FLAGS_max_value, 6);\n\n    // failed detection between 2 flags with each individual validation\n    res = update_flag(\"min_value\", \"0\");\n    ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS);\n    ASSERT_EQ(FLAGS_min_value, 1);\n    std::cout << res.description() << std::endl;\n\n    // failed detection between 2 flags within a grouped validator\n    res = update_flag(\"min_value\", \"7\");\n    ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS);\n    ASSERT_EQ(FLAGS_min_value, 1);\n    std::cout << res.description() << std::endl;\n\n    // successful detection among 3 flags within a grouped validator\n    res = update_flag(\"medium_value\", \"6\");\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(FLAGS_medium_value, 6);\n\n    // failed detection among 3 flags within a grouped validator\n    res = update_flag(\"medium_value\", \"0\");\n    ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS);\n    ASSERT_EQ(FLAGS_medium_value, 6);\n    std::cout << res.description() << std::endl;\n\n    // failed detection among 3 flags within a grouped validator\n    res = update_flag(\"medium_value\", \"10\");\n    
ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS);\n    ASSERT_EQ(FLAGS_medium_value, 6);\n    std::cout << res.description() << std::endl;\n\n    // successful detection among 3 flags within both of the 2 grouped validators\n    res = update_flag(\"lesser\", \"1\");\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(FLAGS_lesser, 1);\n\n    // failed detection among 3 flags within one of 2 grouped validators\n    res = update_flag(\"lesser\", \"6\");\n    ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS);\n    ASSERT_EQ(FLAGS_lesser, 1);\n    std::cout << res.description() << std::endl;\n\n    // failed detection among 3 flags within both of the 2 grouped validators\n    res = update_flag(\"lesser\", \"11\");\n    ASSERT_EQ(res.code(), ERR_INVALID_PARAMETERS);\n    ASSERT_EQ(FLAGS_lesser, 1);\n    std::cout << res.description() << std::endl;\n}\n\nDSN_DEFINE_int32(\"flag_test\", has_tag, 5, \"\");\nDSN_TAG_VARIABLE(has_tag, FT_MUTABLE);\n\nDSN_DEFINE_int32(\"flag_test\", no_tag, 5, \"\");\n\nTEST(flag_test, tag_flag)\n{\n    // has tag\n    auto res = has_tag(\"has_tag\", flag_tag::FT_MUTABLE);\n    ASSERT_TRUE(res);\n\n    // doesn't have tag\n    res = has_tag(\"no_tag\", flag_tag::FT_MUTABLE);\n    ASSERT_FALSE(res);\n\n    // flag does not exist\n    res = has_tag(\"no_flag\", flag_tag::FT_MUTABLE);\n    ASSERT_FALSE(res);\n}\n\nDSN_DEFINE_int32(\"flag_test\", get_flag_int32, 5, \"test get_flag_int32\");\nDSN_TAG_VARIABLE(get_flag_int32, FT_MUTABLE);\nDSN_DEFINE_uint32(\"flag_test\", get_flag_uint32, 5, \"test get_flag_uint32\");\nDSN_TAG_VARIABLE(get_flag_uint32, FT_MUTABLE);\nDSN_DEFINE_int64(\"flag_test\", get_flag_int64, 5, \"test get_flag_int64\");\nDSN_TAG_VARIABLE(get_flag_int64, FT_MUTABLE);\nDSN_DEFINE_uint64(\"flag_test\", get_flag_uint64, 5, \"test get_flag_uint64\");\nDSN_TAG_VARIABLE(get_flag_uint64, FT_MUTABLE);\nDSN_DEFINE_double(\"flag_test\", get_flag_double, 5.12, \"test get_flag_double\");\nDSN_TAG_VARIABLE(get_flag_double, FT_MUTABLE);\nDSN_DEFINE_bool(\"flag_test\", 
get_flag_bool, true, \"test get_flag_bool\");\nDSN_TAG_VARIABLE(get_flag_bool, FT_MUTABLE);\nDSN_DEFINE_string(\"flag_test\", get_flag_string, \"flag_string\", \"test get_flag_string\");\nDSN_TAG_VARIABLE(get_flag_string, FT_MUTABLE);\n\nTEST(flag_test, get_config)\n{\n    auto res = get_flag_str(\"get_flag_not_exist\");\n    ASSERT_EQ(res.get_error().code(), ERR_OBJECT_NOT_FOUND);\n\n    std::string test_app = \"get_flag_int32\";\n    res = get_flag_str(test_app);\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(\n        res.get_value(),\n        R\"({\"name\":\")\" + test_app +\n            R\"(\",\"section\":\"flag_test\",\"type\":\"FV_INT32\",\"tags\":\"flag_tag::FT_MUTABLE\",\"description\":\"test get_flag_int32\",\"value\":\")\" +\n            std::to_string(FLAGS_get_flag_int32) + R\"(\"})\" + \"\\n\");\n\n    test_app = \"get_flag_uint32\";\n    res = get_flag_str(test_app);\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(\n        res.get_value(),\n        R\"({\"name\":\")\" + test_app +\n            R\"(\",\"section\":\"flag_test\",\"type\":\"FV_UINT32\",\"tags\":\"flag_tag::FT_MUTABLE\",\"description\":\"test get_flag_uint32\",\"value\":\")\" +\n            std::to_string(FLAGS_get_flag_uint32) + R\"(\"})\" + \"\\n\");\n\n    test_app = \"get_flag_int64\";\n    res = get_flag_str(test_app);\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(\n        res.get_value(),\n        R\"({\"name\":\")\" + test_app +\n            R\"(\",\"section\":\"flag_test\",\"type\":\"FV_INT64\",\"tags\":\"flag_tag::FT_MUTABLE\",\"description\":\"test get_flag_int64\",\"value\":\")\" +\n            std::to_string(FLAGS_get_flag_int64) + R\"(\"})\" + \"\\n\");\n\n    test_app = \"get_flag_uint64\";\n    res = get_flag_str(test_app);\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(\n        res.get_value(),\n        R\"({\"name\":\")\" + test_app +\n            R\"(\",\"section\":\"flag_test\",\"type\":\"FV_UINT64\",\"tags\":\"flag_tag::FT_MUTABLE\",\"description\":\"test 
get_flag_uint64\",\"value\":\")\" +\n            std::to_string(FLAGS_get_flag_uint64) + R\"(\"})\" + \"\\n\");\n\n    test_app = \"get_flag_double\";\n    res = get_flag_str(test_app);\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(\n        res.get_value(),\n        R\"({\"name\":\")\" + test_app +\n            R\"(\",\"section\":\"flag_test\",\"type\":\"FV_DOUBLE\",\"tags\":\"flag_tag::FT_MUTABLE\",\"description\":\"test get_flag_double\",\"value\":\"5.12\"})\" +\n            \"\\n\");\n\n    test_app = \"get_flag_bool\";\n    res = get_flag_str(test_app);\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(\n        res.get_value(),\n        R\"({\"name\":\")\" + test_app +\n            R\"(\",\"section\":\"flag_test\",\"type\":\"FV_BOOL\",\"tags\":\"flag_tag::FT_MUTABLE\",\"description\":\"test get_flag_bool\",\"value\":\"true\"})\"\n            \"\\n\");\n\n    test_app = \"get_flag_string\";\n    res = get_flag_str(test_app);\n    ASSERT_TRUE(res.is_ok());\n    ASSERT_EQ(\n        res.get_value(),\n        R\"({\"name\":\")\" + test_app +\n            R\"(\",\"section\":\"flag_test\",\"type\":\"FV_STRING\",\"tags\":\"flag_tag::FT_MUTABLE\",\"description\":\"test get_flag_string\",\"value\":\")\" +\n            FLAGS_get_flag_string + R\"(\"})\" + \"\\n\");\n}\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/fmt_logging_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/utility/string_view.h>\n#include <dsn/utility/errors.h>\n#include <dsn/dist/fmt_logging.h>\n#include <gtest/gtest.h>\n#include <dsn/dist/replication/replication.codes.h>\n\nnamespace dsn {\nnamespace replication {\n\nTEST(fmt_logging, basic)\n{\n    ASSERT_EQ(fmt::format(\"{}\", gpid(1, 1)), \"1.1\");\n    ASSERT_EQ(fmt::format(\"{}\", error_s::ok()), \"ERR_OK\");\n    ASSERT_EQ(fmt::format(\"{}\", error_s::make(ERR_TIMEOUT, \"yes\")), \"ERR_TIMEOUT: yes\");\n    ASSERT_EQ(fmt::format(\"{}\", ERR_OK), \"ERR_OK\");\n    ASSERT_EQ(fmt::format(\"{}\", LPC_REPLICATION_LOW), \"LPC_REPLICATION_LOW\");\n    ASSERT_EQ(string_view(\"yes\"), \"yes\");\n    ASSERT_EQ(fmt::format(\"{}\", string_view(\"yes\\0yes\")), 
\"yes\\0yes\");\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/hostname_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/utils.h>\n\n#include <dsn/tool-api/rpc_address.h>\n#include <gtest/gtest.h>\n\nnamespace dsn {\nnamespace replication {\n\nTEST(ip_to_hostname, ipv4_validate)\n{\n    rpc_address rpc_test_ipv4;\n    struct ip_test\n    {\n        std::string ip;\n        bool result;\n    } tests[] = {{\"127.0.0.1:8080\", true},\n                 {\"172.16.254.1:1234\", true},\n                 {\"172.16.254.1:222222\", false},\n                 {\"172.16.254.1\", false},\n                 {\"2222,123,33,1:8080\", false},\n                 {\"123.456.789.1:8080\", false},\n                 {\"001.223.110.002:8080\", false},\n                 {\"172.16.254.1.8080\", false},\n                 {\"172.16.254.1:8080.\", false},\n                 {\"127.0.0.11:123!\", false},\n                 {\"127.0.0.11:123\", true},\n                 {\"localhost:34601\", true},\n                 {\"localhost:3460100022212312312213\", false},\n                 {\"localhost:-12\", false},\n                 {\"localhost:1@2\", false}};\n\n    for (auto test : tests) {\n        ASSERT_EQ(rpc_test_ipv4.from_string_ipv4(test.ip.c_str()), test.result);\n    
}\n}\n\nTEST(ip_to_hostname, localhost)\n{\n    std::string hostname_result;\n\n    const std::string valid_ip = \"127.0.0.1\";\n    const std::string expected_hostname = \"localhost\";\n\n    const std::string valid_ip_port = \"127.0.0.1:23010\";\n    const std::string expected_hostname_port = \"localhost:23010\";\n\n    const std::string valid_ip_list = \"127.0.0.1,127.0.0.1,127.0.0.1\";\n    const std::string expected_hostname_list = \"localhost,localhost,localhost\";\n\n    const std::string valid_ip_port_list = \"127.0.0.1:8080,127.0.0.1:8080,127.0.0.1:8080\";\n    const std::string expected_hostname_port_list = \"localhost:8080,localhost:8080,localhost:8080\";\n\n    rpc_address rpc_example_valid;\n    rpc_example_valid.assign_ipv4(valid_ip.c_str(), 23010);\n\n    // static bool hostname(const rpc_address &address,std::string *hostname_result);\n    ASSERT_TRUE(dsn::utils::hostname(rpc_example_valid, &hostname_result));\n    ASSERT_EQ(expected_hostname_port, hostname_result);\n\n    // static bool hostname_from_ip(uint32_t ip, std::string* hostname_result);\n    ASSERT_TRUE(dsn::utils::hostname_from_ip(htonl(rpc_example_valid.ip()), &hostname_result));\n    ASSERT_EQ(expected_hostname, hostname_result);\n\n    // static bool hostname_from_ip(const char *ip,std::string *hostname_result);\n    ASSERT_TRUE(dsn::utils::hostname_from_ip(valid_ip.c_str(), &hostname_result));\n    ASSERT_EQ(expected_hostname, hostname_result);\n\n    // static bool hostname_from_ip_port(const char *ip_port,std::string *hostname_result);\n    ASSERT_TRUE(dsn::utils::hostname_from_ip_port(valid_ip_port.c_str(), &hostname_result));\n    ASSERT_EQ(expected_hostname_port, hostname_result);\n\n    // static bool list_hostname_from_ip(const char *ip_port_list,std::string\n    // *hostname_result_list);\n    ASSERT_TRUE(dsn::utils::list_hostname_from_ip(valid_ip_list.c_str(), &hostname_result));\n    ASSERT_EQ(expected_hostname_list, hostname_result);\n\n    
ASSERT_FALSE(dsn::utils::list_hostname_from_ip(\"127.0.0.1,127.0.0.23323,111127.0.0.3\",\n                                                   &hostname_result));\n    ASSERT_EQ(\"localhost,127.0.0.23323,111127.0.0.3\", hostname_result);\n\n    ASSERT_FALSE(dsn::utils::list_hostname_from_ip(\"123.456.789.111,127.0.0.1\", &hostname_result));\n    ASSERT_EQ(\"123.456.789.111,localhost\", hostname_result);\n\n    // static bool list_hostname_from_ip_port(const char *ip_port_list,std::string\n    // *hostname_result_list);\n    ASSERT_TRUE(\n        dsn::utils::list_hostname_from_ip_port(valid_ip_port_list.c_str(), &hostname_result));\n    ASSERT_EQ(expected_hostname_port_list, hostname_result);\n\n    ASSERT_FALSE(dsn::utils::list_hostname_from_ip_port(\n        \"127.0.3333.1:23456,1127.0.0.2:22233,127.0.0.1:8080\", &hostname_result));\n    ASSERT_EQ(\"127.0.3333.1:23456,1127.0.0.2:22233,localhost:8080\", hostname_result);\n}\n\nTEST(ip_to_hostname, invalid_ip)\n{\n\n    std::string hostname_result;\n    const std::string invalid_ip = \"123.456.789.111\";\n    const std::string invalid_ip_port = \"123.456.789.111:23010\";\n\n    ASSERT_FALSE(dsn::utils::hostname_from_ip(invalid_ip.c_str(), &hostname_result));\n    ASSERT_EQ(invalid_ip, hostname_result);\n\n    ASSERT_FALSE(dsn::utils::hostname_from_ip_port(invalid_ip_port.c_str(), &hostname_result));\n    ASSERT_EQ(invalid_ip_port, hostname_result);\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/join_point_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/utility/join_point.h>\n#include <gtest/gtest.h>\n\nnamespace dsn {\n\nclass join_point_test : public ::testing::Test\n{\npublic:\n};\n\nvoid advice1() {}\n\n// smoke test\nTEST_F(join_point_test, add_pure_functions)\n{\n    join_point<void> jp(\"test\");\n    jp.put_back(advice1, \"test\");\n    jp.put_front(advice1, \"test\");\n    jp.put_native(advice1);\n    jp.execute();\n\n    ASSERT_STREQ(jp.name(), \"test\");\n}\n\nTEST_F(join_point_test, adding_order)\n{\n    join_point<void, int> jp(\"test\");\n\n    std::vector<int> vec;\n    jp.put_back([&](int val) { vec.push_back(1); }, \"test\");\n    jp.put_back([&](int val) { vec.push_back(2); }, \"test\");\n    jp.put_back([&](int val) { vec.push_back(3); }, 
\"test\");\n    jp.put_front([&](int val) { vec.push_back(4); }, \"test\");\n    jp.put_front([&](int val) { vec.push_back(5); }, \"test\");\n    jp.execute(0);\n\n    ASSERT_EQ(vec, std::vector<int>({5, 4, 1, 2, 3}));\n}\n\nTEST_F(join_point_test, with_return_value)\n{\n    join_point<double, int, double, std::string> jp(\"test\");\n\n    std::vector<int> vec;\n    std::string expected_str;\n\n    jp.put_back([&](int, double, std::string) { vec.push_back(1); }, \"test\");\n    jp.put_back([&](int, double, std::string) { vec.push_back(2); }, \"test\");\n    jp.put_native([&](int b, double c, std::string str) -> double {\n        vec.push_back(3);\n        expected_str = str;\n        return c + b;\n    });\n    double exec_res = jp.execute(5, 0.5, std::string(\"abc\"), 0.0);\n\n    ASSERT_EQ(vec, std::vector<int>({3, 1, 2}));\n    ASSERT_EQ(exec_res, 5.5);\n    ASSERT_EQ(expected_str, \"abc\");\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/json_helper_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <gtest/gtest.h>\n#include <dsn/cpp/json_helper.h>\n\nnamespace dsn {\n\nclass test_entity\n{\npublic:\n    int field = 1;\n    const int const_field = 2;\n\nprivate:\n    int private_field = 3;\n    const int private_const_field = 4;\n\npublic:\n    DEFINE_JSON_SERIALIZATION(field, const_field, private_field, private_const_field)\n\n    bool operator==(const test_entity &rhs) const\n    {\n        return field == rhs.field && const_field == rhs.const_field &&\n               private_field == rhs.private_field && private_const_field == rhs.private_const_field;\n    }\n};\n\nstruct struct_type1\n{\n    bool b;\n    std::string s;\n    double d;\n\n    bool operator==(const struct_type1 &another) const\n    {\n        return b 
== another.b && s == another.s && d == another.d;\n    }\n};\nNON_MEMBER_JSON_SERIALIZATION(struct_type1, b, s, d)\n\nstruct struct_type2\n{\n    int8_t i8;\n    int16_t i16;\n    int32_t i32;\n    int64_t i64;\n    bool operator==(const struct_type2 &another) const\n    {\n        return i8 == another.i8 && i16 == another.i16 && i32 == another.i32 && i64 == another.i64;\n    }\n};\nNON_MEMBER_JSON_SERIALIZATION(struct_type2, i8, i16, i32, i64)\n\nstruct struct_type3\n{\n    uint8_t u8;\n    uint16_t u16;\n    uint32_t u32;\n    uint64_t u64;\n    bool operator==(const struct_type3 &another) const\n    {\n        return u8 == another.u8 && u16 == another.u16 && u32 == another.u32 && u64 == another.u64;\n    }\n};\nNON_MEMBER_JSON_SERIALIZATION(struct_type3, u8, u16, u32, u64)\n\nstruct nested_type\n{\n    std::shared_ptr<std::string> str;\n    struct_type1 t1;\n    std::vector<struct_type2> t2_vec;\n    std::map<std::string, struct_type3> t3_map;\n    std::unordered_map<int, double> t4_umap;\n    std::set<uint32_t> t5_set;\n    bool operator==(const nested_type &another) const\n    {\n        return *(str.get()) == *(another.str.get()) && t1 == another.t1 &&\n               t2_vec == another.t2_vec && t3_map == another.t3_map && t4_umap == another.t4_umap &&\n               t5_set == another.t5_set;\n    }\n};\nNON_MEMBER_JSON_SERIALIZATION(nested_type, str, t1, t2_vec, t3_map, t4_umap, t5_set)\n\nstruct older_struct\n{\n    int a;\n    std::string b;\n    struct_type1 t1;\n    std::map<std::string, std::string> c;\n    DEFINE_JSON_SERIALIZATION(a, b, t1, c)\n};\n\nstruct new_struct_type1 : struct_type1\n{\n    std::vector<std::string> new_vecs;\n    DEFINE_JSON_SERIALIZATION(b, s, d, new_vecs)\n};\n\nstruct newer_struct\n{\n    int a;\n    std::string b;\n    new_struct_type1 t1;\n    std::map<std::string, std::string> c;\n    std::vector<int> d;\n    std::map<int, int> e;\n    DEFINE_JSON_SERIALIZATION(a, b, t1, c, d, e)\n};\n\n// This test verifies that 
json_forwarder can correctly encode an object with private\n// and const fields.\nTEST(json_helper, encode_and_decode)\n{\n    test_entity entity;\n    // ensures that `entity` doesn't equal to the default value of `decoded_entity`\n    entity.field = 5;\n\n    blob encoded_entity = dsn::json::json_forwarder<test_entity>::encode(entity);\n\n    test_entity decoded_entity;\n    dsn::json::json_forwarder<test_entity>::decode(encoded_entity, decoded_entity);\n\n    ASSERT_EQ(entity, decoded_entity);\n}\n\nTEST(json_helper, simple_type_encode_decode)\n{\n    struct_type1 t1_in, t1_out;\n    t1_in.b = true;\n    t1_in.d = -0.00;\n    t1_in.s = \"hahaha\";\n\n    t1_out.b = false;\n    t1_out.d = -0.0;\n    t1_out.s = \"\";\n\n    dsn::blob bb = dsn::json::json_forwarder<struct_type1>::encode(t1_in);\n    t1_in.s = \"\";\n    ASSERT_TRUE(dsn::json::json_forwarder<struct_type1>::decode(bb, t1_out));\n\n    ASSERT_EQ(t1_in.b, t1_out.b);\n    ASSERT_EQ(t1_in.d, t1_out.d);\n    ASSERT_EQ(\"hahaha\", t1_out.s);\n\n    t1_in.b = false;\n    t1_in.d = 99.999;\n    t1_in.s = \"\";\n\n    bb = dsn::json::json_forwarder<struct_type1>::encode(t1_in);\n    ASSERT_TRUE(dsn::json::json_forwarder<struct_type1>::decode(bb, t1_out));\n\n    ASSERT_EQ(t1_in, t1_out);\n\n    t1_in.s = \"string with escape: \\\\\\\\, \\\\\\\"\";\n    bb = dsn::json::json_forwarder<struct_type1>::encode(t1_in);\n    ASSERT_TRUE(dsn::json::json_forwarder<struct_type1>::decode(bb, t1_out));\n    std::cout << bb.data() << std::endl;\n    ASSERT_EQ(t1_in.s, t1_out.s);\n}\n\nTEST(json_helper, int_type_encode_decode)\n{\n    struct_type2 t_in, t_out;\n    t_in.i8 = 0x80;\n    t_in.i16 = 0x8000;\n    t_in.i32 = 0x80000000;\n    t_in.i64 = 0x8000000000000000;\n\n    t_out.i8 = 0;\n    t_out.i16 = 0;\n    t_out.i32 = 0;\n    t_out.i64 = 0;\n\n    dsn::blob bb = dsn::json::json_forwarder<struct_type2>::encode(t_in);\n    ASSERT_TRUE(dsn::json::json_forwarder<struct_type2>::decode(bb, t_out));\n\n    ASSERT_EQ(t_in, 
t_out);\n\n    t_in.i8 = 0x7f;\n    t_in.i16 = 0x7fff;\n    t_in.i32 = 0x7fffffff;\n    t_in.i64 = 0x7fffffffffffffff;\n\n    bb = dsn::json::json_forwarder<struct_type2>::encode(t_in);\n    ASSERT_TRUE(dsn::json::json_forwarder<struct_type2>::decode(bb, t_out));\n\n    ASSERT_EQ(t_in, t_out);\n}\n\nTEST(json_helper, int_overflow_underflow)\n{\n    const char *abnormal_targets[] = {\n        \"{\\\"i8\\\":128,\\\"i16\\\":32767,\\\"i32\\\":2147483647,\\\"i64\\\":9223372036854775807}\",\n        \"{\\\"i8\\\":127,\\\"i16\\\":32768,\\\"i32\\\":2147483647,\\\"i64\\\":9223372036854775807}\",\n        \"{\\\"i8\\\":127,\\\"i16\\\":32767,\\\"i32\\\":2147483648,\\\"i64\\\":9223372036854775807}\",\n        \"{\\\"i8\\\":127,\\\"i16\\\":32767,\\\"i32\\\":2147483647,\\\"i64\\\":9223372036854775808}\",\n        \"{\\\"i8\\\":-129,\\\"i16\\\":-32768,\\\"i32\\\":-2147483648,\\\"i64\\\":-9223372036854775808}\",\n        \"{\\\"i8\\\":-128,\\\"i16\\\":-32769,\\\"i32\\\":-2147483648,\\\"i64\\\":-9223372036854775808}\",\n        \"{\\\"i8\\\":-128,\\\"i16\\\":-32768,\\\"i32\\\":-2147483649,\\\"i64\\\":-9223372036854775808}\",\n        \"{\\\"i8\\\":-128,\\\"i16\\\":-32768,\\\"i32\\\":-2147483648,\\\"i64\\\":-9223372036854775809}\"};\n\n    struct_type2 t;\n\n    for (int i = 0; i < 8; ++i) {\n        dsn::blob bb(abnormal_targets[i], 0, strlen(abnormal_targets[i]));\n        bool result = dsn::json::json_forwarder<struct_type2>::decode(bb, t);\n        ASSERT_FALSE(result);\n    }\n\n    const char *normal_targets[] = {\n        \"{\\\"i8\\\":-128,\\\"i16\\\":-32768,\\\"i32\\\":-2147483648,\\\"i64\\\":-9223372036854775808}\",\n        \"{\\\"i8\\\":127,\\\"i16\\\":32767,\\\"i32\\\":2147483647,\\\"i64\\\":9223372036854775807}\"};\n\n    struct_type2 normal_results[2];\n    normal_results[0].i8 = 0x80;\n    normal_results[0].i16 = 0x8000;\n    normal_results[0].i32 = 0x80000000;\n    normal_results[0].i64 = 0x8000000000000000;\n\n    normal_results[1].i8 = 0x7f;\n    
normal_results[1].i16 = 0x7fff;\n    normal_results[1].i32 = 0x7fffffff;\n    normal_results[1].i64 = 0x7fffffffffffffff;\n\n    for (int i = 0; i < 2; ++i) {\n        dsn::blob bb(normal_targets[i], 0, strlen(normal_targets[i]));\n        bool result = dsn::json::json_forwarder<struct_type2>::decode(bb, t);\n        ASSERT_TRUE(result);\n\n        ASSERT_EQ(normal_results[i], t);\n    }\n}\n\nTEST(json_helper, uint_encode_decode)\n{\n    struct_type3 t_in, t_out;\n    t_in.u8 = 0xff;\n    t_in.u16 = 0xffff;\n    t_in.u32 = 0xffffffff;\n    t_in.u64 = 0xffffffffffffffff;\n\n    dsn::blob bb = dsn::json::json_forwarder<struct_type3>::encode(t_in);\n\n    t_out.u8 = 0;\n    t_out.u16 = 0;\n    t_out.u32 = 0;\n    t_out.u64 = 0;\n    dsn::json::json_forwarder<struct_type3>::decode(bb, t_out);\n\n    ASSERT_EQ(t_in, t_out);\n}\n\nTEST(json_helper, uint_overflow_underflow)\n{\n    const char *abnormal_cases[] = {\n        \"{\\\"u8\\\":256,\\\"u16\\\":65535,\\\"u32\\\":4294967295,\\\"u64\\\":18446744073709551615}\",\n        \"{\\\"u8\\\":255,\\\"u16\\\":65536,\\\"u32\\\":4294967295,\\\"u64\\\":18446744073709551615}\",\n        \"{\\\"u8\\\":255,\\\"u16\\\":65535,\\\"u32\\\":4294967296,\\\"u64\\\":18446744073709551615}\",\n        \"{\\\"u8\\\":255,\\\"u16\\\":65535,\\\"u32\\\":4294967295,\\\"u64\\\":18446744073709551616}\",\n        \"{\\\"u8\\\":-1,\\\"u16\\\":65535,\\\"u32\\\":4294967295,\\\"u64\\\":18446744073709551615}\",\n        \"{\\\"u8\\\":255,\\\"u16\\\":-1,\\\"u32\\\":4294967295,\\\"u64\\\":18446744073709551615}\",\n        \"{\\\"u8\\\":255,\\\"u16\\\":65535,\\\"u32\\\":-1,\\\"u64\\\":18446744073709551615}\",\n        \"{\\\"u8\\\":255,\\\"u16\\\":65535,\\\"u32\\\":4294967295,\\\"u64\\\":-1}\",\n    };\n\n    struct_type3 t;\n    for (int i = 0; i < 8; ++i) {\n        dsn::blob bb(abnormal_cases[i], 0, strlen(abnormal_cases[i]));\n        bool ans = dsn::json::json_forwarder<struct_type3>::decode(bb, t);\n        ASSERT_FALSE(ans);\n    }\n\n    const char 
*normal_cases[] = {\n        \"{\\\"u8\\\":255,\\\"u16\\\":65535,\\\"u32\\\":4294967295,\\\"u64\\\":18446744073709551615}\",\n        \"{\\\"u8\\\":0,\\\"u16\\\":0,\\\"u32\\\":0,\\\"u64\\\":0}\",\n    };\n\n    struct_type3 normal_results[2];\n    normal_results[0].u8 = 0xff;\n    normal_results[0].u16 = 0xffff;\n    normal_results[0].u32 = 0xffffffff;\n    normal_results[0].u64 = 0xffffffffffffffff;\n\n    normal_results[1].u8 = 0;\n    normal_results[1].u16 = 0;\n    normal_results[1].u32 = 0;\n    normal_results[1].u64 = 0;\n\n    for (int i = 0; i < 2; ++i) {\n        dsn::blob bb(normal_cases[i], 0, strlen(normal_cases[i]));\n        bool result = dsn::json::json_forwarder<struct_type3>::decode(bb, t);\n        ASSERT_TRUE(result);\n\n        ASSERT_EQ(normal_results[i], t);\n    }\n}\n\nTEST(json_helper, nested_type_encode_decode)\n{\n    nested_type nt;\n    nt.str = std::make_shared<std::string>(\"this is a shared ptr string\");\n    nt.t1 = struct_type1{false, \"simple\", 99.99999};\n    nt.t2_vec = {struct_type2{2, 4, 6, 8}, struct_type2{-3, -5, -7, -9}};\n    nt.t3_map = {{\"string1\", struct_type3{1, 3, 4, 6}}, {\"string2\", struct_type3{0, 0, 0, 0}}};\n    nt.t4_umap = {{123, 0.23333354}, {-123, 99.99999}};\n    nt.t5_set = {1, 3, 5, 7, 9};\n\n    dsn::blob bb = dsn::json::json_forwarder<decltype(nt)>::encode(nt);\n\n    nested_type nt2;\n    dsn::json::json_forwarder<decltype(nt)>::decode(bb, nt2);\n\n    ASSERT_EQ(nt, nt2);\n}\n\nTEST(json_helper, decode_invalid_json)\n{\n    // decode from invalid json\n    const char *json[] = {\n        \"{\\\"a\\\":1,\\\"b\\\":\\\"hehe\\\",\\\"c\\\":{\\\"aa\\\":\\\"bb\\\",\\\"cc\\\":\\\"dd\\\"},\\\"c\\\":[1,3,4],\"\n        \"\\\"d\\\":{\\\"1\\\":4,\\\"2\\\":3},\\\"hehe\\\"}\",\n        \"{\\\"a\\\":[]}\",\n        \"{\\\"c\\\":1}\",\n        \"{\\\"a\\\":1,\\\"xxx\\\":\\\"hehe\\\"}\",\n        \"{\\\"a\\\":1,\\\"b\\\":\\\"hehe\\\"\",\n        \"{\\\"a\\\":1,\\\"b\\\":\\\"hehe\\\",\",\n        
\"{\\\"a\\\":1,\\\"b\\\":\\\"hehe\\\",}\",\n        \"{\\\"a\\\":1,\\\"b\\\":\\\"hehe\\\",{\\\"x\\\":\\\"y\\\"}}}\",\n        nullptr};\n\n    older_struct o;\n    for (int i = 0; json[i]; ++i) {\n        dsn::blob in(json[i], 0, strlen(json[i]));\n        ASSERT_FALSE(dsn::json::json_forwarder<older_struct>::decode(in, o));\n    }\n}\n\nTEST(json_helper, type_mismatch)\n{\n    struct_type1 t1;\n    const char *type_mismatch1[] = {\n        \"{\\\"b\\\":\\\"heheda\\\",\\\"s\\\":\\\"heheda\\\",\\\"d\\\":12.345}\",\n        \"{\\\"b\\\":1,\\\"s\\\":[1, 2, 3],\\\"d\\\":12.345}\",\n        \"{\\\"b\\\":1,\\\"s\\\":\\\"heheda\\\",\\\"d\\\":12.3.4.5}\",\n        \"{\\\"b\\\":1,\\\"s\\\":\\\"heheda\\\",\\\"d\\\":{}}\",\n        \"{\\\"b\\\":t,\\\"s\\\":\\\"heheda\\\",\\\"d\\\":{}}\",\n        \"{\\\"b\\\":1,\\\"s\\\":\\\"heheda\\\",\\\"d\\\":12.345}\",\n        \"{\\\"b\\\":1,\\\"s\\\":\\\"heheda\\\",\\\"d\\\":2345}\",\n        \"{\\\"b\\\":1,\\\"s\\\":\\\"heheda\\\",\\\"d\\\":-0}\",\n        \"{\\\"b\\\":true,\\\"s\\\":\\\"heheda\\\",\\\"d\\\":-0}\",\n        \"{\\\"b\\\":false,\\\"s\\\":\\\"heheda\\\",\\\"d\\\":-0}\",\n        nullptr,\n    };\n    bool expected1[] = {false, false, false, false, false, true, true, true, true, true};\n    for (int i = 0; type_mismatch1[i]; ++i) {\n        dsn::blob bb(type_mismatch1[i], 0, strlen(type_mismatch1[i]));\n        bool result = dsn::json::json_forwarder<struct_type1>::decode(bb, t1);\n        ASSERT_EQ(expected1[i], result) << \"case \" << i << \" failed: \" << type_mismatch1[i];\n    }\n\n    struct_type2 t2;\n    const char *int_mismatch[] = {\n        \"{\\\"i8\\\":\\\"aa\\\",\\\"i16\\\":32767,\\\"i32\\\":2147483647,\\\"i64\\\":9223372036854775807}\",\n        \"{\\\"i8\\\":127,\\\"i16\\\":[1, 3, 5],\\\"i32\\\":2147483647,\\\"i64\\\":9223372036854775807}\",\n        \"{\\\"i8\\\":127,\\\"i16\\\":32767,\\\"i32\\\":{},\\\"i64\\\":9223372036854775807}\",\n        
\"{\\\"i8\\\":127,\\\"i16\\\":32767,\\\"i32\\\":2147483647,\\\"i64\\\":0.256}\",\n        \"{\\\"i8\\\":127,\\\"i16\\\":32767,\\\"i32\\\":2147483647,\\\"i64\\\":0}\",\n        nullptr,\n    };\n    bool expected[] = {false, false, false, false, true};\n    for (int i = 0; int_mismatch[i]; ++i) {\n        dsn::blob bb(int_mismatch[i], 0, strlen(int_mismatch[i]));\n        bool result = dsn::json::json_forwarder<struct_type2>::decode(bb, t2);\n        ASSERT_EQ(expected[i], result) << \"case \" << i << \" failed\";\n    }\n\n    struct_type3 t3;\n    const char *uint_mismatch[] = {\n        \"{\\\"u8\\\":[],\\\"u16\\\":65535,\\\"u32\\\":4294967295,\\\"u64\\\":18446744073709551615}\",\n        \"{\\\"u8\\\":255,\\\"u16\\\":{\\\"a\\\":1, \\\"b\\\":2},\\\"u32\\\":4294967295,\\\"u64\\\":18446744073709551615}\",\n        \"{\\\"u8\\\":255,\\\"u16\\\":65535,\\\"u32\\\":23.456,\\\"u64\\\":18446744073709551615}\",\n        \"{\\\"u8\\\":255,\\\"u16\\\":65535,\\\"u32\\\":4294967295,\\\"u64\\\":\\\"hehe\\\"}\",\n        \"{\\\"u8\\\":255,\\\"u16\\\":65535,\\\"u32\\\":4294967295,\\\"u64\\\":18446744073709551615}\",\n        nullptr,\n    };\n    bool expected_uint[] = {false, false, false, false, true};\n    for (int i = 0; uint_mismatch[i]; ++i) {\n        dsn::blob bb(uint_mismatch[i], 0, strlen(uint_mismatch[i]));\n        bool result = dsn::json::json_forwarder<struct_type3>::decode(bb, t3);\n        ASSERT_EQ(expected_uint[i], result);\n    }\n\n    nested_type nt;\n    const char *nt_mismatch[] = {\n        /// str is not string\n        \"{\\\"str\\\":12,\\\"t1\\\":{\\\"b\\\":232,\\\"s\\\":\\\"\\\",\\\"d\\\":3.26e-322},\\\"t2_vec\\\":[],\\\"t3_map\\\":{},\"\n        \"\\\"t4_umap\\\":{},\\\"t5_set\\\":[]}\",\n        /// t1 is not object\n        \"{\\\"str\\\":\\\"a\\\",\\\"t1\\\":[],\\\"t2_vec\\\":[],\\\"t3_map\\\":{},\\\"t4_umap\\\":{},\\\"t5_set\\\":[]}\",\n        /// t2 is not vector\n        
\"{\\\"str\\\":\\\"a\\\",\\\"t1\\\":{\\\"b\\\":232,\\\"s\\\":\\\"\\\",\\\"d\\\":3.26e-322},\\\"t2_vec\\\":\\\"heheda\\\",\\\"t3_\"\n        \"map\\\":{},\\\"t4_umap\\\":{},\\\"t5_set\\\":[]}\",\n        /// t3 is not map\n        \"{\\\"str\\\":\\\"a\\\",\\\"t1\\\":{\\\"b\\\":232,\\\"s\\\":\\\"\\\",\\\"d\\\":3.26e-322},\\\"t2_vec\\\":[],\\\"t3_map\\\":[],\"\n        \"\\\"t4_umap\\\":{},\\\"t5_set\\\":[]}\",\n        /// t4 is not unordered_map\n        \"{\\\"str\\\":\\\"a\\\",\\\"t1\\\":{\\\"b\\\":232,\\\"s\\\":\\\"\\\",\\\"d\\\":3.26e-322},\\\"t2_vec\\\":[],\\\"t3_map\\\":{},\"\n        \"\\\"t4_umap\\\":234.5,\\\"t5_set\\\":[]}\",\n        /// t5 is not set\n        \"{\\\"str\\\":\\\"a\\\",\\\"t1\\\":{\\\"b\\\":232,\\\"s\\\":\\\"\\\",\\\"d\\\":3.26e-322},\\\"t2_vec\\\":[],\\\"t3_map\\\":{},\"\n        \"\\\"t4_umap\\\":{},\\\"t5_set\\\":1}\",\n        /// t1.s is not string\n        \"{\\\"str\\\":\\\"a\\\",\\\"t1\\\":{\\\"b\\\":232,\\\"s\\\":12,\\\"d\\\":3.26e-322},\\\"t2_vec\\\":[],\\\"t3_map\\\":{},\"\n        \"\\\"t4_umap\\\":{},\\\"t5_set\\\":[]}\",\n        /// t2_vec is vector of integer\n        \"{\\\"str\\\":\\\"a\\\",\\\"t1\\\":{\\\"b\\\":232,\\\"s\\\":\\\"\\\",\\\"d\\\":3.26e-322},\\\"t2_vec\\\":[1, 3, \"\n        \"5],\\\"t3_map\\\":{},\\\"t4_umap\\\":{},\\\"t5_set\\\":[]}\",\n        /// normal case\n        \"{\\\"str\\\":\\\"a\\\",\\\"t1\\\":{\\\"b\\\":232,\\\"s\\\":\\\"\\\",\\\"d\\\":3.26e-322},\\\"t2_vec\\\":[],\\\"t3_map\\\":{},\"\n        \"\\\"t4_umap\\\":{},\\\"t5_set\\\":[]}\",\n        nullptr,\n    };\n    bool expected_nt[] = {false, false, false, false, false, false, false, false, true};\n    for (int i = 0; nt_mismatch[i]; ++i) {\n        dsn::blob bb(nt_mismatch[i], 0, strlen(nt_mismatch[i]));\n        bool result = dsn::json::json_forwarder<nested_type>::decode(bb, nt);\n        ASSERT_EQ(expected_nt[i], result) << \"case \" << i << \" failed\";\n    }\n}\n\nTEST(json_helper, upgrade_downgrade)\n{\n    // newer version of json 
can be decoded with older version of struct\n    newer_struct n;\n    n.a = 1;\n    n.b = \"hehe\";\n    n.t1.b = false;\n    n.t1.d = 23.445;\n    n.t1.new_vecs = {\"t1\", \"t2\", \"t3\"};\n    n.t1.s = \"haha\";\n    n.c = {{\"aa\", \"bb\"}, {\"cc\", \"dd\"}};\n    n.d = {1, 3, 4};\n    n.e = {{1, 4}, {2, 3}};\n    blob bb = dsn::json::json_forwarder<newer_struct>::encode(n);\n\n    older_struct o;\n    o.a = -1;\n    o.b = \"xixi\";\n    bool result = dsn::json::json_forwarder<older_struct>::decode(bb, o);\n    ASSERT_TRUE(result);\n\n    ASSERT_EQ(n.a, o.a);\n    ASSERT_EQ(n.b, o.b);\n    ASSERT_EQ(n.t1.b, o.t1.b);\n    ASSERT_EQ(n.t1.d, o.t1.d);\n    ASSERT_EQ(n.t1.s, o.t1.s);\n    ASSERT_EQ(n.c, o.c);\n\n    // older version of json can be decoded by newer version of struct\n    newer_struct n2;\n    blob bb2 = dsn::json::json_forwarder<older_struct>::encode(o);\n    result = dsn::json::json_forwarder<newer_struct>::decode(bb2, n2);\n    ASSERT_TRUE(result);\n\n    ASSERT_EQ(n2.a, o.a);\n    ASSERT_EQ(n2.b, o.b);\n    ASSERT_EQ(n2.t1.b, o.t1.b);\n    ASSERT_EQ(n2.t1.d, o.t1.d);\n    ASSERT_EQ(n2.t1.s, o.t1.s);\n    ASSERT_EQ(n2.c, o.c);\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/latency_tracer_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utils/latency_tracer.h>\n\nnamespace dsn {\nnamespace utils {\nclass latency_tracer_test : public testing::Test\n{\npublic:\n    int _tracer1_stage_count = 3;\n    int _tracer2_stage_count = 2;\n    int _sub_tracer_stage_count = 2;\n\n    std::shared_ptr<latency_tracer> _tracer1;\n    std::shared_ptr<latency_tracer> _tracer2;\n    std::shared_ptr<latency_tracer> _tracer3;\n    std::shared_ptr<latency_tracer> _tracer4;\n    std::shared_ptr<latency_tracer> _sub_tracer;\n\npublic:\n    void SetUp() override\n    {\n        FLAGS_enable_latency_tracer = true;\n        init_trace_points();\n    }\n\n    void init_trace_points()\n    {\n        _tracer1 = std::make_shared<latency_tracer>(false, \"name1\", 0);\n        for (int i = 0; i < _tracer1_stage_count; i++) {\n            ADD_CUSTOM_POINT(_tracer1, fmt::format(\"stage{}\", i));\n        }\n\n        _tracer2 = std::make_shared<latency_tracer>(false, \"name2\", 0);\n\n        for (int i = 0; i < _tracer2_stage_count; i++) {\n            ADD_CUSTOM_POINT(_tracer2, fmt::format(\"stage{}\", i));\n        }\n\n        
_sub_tracer = std::make_shared<latency_tracer>(true, \"sub\", 0);\n        _sub_tracer->set_parent_point_name(\"test\");\n\n        _tracer1->add_sub_tracer(_sub_tracer);\n        _tracer2->add_sub_tracer(_sub_tracer);\n\n        for (int i = 0; i < _sub_tracer_stage_count; i++) {\n            ADD_CUSTOM_POINT(_sub_tracer, fmt::format(\"stage{}\", i));\n        }\n\n        _tracer3 = std::make_shared<latency_tracer>(false, \"name3\", 0);\n        APPEND_EXTERN_POINT(_tracer3, 123, \"test\");\n\n        _tracer4 = std::make_shared<latency_tracer>(false, \"name4\", 0, RPC_TEST);\n    }\n\n    std::map<int64_t, std::string> get_points(const std::shared_ptr<latency_tracer> &tracer)\n    {\n        return tracer->_points;\n    }\n\n    std::shared_ptr<latency_tracer> get_sub_tracer(const std::shared_ptr<latency_tracer> &tracer)\n    {\n        return tracer->sub_tracer(\"sub\");\n    }\n};\n\nTEST_F(latency_tracer_test, add_point)\n{\n    auto tracer1_points = get_points(_tracer1);\n    // tracer constructor will auto push one point, so the total count is stage_count + 1\n    ASSERT_EQ(tracer1_points.size(), _tracer1_stage_count + 1);\n    int count1 = 0;\n    bool tracer1_first = true;\n    for (const auto &point : tracer1_points) {\n        if (tracer1_first) {\n            tracer1_first = false;\n            continue;\n        }\n        ASSERT_EQ(point.second,\n                  fmt::format(\"latency_tracer_test.cpp:48:init_trace_points_stage{}\", count1++));\n    }\n\n    auto tracer2_points = get_points(_tracer2);\n    ASSERT_EQ(tracer2_points.size(), _tracer2_stage_count + 1);\n    int count2 = 0;\n    bool tracer2_first = true;\n    for (const auto &point : tracer2_points) {\n        if (tracer2_first) {\n            tracer2_first = false;\n            continue;\n        }\n        ASSERT_EQ(point.second,\n                  fmt::format(\"latency_tracer_test.cpp:54:init_trace_points_stage{}\", count2++));\n    }\n\n    auto tracer1_sub_tracer = 
get_sub_tracer(_tracer1);\n    auto tracer2_sub_tracer = get_sub_tracer(_tracer2);\n    ASSERT_EQ(tracer1_sub_tracer, tracer2_sub_tracer);\n\n    auto points = get_points(tracer1_sub_tracer);\n    ASSERT_TRUE(get_sub_tracer(tracer1_sub_tracer) == nullptr);\n    ASSERT_EQ(points.size(), _sub_tracer_stage_count + 1);\n    int count3 = 0;\n    bool sub_tracer_first = true;\n    for (const auto &point : points) {\n        if (sub_tracer_first) {\n            sub_tracer_first = false;\n            continue;\n        }\n        ASSERT_EQ(point.second,\n                  fmt::format(\"latency_tracer_test.cpp:64:init_trace_points_stage{}\", count3++));\n    }\n\n    // tracer3 append one invalid point, it will reset the last position and update the\n    // timestamp=previous+1\n    auto tracer3_points = get_points(_tracer3);\n    ASSERT_EQ(tracer3_points.size(), 2);\n    ASSERT_EQ(tracer3_points.rbegin()->first - tracer3_points.begin()->first, 1);\n\n    // tracer4 init with disable trace task code, the points size will be 0\n    auto tracer4_points = get_points(_tracer4);\n    ASSERT_EQ(tracer4_points.size(), 0);\n}\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/lock.std.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"utils/lockp.std.h\"\n#include <gtest/gtest.h>\n\nusing namespace dsn;\nusing namespace dsn::tools;\n\nTEST(tools_common, std_lock_provider)\n{\n    std_lock_provider *lock = new std_lock_provider(nullptr);\n    lock->lock();\n    EXPECT_TRUE(lock->try_lock());\n    lock->unlock();\n    lock->unlock();\n\n    std_lock_nr_provider *nr_lock = new std_lock_nr_provider(nullptr);\n    nr_lock->lock();\n    EXPECT_FALSE(nr_lock->try_lock());\n    nr_lock->unlock();\n\n    std_rwlock_nr_provider *rwlock = new std_rwlock_nr_provider(nullptr);\n    rwlock->lock_read();\n    rwlock->unlock_read();\n    rwlock->lock_write();\n    rwlock->unlock_write();\n\n    std_semaphore_provider *sema = new std_semaphore_provider(0, nullptr);\n    
std::thread t([](std_semaphore_provider *s) { s->wait(1000000); }, sema);\n    sema->signal(1);\n    t.join();\n\n    delete lock;\n    delete nr_lock;\n    delete rwlock;\n    delete sema;\n}\n"
  },
  {
    "path": "src/utils/test/logger.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Unit-test for logger.\n *\n * Revision history:\n *     Nov., 2015, @shengofsun (Weijie Sun), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include \"utils/simple_logger.h\"\n#include <gtest/gtest.h>\n#include <dsn/utility/filesystem.h>\n\nusing namespace dsn;\nusing namespace dsn::tools;\n\nstatic const int simple_logger_gc_gap = 20;\n\nstatic void get_log_file_index(std::vector<int> &log_index)\n{\n    std::vector<std::string> sub_list;\n    std::string path = \"./\";\n    if (!utils::filesystem::get_subfiles(path, sub_list, false)) {\n        ASSERT_TRUE(false);\n    }\n\n    for (auto &ptr : sub_list) {\n        auto &&name = utils::filesystem::get_file_name(ptr);\n        if 
(name.length() <= 8 || name.substr(0, 4) != \"log.\")\n            continue;\n        int index;\n        if (1 != sscanf(name.c_str(), \"log.%d.txt\", &index))\n            continue;\n        log_index.push_back(index);\n    }\n}\n\nstatic void clear_files(std::vector<int> &log_index)\n{\n    char file[256];\n    memset(file, 0, sizeof(file));\n    for (auto i : log_index) {\n        snprintf_p(file, 256, \"log.%d.txt\", i);\n        dsn::utils::filesystem::remove_path(std::string(file));\n    }\n}\n\nstatic void prepare_test_dir()\n{\n    const char *dir = \"./test\";\n    std::string dr(dir);\n    dsn::utils::filesystem::create_directory(dr);\n    chdir(dir);\n}\n\nstatic void finish_test_dir()\n{\n    const char *dir = \"./test\";\n    chdir(\"..\");\n    rmdir(dir);\n}\n\nvoid log_print(logging_provider *logger, const char *fmt, ...)\n{\n    va_list vl;\n    va_start(vl, fmt);\n    logger->dsn_logv(__FILE__, __FUNCTION__, __LINE__, LOG_LEVEL_INFORMATION, fmt, vl);\n    va_end(vl);\n}\n\nTEST(tools_common, simple_logger)\n{\n    // cases for print_header\n    screen_logger *logger = new screen_logger(\"./\");\n    log_print(logger, \"%s\", \"test_print\");\n    std::thread t([](screen_logger *lg) { log_print(lg, \"%s\", \"test_print\"); }, logger);\n    t.join();\n\n    logger->flush();\n    delete logger;\n\n    prepare_test_dir();\n    // create multiple files\n    for (unsigned int i = 0; i < simple_logger_gc_gap + 10; ++i) {\n        simple_logger *logger = new simple_logger(\"./\");\n        // in this case stdout is useless\n        for (unsigned int i = 0; i != 1000; ++i)\n            log_print(logger, \"%s\", \"test_print\");\n        logger->flush();\n\n        delete logger;\n    }\n\n    std::vector<int> index;\n    get_log_file_index(index);\n    EXPECT_TRUE(!index.empty());\n    sort(index.begin(), index.end());\n    EXPECT_EQ(simple_logger_gc_gap, index.size());\n    clear_files(index);\n    finish_test_dir();\n}\n"
  },
  {
    "path": "src/utils/test/logging.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Unit-test for logging.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <gtest/gtest.h>\n#include <iostream>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/c/api_utilities.h>\n\nTEST(core, logging)\n{\n    dsn_log_level_t level = dsn_log_get_start_level();\n    std::cout << \"logging start level = \" << level << std::endl;\n    dsn_logf(__FILENAME__,\n             __FUNCTION__,\n             __LINE__,\n             dsn_log_level_t::LOG_LEVEL_DEBUG,\n             \"in TEST(core, logging)\");\n    dsn_log(__FILENAME__, __FUNCTION__, __LINE__, dsn_log_level_t::LOG_LEVEL_DEBUG, \"\");\n}\n\nTEST(core, 
logging_big_log)\n{\n    std::string big_str(128000, 'x');\n    dsn_logf(__FILENAME__,\n             __FUNCTION__,\n             __LINE__,\n             dsn_log_level_t::LOG_LEVEL_DEBUG,\n             \"write big str %s\",\n             big_str.c_str());\n}\n\nTEST(core, dlog_f)\n{\n    struct test_case\n    {\n        enum dsn_log_level_t level;\n        std::string str;\n    } tests[] = {{dsn_log_level_t::LOG_LEVEL_DEBUG, \"This is a test\"},\n                 {dsn_log_level_t::LOG_LEVEL_DEBUG, \"\\\\x00%d\\\\x00\\\\x01%n/nm\"},\n                 {dsn_log_level_t::LOG_LEVEL_INFORMATION, \"\\\\x00%d\\\\x00\\\\x01%n/nm\"},\n                 {dsn_log_level_t::LOG_LEVEL_WARNING, \"\\\\x00%d\\\\x00\\\\x01%n/nm\"},\n                 {dsn_log_level_t::LOG_LEVEL_ERROR, \"\\\\x00%d\\\\x00\\\\x01%n/nm\"},\n                 {dsn_log_level_t::LOG_LEVEL_FATAL, \"\\\\x00%d\\\\x00\\\\x01%n/nm\"}};\n\n    for (auto test : tests) {\n        dlog_f(test.level, \"sortkey = {}\", test.str);\n    }\n}\n"
  },
  {
    "path": "src/utils/test/long_adder_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <chrono>\n#include <functional>\n#include <thread>\n#include <vector>\n\n#include <fmt/ostream.h>\n#include <gtest/gtest.h>\n\n#include <dsn/c/api_layer1.h>\n#include <dsn/utility/long_adder.h>\n\nnamespace dsn {\n\ntemplate <typename T>\nstruct type_parse_traits;\n\n#define REGISTER_PARSE_TYPE(X)                                                                     \\\n    template <>                                                                                    \\\n    struct type_parse_traits<X>                                                                    \\\n    {                                                                                              \\\n        static const char *name;                                                                   \\\n    };                                                                                             \\\n    const char *type_parse_traits<X>::name = #X\n\nREGISTER_PARSE_TYPE(striped_long_adder);\nREGISTER_PARSE_TYPE(concurrent_long_adder);\n\ntemplate <typename Adder>\nclass long_adder_test\n{\npublic:\n    long_adder_test() = default;\n\n    void 
run_increment_by(int64_t base_value,\n                          int64_t delta,\n                          int64_t num_operations,\n                          int64_t num_threads,\n                          int64_t &result)\n    {\n        execute(num_threads,\n                [this, delta, num_operations]() { this->increment_by(delta, num_operations); });\n        result = base_value + delta * num_operations * num_threads;\n        ASSERT_EQ(result, _adder.value());\n    }\n\n    void\n    run_increment(int64_t base_value, int64_t num_operations, int64_t num_threads, int64_t &result)\n    {\n        execute(num_threads, [this, num_operations]() { this->increment(num_operations); });\n        result = base_value + num_operations * num_threads;\n        ASSERT_EQ(result, _adder.value());\n    }\n\n    void\n    run_decrement(int64_t base_value, int64_t num_operations, int64_t num_threads, int64_t &result)\n    {\n        execute(num_threads, [this, num_operations]() { this->decrement(num_operations); });\n        result = base_value - num_operations * num_threads;\n        ASSERT_EQ(result, _adder.value());\n    }\n\n    void run_basic_cases(int64_t num_threads)\n    {\n        fmt::print(stdout,\n                   \"Ready to run basic cases for {} with {} threads.\\n\",\n                   type_parse_traits<Adder>::name,\n                   num_threads);\n\n        // Initially should be zero\n        int64_t base_value = 0;\n        ASSERT_EQ(base_value, _adder.value());\n\n        // Do basic test with custom number of threads\n        auto do_increment_by = std::bind(&long_adder_test::run_increment_by,\n                                         this,\n                                         std::placeholders::_1,\n                                         std::placeholders::_2,\n                                         std::placeholders::_3,\n                                         num_threads,\n                                         std::placeholders::_4);\n   
     auto do_increment = std::bind(&long_adder_test::run_increment,\n                                      this,\n                                      std::placeholders::_1,\n                                      std::placeholders::_2,\n                                      num_threads,\n                                      std::placeholders::_3);\n        auto do_decrement = std::bind(&long_adder_test::run_decrement,\n                                      this,\n                                      std::placeholders::_1,\n                                      std::placeholders::_2,\n                                      num_threads,\n                                      std::placeholders::_3);\n\n        // Test increment_by\n        do_increment_by(base_value, 1, 1, base_value);\n        do_increment_by(base_value, 100, 1, base_value);\n        do_increment_by(base_value, 10, 10, base_value);\n        do_increment_by(base_value, -10, 10, base_value);\n        do_increment_by(base_value, -100, 1, base_value);\n        do_increment_by(base_value, -1, 1, base_value);\n        ASSERT_EQ(0, _adder.value());\n        ASSERT_EQ(0, base_value);\n\n        // Test increment\n        do_increment(base_value, 1, base_value);\n        do_increment(base_value, 100, base_value);\n\n        // Fetch and reset\n        ASSERT_EQ(base_value, _adder.fetch_and_reset());\n        base_value = 0;\n        ASSERT_EQ(base_value, _adder.value());\n\n        // Test decrement\n        do_decrement(base_value, 100, base_value);\n        do_decrement(base_value, 1, base_value);\n\n        // Reset at last\n        _adder.reset();\n        base_value = 0;\n        ASSERT_EQ(base_value, _adder.value());\n    }\n\n    void run_concurrent_cases(int64_t num_operations, int64_t num_threads)\n    {\n        fmt::print(\n            stdout, \"Ready to run concurrent cases for {}:\\n\", type_parse_traits<Adder>::name);\n\n        // Initially adder should be zero\n        int64_t base_value = 
0;\n        ASSERT_EQ(base_value, _adder.value());\n\n        // Define runner to time each case\n        auto runner = [num_operations, num_threads](\n            const char *name, std::function<void(int64_t &)> func, int64_t &result) {\n            uint64_t start = dsn_now_ns();\n            func(result);\n            uint64_t end = dsn_now_ns();\n\n            auto duration_ns = static_cast<int64_t>(end - start);\n            std::chrono::nanoseconds nano(duration_ns);\n            auto duration_ms =\n                std::chrono::duration_cast<std::chrono::duration<double, std::milli>>(nano).count();\n\n            fmt::print(stdout,\n                       \"Running {} operations of {} with {} threads took {} ms.\\n\",\n                       num_operations,\n                       name,\n                       num_threads,\n                       duration_ms);\n        };\n\n        // Test increment\n        auto do_increment = std::bind(&long_adder_test::run_increment,\n                                      this,\n                                      base_value,\n                                      num_operations,\n                                      num_threads,\n                                      std::placeholders::_1);\n        runner(\"Increment\", do_increment, base_value);\n\n        // Test decrement\n        auto do_decrement = std::bind(&long_adder_test::run_decrement,\n                                      this,\n                                      base_value,\n                                      num_operations,\n                                      num_threads,\n                                      std::placeholders::_1);\n        runner(\"Decrement\", do_decrement, base_value);\n\n        // At last adder should also be zero\n        ASSERT_EQ(0, _adder.value());\n        ASSERT_EQ(0, base_value);\n    }\n\nprivate:\n    void increment_by(int64_t delta, int64_t n)\n    {\n        for (int64_t i = 0; i < n; ++i) {\n            
_adder.increment_by(delta);\n        }\n    }\n\n    void increment(int64_t num)\n    {\n        for (int64_t i = 0; i < num; ++i) {\n            _adder.increment();\n        }\n    }\n\n    void decrement(int64_t num)\n    {\n        for (int64_t i = 0; i < num; ++i) {\n            _adder.decrement();\n        }\n    }\n\n    void execute(int64_t num_threads, std::function<void()> runner)\n    {\n        std::vector<std::thread> threads;\n        for (int64_t i = 0; i < num_threads; i++) {\n            threads.emplace_back(runner);\n        }\n        for (auto &t : threads) {\n            t.join();\n        }\n    }\n\n    long_adder_wrapper<Adder> _adder;\n};\n\ntemplate <typename Adder>\nvoid run_basic_cases()\n{\n    long_adder_test<Adder> test;\n    test.run_basic_cases(1);\n    test.run_basic_cases(4);\n}\n\ntemplate <typename Adder0, typename Adder1, typename... Others>\nvoid run_basic_cases()\n{\n    run_basic_cases<Adder0>();\n    run_basic_cases<Adder1, Others...>();\n}\n\ntemplate <typename Adder>\nvoid run_concurrent_cases()\n{\n    long_adder_test<Adder> test;\n    test.run_concurrent_cases(10000000, 1);\n    test.run_concurrent_cases(10000000, 4);\n}\n\ntemplate <typename Adder0, typename Adder1, typename... Others>\nvoid run_concurrent_cases()\n{\n    run_concurrent_cases<Adder0>();\n    run_concurrent_cases<Adder1, Others...>();\n}\n\nTEST(long_adder_test, basic_cases) { run_basic_cases<striped_long_adder, concurrent_long_adder>(); }\n\nTEST(long_adder_test, concurrent_cases)\n{\n    run_concurrent_cases<striped_long_adder, concurrent_long_adder>();\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/main.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include <dsn/c/api_utilities.h>\n#include <dsn/tool-api/logging_provider.h>\n#include <dsn/utility/flags.h>\n\nextern void command_manager_module_init();\n\nGTEST_API_ int main(int argc, char **argv)\n{\n    testing::InitGoogleTest(&argc, argv);\n\n    command_manager_module_init();\n    // init logging\n    dsn_log_init(\"dsn::tools::simple_logger\", \"./\", nullptr);\n\n    dsn::flags_initialize();\n\n    return RUN_ALL_TESTS();\n}\n"
  },
  {
    "path": "src/utils/test/memutil_test.cpp",
    "content": "// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//      http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n// Unit test for memutil.cc\n\n#include \"utils/memutil.h\"\n\n#include <gtest/gtest.h>\n\nTEST(MemUtilTest, memmatch)\n{\n    const char kHaystack[] = \"0123456789\";\n    EXPECT_EQ(dsn::strings_internal::memmatch(kHaystack, 0, \"\", 0), kHaystack);\n    EXPECT_EQ(dsn::strings_internal::memmatch(kHaystack, 10, \"012\", 3), kHaystack);\n    EXPECT_EQ(dsn::strings_internal::memmatch(kHaystack, 10, \"0xx\", 1), kHaystack);\n    EXPECT_EQ(dsn::strings_internal::memmatch(kHaystack, 10, \"789\", 3), kHaystack + 7);\n    EXPECT_EQ(dsn::strings_internal::memmatch(kHaystack, 10, \"9xx\", 1), kHaystack + 9);\n    EXPECT_TRUE(dsn::strings_internal::memmatch(kHaystack, 10, \"9xx\", 3) == nullptr);\n    EXPECT_TRUE(dsn::strings_internal::memmatch(kHaystack, 10, \"xxx\", 1) == nullptr);\n}\n"
  },
  {
    "path": "src/utils/test/metrics_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/metrics.h>\n#include <dsn/utility/rand.h>\n\n#include <chrono>\n#include <thread>\n#include <vector>\n\n#include <gtest/gtest.h>\n\n#include \"percentile_utils.h\"\n\nnamespace dsn {\n\nclass my_gauge : public metric\n{\npublic:\n    int64_t value() { return _value; }\n\nprotected:\n    explicit my_gauge(const metric_prototype *prototype) : metric(prototype), _value(0) {}\n\n    my_gauge(const metric_prototype *prototype, int64_t value) : metric(prototype), _value(value) {}\n\n    virtual ~my_gauge() = default;\n\nprivate:\n    friend class metric_entity;\n    friend class ref_ptr<my_gauge>;\n\n    int64_t _value;\n\n    DISALLOW_COPY_AND_ASSIGN(my_gauge);\n};\n\nusing my_gauge_prototype = metric_prototype_with<my_gauge>;\nusing my_gauge_ptr = ref_ptr<my_gauge>;\n\n} // namespace dsn\n\n#define METRIC_DEFINE_my_gauge(entity_type, name, unit, desc, ...)                                 
\\\n    ::dsn::my_gauge_prototype METRIC_##name({#entity_type, #name, unit, desc, ##__VA_ARGS__})\n\nMETRIC_DEFINE_entity(my_server);\nMETRIC_DEFINE_entity(my_table);\nMETRIC_DEFINE_entity(my_replica);\n\nMETRIC_DEFINE_my_gauge(my_server,\n                       my_server_latency,\n                       dsn::metric_unit::kMicroSeconds,\n                       \"a server-level latency for test\");\nMETRIC_DEFINE_my_gauge(my_server,\n                       my_server_duration,\n                       dsn::metric_unit::kSeconds,\n                       \"a server-level duration for test\");\n\nMETRIC_DEFINE_my_gauge(my_replica,\n                       my_replica_latency,\n                       dsn::metric_unit::kNanoSeconds,\n                       \"a replica-level latency for test\");\nMETRIC_DEFINE_my_gauge(my_replica,\n                       my_replica_duration,\n                       dsn::metric_unit::kMilliSeconds,\n                       \"a replica-level duration for test\");\n\nMETRIC_DEFINE_gauge_int64(my_server,\n                          test_gauge_int64,\n                          dsn::metric_unit::kMilliSeconds,\n                          \"a server-level gauge of int64 type for test\");\n\nMETRIC_DEFINE_gauge_double(my_server,\n                           test_gauge_double,\n                           dsn::metric_unit::kSeconds,\n                           \"a server-level gauge of double type for test\");\n\nMETRIC_DEFINE_counter(my_server,\n                      test_counter,\n                      dsn::metric_unit::kRequests,\n                      \"a server-level counter for test\");\n\nMETRIC_DEFINE_concurrent_counter(my_server,\n                                 test_concurrent_counter,\n                                 dsn::metric_unit::kRequests,\n                                 \"a server-level concurrent_counter for test\");\n\nMETRIC_DEFINE_volatile_counter(my_server,\n                               test_volatile_counter,\n                  
             dsn::metric_unit::kRequests,\n                               \"a server-level volatile_counter for test\");\n\nMETRIC_DEFINE_concurrent_volatile_counter(my_server,\n                                          test_concurrent_volatile_counter,\n                                          dsn::metric_unit::kRequests,\n                                          \"a server-level concurrent_volatile_counter for test\");\n\nMETRIC_DEFINE_percentile_int64(my_server,\n                               test_percentile_int64,\n                               dsn::metric_unit::kNanoSeconds,\n                               \"a server-level percentile of int64 type for test\");\n\nMETRIC_DEFINE_percentile_double(my_server,\n                                test_percentile_double,\n                                dsn::metric_unit::kNanoSeconds,\n                                \"a server-level percentile of double type for test\");\n\nnamespace dsn {\n\nTEST(metrics_test, create_entity)\n{\n    // Test cases:\n    // - create an entity by instantiate(id) without any attribute\n    // - create another entity by instantiate(id, attrs) without any attribute\n    // - create an entity with an attribute\n    // - create another entity with an attribute\n    // - create an entity with 2 attributes\n    // - create another entity with 2 attributes\n    struct test_case\n    {\n        metric_entity_prototype *prototype;\n        std::string type_name;\n        std::string entity_id;\n        metric_entity::attr_map entity_attrs;\n        bool use_attrs_arg_if_empty;\n    } tests[] = {{&METRIC_ENTITY_my_server, \"my_server\", \"server_1\", {}, false},\n                 {&METRIC_ENTITY_my_server, \"my_server\", \"server_2\", {}, true},\n                 {&METRIC_ENTITY_my_table, \"my_table\", \"test_1\", {{\"table\", \"test_1\"}}, true},\n                 {&METRIC_ENTITY_my_table, \"my_table\", \"test_2\", {{\"table\", \"test_2\"}}, true},\n                 
{&METRIC_ENTITY_my_replica,\n                  \"my_replica\",\n                  \"1.2\",\n                  {{\"table\", \"test_1\"}, {\"partition\", \"2\"}},\n                  true},\n                 {&METRIC_ENTITY_my_replica,\n                  \"my_replica\",\n                  \"2.5\",\n                  {{\"table\", \"test_2\"}, {\"partition\", \"5\"}},\n                  true}};\n\n    metric_registry::entity_map entities;\n    for (const auto &test : tests) {\n        ASSERT_EQ(test.prototype->name(), test.type_name);\n\n        metric_entity_ptr entity;\n        if (test.entity_attrs.empty() && !test.use_attrs_arg_if_empty) {\n            entity = test.prototype->instantiate(test.entity_id);\n        } else {\n            entity = test.prototype->instantiate(test.entity_id, test.entity_attrs);\n        }\n\n        auto id = entity->id();\n        ASSERT_EQ(id, test.entity_id);\n\n        auto attrs = entity->attributes();\n        ASSERT_NE(attrs.find(\"entity\"), attrs.end());\n        ASSERT_EQ(attrs[\"entity\"], test.type_name);\n        ASSERT_EQ(attrs.size(), test.entity_attrs.size() + 1);\n        ASSERT_EQ(attrs.erase(\"entity\"), 1);\n        ASSERT_EQ(attrs, test.entity_attrs);\n\n        ASSERT_EQ(entities.find(test.entity_id), entities.end());\n        entities[test.entity_id] = entity;\n    }\n\n    ASSERT_EQ(metric_registry::instance().entities(), entities);\n}\n\nTEST(metrics_test, recreate_entity)\n{\n    // Test cases:\n    // - add an attribute to an empty map\n    // - add another attribute to a single-element map\n    // - remove an attribute from the map\n    // - remove the only attribute from the map\n    struct test_case\n    {\n        metric_entity::attr_map entity_attrs;\n    } tests[] = {\n        {{{\"name\", \"test\"}}}, {{{\"name\", \"test\"}, {\"id\", \"2\"}}}, {{{\"name\", \"test\"}}}, {{{}}}};\n\n    const std::string entity_id(\"test\");\n    auto expected_entity = METRIC_ENTITY_my_table.instantiate(entity_id);\n\n    
for (const auto &test : tests) {\n        // the pointer of entity should be kept unchanged\n        auto entity = METRIC_ENTITY_my_table.instantiate(entity_id, test.entity_attrs);\n        ASSERT_EQ(entity, expected_entity);\n\n        // the attributes will be updated\n        auto attrs = entity->attributes();\n        ASSERT_EQ(attrs.erase(\"entity\"), 1);\n        ASSERT_EQ(attrs, test.entity_attrs);\n    }\n}\n\nTEST(metrics_test, create_metric)\n{\n    auto my_server_entity = METRIC_ENTITY_my_server.instantiate(\"server_3\");\n    auto my_replica_entity =\n        METRIC_ENTITY_my_replica.instantiate(\"3.7\", {{\"table\", \"test_3\"}, {\"partition\", \"7\"}});\n\n    // Test cases:\n    // - create a metric without any argument by an entity\n    // - create a metric with an argument by an entity\n    // - create a metric with an argument by another entity\n    // - create a metric without any argument by another entity\n    struct test_case\n    {\n        my_gauge_prototype *prototype;\n        metric_entity_ptr entity;\n        bool use_default_value;\n        int64_t value;\n    } tests[] = {{&METRIC_my_server_latency, my_server_entity, true, 0},\n                 {&METRIC_my_server_duration, my_server_entity, false, 10},\n                 {&METRIC_my_replica_latency, my_replica_entity, false, 100},\n                 {&METRIC_my_replica_duration, my_replica_entity, true, 0}};\n\n    using entity_map = std::unordered_map<metric_entity *, metric_entity::metric_map>;\n\n    entity_map expected_entities;\n    for (const auto &test : tests) {\n        my_gauge_ptr my_metric;\n        if (test.use_default_value) {\n            my_metric = test.prototype->instantiate(test.entity);\n        } else {\n            my_metric = test.prototype->instantiate(test.entity, test.value);\n        }\n\n        ASSERT_EQ(my_metric->value(), test.value);\n\n        auto iter = expected_entities.find(test.entity.get());\n        if (iter == expected_entities.end()) {\n      
      expected_entities[test.entity.get()] = {{test.prototype, my_metric}};\n        } else {\n            iter->second[test.prototype] = my_metric;\n        }\n    }\n\n    entity_map actual_entities;\n    auto entities = metric_registry::instance().entities();\n    for (const auto &entity : entities) {\n        if (expected_entities.find(entity.second.get()) != expected_entities.end()) {\n            actual_entities[entity.second.get()] = entity.second->metrics();\n        }\n    }\n\n    ASSERT_EQ(actual_entities, expected_entities);\n}\n\nTEST(metrics_test, recreate_metric)\n{\n    auto my_server_entity = METRIC_ENTITY_my_server.instantiate(\"server_4\");\n\n    auto my_metric = METRIC_my_server_latency.instantiate(my_server_entity, 5);\n    ASSERT_EQ(my_metric->value(), 5);\n\n    auto new_metric = METRIC_my_server_latency.instantiate(my_server_entity, 10);\n    ASSERT_EQ(my_metric->value(), 5);\n}\n\nTEST(metrics_test, gauge_int64)\n{\n    // Test cases:\n    // - create a gauge of int64 type without initial value, then increase\n    // - create a gauge of int64 type without initial value, then decrease\n    // - create a gauge of int64 type with initial value, then increase\n    // - create a gauge of int64 type with initial value, then decrease\n    struct test_case\n    {\n        std::string entity_id;\n        bool use_default_value;\n        int64_t initial_value;\n        int64_t new_value;\n    } tests[] = {{\"server_5\", true, 0, 5},\n                 {\"server_6\", true, 0, -5},\n                 {\"server_7\", false, 10, 100},\n                 {\"server_8\", false, 100, 10}};\n\n    for (const auto &test : tests) {\n        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);\n\n        gauge_ptr<int64_t> my_metric;\n        if (test.use_default_value) {\n            my_metric = METRIC_test_gauge_int64.instantiate(my_server_entity);\n        } else {\n            my_metric = 
METRIC_test_gauge_int64.instantiate(my_server_entity, test.initial_value);\n        }\n\n        ASSERT_EQ(my_metric->value(), test.initial_value);\n\n        my_metric->set(test.new_value);\n        ASSERT_EQ(my_metric->value(), test.new_value);\n\n        auto metrics = my_server_entity->metrics();\n        ASSERT_EQ(metrics[&METRIC_test_gauge_int64].get(), static_cast<metric *>(my_metric.get()));\n\n        ASSERT_EQ(my_metric->prototype(),\n                  static_cast<const metric_prototype *>(&METRIC_test_gauge_int64));\n    }\n}\n\nTEST(metrics_test, gauge_double)\n{\n    // Test cases:\n    // - create a gauge of double type without initial value, then increase\n    // - create a gauge of double type without initial value, then decrease\n    // - create a gauge of double type with initial value, then increase\n    // - create a gauge of double type with initial value, then decrease\n    struct test_case\n    {\n        std::string entity_id;\n        bool use_default_value;\n        double initial_value;\n        double new_value;\n    } tests[] = {{\"server_9\", true, 0.0, 5.278},\n                 {\"server_10\", true, 0.0, -5.278},\n                 {\"server_11\", false, 10.756, 100.128},\n                 {\"server_12\", false, 100.128, 10.756}};\n\n    for (const auto &test : tests) {\n        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);\n\n        gauge_ptr<double> my_metric;\n        if (test.use_default_value) {\n            my_metric = METRIC_test_gauge_double.instantiate(my_server_entity);\n        } else {\n            my_metric = METRIC_test_gauge_double.instantiate(my_server_entity, test.initial_value);\n        }\n\n        ASSERT_DOUBLE_EQ(my_metric->value(), test.initial_value);\n\n        my_metric->set(test.new_value);\n        ASSERT_DOUBLE_EQ(my_metric->value(), test.new_value);\n\n        auto metrics = my_server_entity->metrics();\n        ASSERT_EQ(metrics[&METRIC_test_gauge_double].get(), 
static_cast<metric *>(my_metric.get()));\n\n        ASSERT_EQ(my_metric->prototype(),\n                  static_cast<const metric_prototype *>(&METRIC_test_gauge_double));\n    }\n}\n\nvoid execute(int64_t num_threads, std::function<void(int)> runner)\n{\n    std::vector<std::thread> threads;\n    for (int64_t i = 0; i < num_threads; ++i) {\n        threads.emplace_back([i, &runner]() { runner(i); });\n    }\n    for (auto &t : threads) {\n        t.join();\n    }\n}\n\ntemplate <typename MetricPtr>\nvoid increment_by(std::integral_constant<bool, true>, MetricPtr &my_metric, int64_t x)\n{\n    my_metric->increment_by(x);\n}\n\ntemplate <typename MetricPtr>\nvoid increment_by(std::integral_constant<bool, false>, MetricPtr &my_metric, int64_t x)\n{\n    // If x is positive, metric will be increased; otherwise, the metric will be decreased.\n    my_metric->decrement_by(-x);\n}\n\ntemplate <bool IsIncrement, typename MetricPtr>\nvoid run_increment_by(MetricPtr &my_metric,\n                      int64_t base_value,\n                      int64_t num_operations,\n                      int64_t num_threads,\n                      int64_t &result,\n                      bool allow_negative = true)\n{\n    std::vector<int64_t> deltas;\n    int64_t n = num_operations * num_threads;\n    deltas.reserve(n);\n\n    int64_t expected_value = base_value;\n    for (int64_t i = 0; i < n; ++i) {\n        auto delta = static_cast<int64_t>(dsn::rand::next_u64(1000000));\n        if (allow_negative && delta % 3 == 0) {\n            delta = -delta;\n        }\n        expected_value += delta;\n        deltas.push_back(delta);\n    }\n\n    execute(num_threads, [num_operations, &my_metric, &deltas](int64_t tid) mutable {\n        for (int64_t i = 0; i < num_operations; ++i) {\n            auto delta = deltas[tid * num_operations + i];\n            increment_by(std::integral_constant<bool, IsIncrement>{}, my_metric, delta);\n        }\n    });\n    ASSERT_EQ(my_metric->value(), 
expected_value);\n    result = expected_value;\n}\n\ntemplate <typename MetricPtr>\nvoid run_increment(MetricPtr &my_metric,\n                   int64_t base_value,\n                   int64_t num_operations,\n                   int64_t num_threads,\n                   int64_t &result)\n{\n    execute(num_threads, [num_operations, &my_metric](int) mutable {\n        for (int64_t i = 0; i < num_operations; ++i) {\n            my_metric->increment();\n        }\n    });\n\n    int64_t expected_value = base_value + num_operations * num_threads;\n    ASSERT_EQ(my_metric->value(), expected_value);\n    result = expected_value;\n}\n\ntemplate <typename MetricPtr>\nvoid run_decrement(MetricPtr &my_metric,\n                   int64_t base_value,\n                   int64_t num_operations,\n                   int64_t num_threads,\n                   int64_t &result)\n{\n    execute(num_threads, [num_operations, &my_metric](int) mutable {\n        for (int64_t i = 0; i < num_operations; ++i) {\n            my_metric->decrement();\n        }\n    });\n\n    int64_t expected_value = base_value - num_operations * num_threads;\n    ASSERT_EQ(my_metric->value(), expected_value);\n    result = expected_value;\n}\n\nvoid run_gauge_increment_cases(dsn::gauge_prototype<int64_t> *prototype, int64_t num_threads)\n{\n    // Test cases:\n    // - test the gauge with small-scale computations\n    // - test the gauge with large-scale computations\n    struct test_case\n    {\n        std::string entity_id;\n        int64_t increments_by;\n        int64_t decrements_by;\n        int64_t increments;\n        int64_t decrements;\n    } tests[] = {{\"server_13\", 100, 100, 1000, 1000},\n                 {\"server_14\", 1000000, 1000000, 10000000, 10000000}};\n\n    for (const auto &test : tests) {\n        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);\n\n        auto my_metric = prototype->instantiate(my_server_entity);\n\n        int64_t value = 0;\n        
ASSERT_EQ(my_metric->value(), value);\n        run_increment_by<true>(my_metric, value, test.increments_by, num_threads, value);\n        run_increment_by<false>(my_metric, value, test.decrements_by, num_threads, value);\n        run_increment(my_metric, value, test.increments, num_threads, value);\n        run_decrement(my_metric, value, test.decrements, num_threads, value);\n\n        // Reset to 0 since this metric could be used again\n        my_metric->set(0);\n        ASSERT_EQ(my_metric->value(), 0);\n    }\n}\n\nvoid run_gauge_increment_cases(dsn::gauge_prototype<int64_t> *prototype)\n{\n    // Do single-threaded tests\n    run_gauge_increment_cases(prototype, 1);\n\n    // Do multi-threaded tests\n    run_gauge_increment_cases(prototype, 4);\n}\n\nTEST(metrics_test, gauge_increment) { run_gauge_increment_cases(&METRIC_test_gauge_int64); }\n\ntemplate <typename Adder>\nvoid run_counter_cases(dsn::counter_prototype<Adder> *prototype, int64_t num_threads)\n{\n    // Test cases:\n    // - test the counter with small-scale computations\n    // - test the counter with large-scale computations\n    struct test_case\n    {\n        std::string entity_id;\n        int64_t increments_by;\n        int64_t increments;\n    } tests[] = {{\"server_15\", 100, 1000}, {\"server_16\", 1000000, 10000000}};\n\n    for (const auto &test : tests) {\n        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);\n\n        auto my_metric = prototype->instantiate(my_server_entity);\n\n        int64_t value = 0;\n        ASSERT_EQ(my_metric->value(), value);\n        run_increment_by<true>(my_metric, value, test.increments_by, num_threads, value, false);\n        run_increment(my_metric, value, test.increments, num_threads, value);\n\n        my_metric->reset();\n        ASSERT_EQ(my_metric->value(), 0);\n\n        auto metrics = my_server_entity->metrics();\n        ASSERT_EQ(metrics[prototype].get(), static_cast<metric *>(my_metric.get()));\n\n        
ASSERT_EQ(my_metric->prototype(), static_cast<const metric_prototype *>(prototype));\n    }\n}\n\ntemplate <typename Adder>\nvoid run_counter_cases(dsn::counter_prototype<Adder> *prototype)\n{\n    // Do single-threaded tests\n    run_counter_cases(prototype, 1);\n\n    // Do multi-threaded tests\n    run_counter_cases(prototype, 4);\n}\n\nTEST(metrics_test, counter)\n{\n    // Test both kinds of counter\n    run_counter_cases<striped_long_adder>(&METRIC_test_counter);\n    run_counter_cases<concurrent_long_adder>(&METRIC_test_concurrent_counter);\n}\n\ntemplate <typename Adder>\nvoid run_volatile_counter_write_and_read(dsn::volatile_counter_ptr<Adder> &my_metric,\n                                         int64_t num_operations,\n                                         int64_t num_threads_write,\n                                         int64_t num_threads_read)\n{\n    std::vector<int64_t> deltas;\n    int64_t n = num_operations * num_threads_write;\n    deltas.reserve(n);\n\n    int64_t expected_value = 0;\n    for (int64_t i = 0; i < n; ++i) {\n        auto delta = static_cast<int64_t>(dsn::rand::next_u64(1000000));\n        expected_value += delta;\n        deltas.push_back(delta);\n    }\n\n    auto results = new_cacheline_aligned_int64_array(static_cast<uint32_t>(num_threads_read));\n    std::vector<std::atomic_bool> completed(num_threads_write);\n    for (int64_t i = 0; i < num_threads_write; ++i) {\n        completed[i].store(false);\n    }\n\n    ASSERT_EQ(my_metric->value(), 0);\n\n    execute(num_threads_write + num_threads_read,\n            [num_operations, num_threads_write, &my_metric, &deltas, &results, &completed](\n                int64_t tid) mutable {\n                if (tid < num_threads_write) {\n                    for (int64_t i = 0; i < num_operations; ++i) {\n                        my_metric->increment_by(deltas[tid * num_operations + i]);\n                    }\n                    completed[tid].store(true);\n                } else 
{\n                    bool done = false;\n                    do {\n                        int64_t i = 0;\n                        for (; i < num_threads_write && completed[i].load(); ++i) {\n                        }\n                        if (i >= num_threads_write) {\n                            // All of the increment threads have finished, thus the loop can\n                            // be broken after the last time the value is fetched.\n                            done = true;\n                        }\n\n                        auto value = my_metric->value();\n                        if (value == 0) {\n                            // If zero is fetched, it's likely that recently the counter is\n                            // not updated frequently. Thus yield and try for the next time.\n                            std::this_thread::yield();\n                        } else {\n                            auto r = results.get();\n                            r[tid - num_threads_write]._value += value;\n                        }\n                    } while (!done);\n                }\n            });\n\n    int64_t value = 0;\n    for (int64_t i = 0; i < num_threads_read; ++i) {\n        value += results.get()[i]._value.load();\n    }\n    ASSERT_EQ(value, expected_value);\n    ASSERT_EQ(my_metric->value(), 0);\n}\n\ntemplate <typename Adder>\nvoid run_volatile_counter_cases(dsn::volatile_counter_prototype<Adder> *prototype,\n                                int64_t num_threads_write,\n                                int64_t num_threads_read)\n{\n    // Test cases:\n    // - test the volatile counter with small-scale computations\n    // - test the volatile counter with large-scale computations\n    struct test_case\n    {\n        std::string entity_id;\n        int64_t num_operations;\n    } tests[] = {{\"server_17\", 5000}, {\"server_18\", 5000000}};\n\n    for (const auto &test : tests) {\n        auto my_server_entity = 
METRIC_ENTITY_my_server.instantiate(test.entity_id);\n\n        auto my_metric = prototype->instantiate(my_server_entity);\n\n        run_volatile_counter_write_and_read(\n            my_metric, test.num_operations, num_threads_write, num_threads_read);\n\n        auto metrics = my_server_entity->metrics();\n        ASSERT_EQ(metrics[prototype].get(), static_cast<metric *>(my_metric.get()));\n\n        ASSERT_EQ(my_metric->prototype(), static_cast<const metric_prototype *>(prototype));\n    }\n}\n\ntemplate <typename Adder>\nvoid run_volatile_counter_cases(dsn::volatile_counter_prototype<Adder> *prototype)\n{\n    // Write with single thread and read with single thread\n    run_volatile_counter_cases(prototype, 1, 1);\n\n    // Write with multiple threads and read with single thread\n    run_volatile_counter_cases(prototype, 2, 1);\n\n    // Write with single thread and read with multiple threads\n    run_volatile_counter_cases(prototype, 1, 2);\n\n    // Write with multiple threads and read with multiple threads\n    run_volatile_counter_cases(prototype, 4, 2);\n}\n\nTEST(metrics_test, volatile_counter)\n{\n    // Test both kinds of volatile counter\n    run_volatile_counter_cases<striped_long_adder>(&METRIC_test_volatile_counter);\n    run_volatile_counter_cases<concurrent_long_adder>(&METRIC_test_concurrent_volatile_counter);\n}\n\ntemplate <typename T, typename Prototype, typename Checker>\nvoid run_percentile(const metric_entity_ptr &my_entity,\n                    const Prototype &prototype,\n                    const std::vector<T> &data,\n                    size_t num_preload,\n                    uint64_t interval_ms,\n                    uint64_t exec_ms,\n                    const std::set<kth_percentile_type> &kth_percentiles,\n                    size_t sample_size,\n                    size_t num_threads,\n                    const std::vector<T> &expected_elements,\n                    Checker checker)\n{\n    dassert_f(num_threads > 0, \"Invalid 
num_threads({})\", num_threads);\n    dassert_f(data.size() <= sample_size && data.size() % num_threads == 0,\n              \"Invalid arguments, data_size={}, sample_size={}, num_threads={}\",\n              data.size(),\n              sample_size,\n              num_threads);\n\n    auto my_metric = prototype.instantiate(my_entity, interval_ms, kth_percentiles, sample_size);\n\n    // Preload zero in current thread.\n    for (size_t i = 0; i < num_preload; ++i) {\n        my_metric->set(0);\n    }\n\n    // Load other data in each spawned thread evenly.\n    const size_t num_operations = data.size() / num_threads;\n    execute(static_cast<int64_t>(num_threads),\n            [num_operations, &my_metric, &data](int64_t tid) mutable {\n                for (size_t i = 0; i < num_operations; ++i) {\n                    my_metric->set(data[static_cast<size_t>(tid) * num_operations + i]);\n                }\n            });\n\n    // Wait a while in order to finish computing all percentiles.\n    std::this_thread::sleep_for(\n        std::chrono::milliseconds(my_metric->get_initial_delay_ms() + interval_ms + exec_ms));\n\n    // Check if actual elements of kth percentiles are equal to the expected ones.\n    std::vector<T> actual_elements;\n    for (const auto &kth : kAllKthPercentileTypes) {\n        T value;\n        if (kth_percentiles.find(kth) == kth_percentiles.end()) {\n            ASSERT_FALSE(my_metric->get(kth, value));\n            checker(value, 0);\n        } else {\n            ASSERT_TRUE(my_metric->get(kth, value));\n            actual_elements.push_back(value);\n        }\n    }\n    checker(actual_elements, expected_elements);\n\n    // Check if this percentile is included in the entity.\n    auto metrics = my_entity->metrics();\n    ASSERT_EQ(metrics[&prototype].get(), static_cast<metric *>(my_metric.get()));\n\n    // Check if the prototype is referenced by this percentile.\n    ASSERT_EQ(my_metric->prototype(), static_cast<const metric_prototype 
*>(&prototype));\n}\n\ntemplate <typename T, typename Prototype, typename CaseGenerator, typename Checker>\nvoid run_percentile_cases(const Prototype &prototype)\n{\n    using value_type = T;\n    const auto p50 = kth_percentile_type::P50;\n    const auto p90 = kth_percentile_type::P90;\n    const auto p99 = kth_percentile_type::P99;\n\n    // Test cases:\n    // - input none of sample with none of kth percentile\n    // - input 1 sample with none of kth percentile\n    // - input 1 sample with 1 kth percentile\n    // - input 1 sample with 2 kth percentiles\n    // - input 1 sample with all kth percentiles\n    // - input 1 sample with 1 kth percentile, capacity of 2\n    // - input 1 sample with 2 kth percentiles, capacity of 2\n    // - input 1 sample with all kth percentiles, capacity of 2\n    // - input 2 samples with 1 kth percentile\n    // - input 2 samples with 2 kth percentiles\n    // - input 2 samples with all kth percentiles\n    // - input 10 samples with 1 kth percentile, capacity of 16\n    // - input 10 samples with 2 kth percentiles, capacity of 16\n    // - input 10 samples with all kth percentiles, capacity of 16\n    // - input 10 samples with 1 kth percentile by 2 threads, capacity of 16\n    // - input 10 samples with 2 kth percentiles by 2 threads, capacity of 16\n    // - input 10 samples with all kth percentiles by 2 threads, capacity of 16\n    // - input 16 samples with 1 kth percentile\n    // - input 16 samples with 2 kth percentiles\n    // - input 16 samples with all kth percentiles\n    // - input 16 samples with 1 kth percentile by 2 threads\n    // - input 16 samples with 2 kth percentiles by 2 threads\n    // - input 16 samples with all kth percentiles by 2 threads\n    // - preload 5 samples and input 16 samples with 1 kth percentile by 2 threads\n    // - preload 5 samples and input 16 samples with 2 kth percentiles by 2 threads\n    // - preload 5 samples and input 16 samples with all kth percentiles by 2 threads\n    // - 
input 2000 samples with 1 kth percentile, capacity of 4096\n    // - input 2000 samples with 2 kth percentiles, capacity of 4096\n    // - input 2000 samples with all kth percentiles, capacity of 4096\n    // - input 2000 samples with 1 kth percentile by 4 threads, capacity of 4096\n    // - input 2000 samples with 2 kth percentiles by 4 threads, capacity of 4096\n    // - input 2000 samples with all kth percentiles by 4 threads, capacity of 4096\n    // - input 4096 samples with 1 kth percentile, capacity of 4096\n    // - input 4096 samples with 2 kth percentiles, capacity of 4096\n    // - input 4096 samples with all kth percentiles, capacity of 4096\n    // - input 4096 samples with 1 kth percentile by 4 threads, capacity of 4096\n    // - input 4096 samples with 2 kth percentiles by 4 threads, capacity of 4096\n    // - input 4096 samples with all kth percentiles by 4 threads, capacity of 4096\n    // - preload 5 input 4096 samples with 1 kth percentile by 4 threads, capacity of 4096\n    // - preload 5 input 4096 samples with 2 kth percentiles by 4 threads, capacity of 4096\n    // - preload 5 input 4096 samples with all kth percentiles by 4 threads, capacity of 4096\n    struct test_case\n    {\n        std::string entity_id;\n        size_t data_size;\n        value_type initial_value;\n        uint64_t range_size;\n        size_t num_preload;\n        uint64_t interval_ms;\n        uint64_t exec_ms;\n        const std::set<kth_percentile_type> kth_percentiles;\n        size_t sample_size;\n        size_t num_threads;\n    } tests[] = {{\"server_19\", 0, 0, 2, 0, 50, 10, {}, 1, 1},\n                 {\"server_20\", 1, 0, 2, 0, 50, 10, {}, 1, 1},\n                 {\"server_21\", 1, 0, 2, 0, 50, 10, {p90}, 1, 1},\n                 {\"server_22\", 1, 0, 2, 0, 50, 10, {p50, p99}, 1, 1},\n                 {\"server_23\", 1, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 1, 1},\n                 {\"server_24\", 1, 0, 2, 0, 50, 10, {p90}, 2, 1},\n                 
{\"server_25\", 1, 0, 2, 0, 50, 10, {p50, p99}, 2, 1},\n                 {\"server_26\", 1, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 2, 1},\n                 {\"server_27\", 2, 0, 2, 0, 50, 10, {p90}, 2, 1},\n                 {\"server_28\", 2, 0, 2, 0, 50, 10, {p50, p99}, 2, 1},\n                 {\"server_29\", 2, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 2, 1},\n                 {\"server_30\", 10, 0, 2, 0, 50, 10, {p90}, 16, 1},\n                 {\"server_31\", 10, 0, 2, 0, 50, 10, {p50, p99}, 16, 1},\n                 {\"server_32\", 10, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 16, 1},\n                 {\"server_33\", 10, 0, 2, 0, 50, 10, {p90}, 16, 2},\n                 {\"server_34\", 10, 0, 2, 0, 50, 10, {p50, p99}, 16, 2},\n                 {\"server_35\", 10, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 16, 2},\n                 {\"server_36\", 16, 0, 2, 0, 50, 10, {p90}, 16, 1},\n                 {\"server_37\", 16, 0, 2, 0, 50, 10, {p50, p99}, 16, 1},\n                 {\"server_38\", 16, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 16, 1},\n                 {\"server_39\", 16, 0, 2, 0, 50, 10, {p90}, 16, 2},\n                 {\"server_40\", 16, 0, 2, 0, 50, 10, {p50, p99}, 16, 2},\n                 {\"server_41\", 16, 0, 2, 0, 50, 10, kAllKthPercentileTypes, 16, 2},\n                 {\"server_42\", 16, 0, 2, 5, 50, 10, {p90}, 16, 2},\n                 {\"server_43\", 16, 0, 2, 5, 50, 10, {p50, p99}, 16, 2},\n                 {\"server_44\", 16, 0, 2, 5, 50, 10, kAllKthPercentileTypes, 16, 2},\n                 {\"server_45\", 2000, 0, 5, 0, 50, 10, {p90}, 4096, 1},\n                 {\"server_46\", 2000, 0, 5, 0, 50, 10, {p50, p99}, 4096, 1},\n                 {\"server_47\", 2000, 0, 5, 0, 50, 10, kAllKthPercentileTypes, 4096, 1},\n                 {\"server_48\", 2000, 0, 5, 0, 50, 10, {p90}, 4096, 4},\n                 {\"server_49\", 2000, 0, 5, 0, 50, 10, {p50, p99}, 4096, 4},\n                 {\"server_50\", 2000, 0, 5, 0, 50, 10, 
kAllKthPercentileTypes, 4096, 4},\n                 {\"server_51\", 4096, 0, 5, 0, 50, 10, {p90}, 4096, 1},\n                 {\"server_52\", 4096, 0, 5, 0, 50, 10, {p50, p99}, 4096, 1},\n                 {\"server_53\", 4096, 0, 5, 0, 50, 10, kAllKthPercentileTypes, 4096, 1},\n                 {\"server_54\", 4096, 0, 5, 0, 50, 10, {p90}, 4096, 4},\n                 {\"server_55\", 4096, 0, 5, 0, 50, 10, {p50, p99}, 4096, 4},\n                 {\"server_56\", 4096, 0, 5, 0, 50, 10, kAllKthPercentileTypes, 4096, 4},\n                 {\"server_57\", 4096, 0, 5, 5, 50, 10, {p90}, 4096, 4},\n                 {\"server_58\", 4096, 0, 5, 5, 50, 10, {p50, p99}, 4096, 4},\n                 {\"server_59\", 4096, 0, 5, 5, 50, 10, kAllKthPercentileTypes, 4096, 4}};\n\n    for (const auto &test : tests) {\n        auto my_server_entity = METRIC_ENTITY_my_server.instantiate(test.entity_id);\n\n        CaseGenerator generator(\n            test.data_size, test.initial_value, test.range_size, test.kth_percentiles);\n\n        std::vector<value_type> data;\n        std::vector<value_type> expected_elements;\n        generator(data, expected_elements);\n\n        run_percentile<value_type, Prototype, Checker>(my_server_entity,\n                                                       prototype,\n                                                       data,\n                                                       test.num_preload,\n                                                       test.interval_ms,\n                                                       test.exec_ms,\n                                                       test.kth_percentiles,\n                                                       test.sample_size,\n                                                       test.num_threads,\n                                                       expected_elements,\n                                                       Checker());\n    }\n}\n\ntemplate <typename T>\nclass 
integral_checker\n{\npublic:\n    void operator()(const T &actual_element, const T &expected_element) const\n    {\n        ASSERT_EQ(actual_element, expected_element);\n    }\n\n    void operator()(const std::vector<T> &actual_elements,\n                    const std::vector<T> &expected_elements) const\n    {\n        ASSERT_EQ(actual_elements, expected_elements);\n    }\n};\n\nTEST(metrics_test, percentile_int64)\n{\n    using value_type = int64_t;\n    run_percentile_cases<value_type,\n                         percentile_prototype<value_type>,\n                         integral_percentile_case_generator<value_type>,\n                         integral_checker<value_type>>(METRIC_test_percentile_int64);\n}\n\ntemplate <typename T>\nclass floating_checker\n{\npublic:\n    void operator()(const T &actual_element, const T &expected_element) const\n    {\n        ASSERT_DOUBLE_EQ(actual_element, expected_element);\n    }\n\n    void operator()(const std::vector<T> &actual_elements,\n                    const std::vector<T> &expected_elements) const\n    {\n        ASSERT_EQ(actual_elements.size(), expected_elements.size());\n        for (size_t i = 0; i < expected_elements.size(); ++i) {\n            ASSERT_DOUBLE_EQ(actual_elements[i], expected_elements[i]);\n        }\n    }\n};\n\nTEST(metrics_test, percentile_double)\n{\n    using value_type = double;\n    run_percentile_cases<value_type,\n                         floating_percentile_prototype<value_type>,\n                         floating_percentile_case_generator<value_type>,\n                         floating_checker<value_type>>(METRIC_test_percentile_double);\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/nth_element_bench/CMakeLists.txt",
    "content": "# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n\nset(MY_PROJ_NAME nth_element_bench)\nproject(${MY_PROJ_NAME} C CXX)\n\n# Source files under CURRENT project directory will be automatically included.\n# You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"\")\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS dsn_runtime dsn_utils)\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\n# Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_executable()\n\ndsn_install_executable()\n"
  },
  {
    "path": "src/utils/test/nth_element_bench/nth_element_bench.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <atomic>\n#include <chrono>\n#include <cstdint>\n#include <cstdlib>\n#include <functional>\n#include <map>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include <fmt/format.h>\n\n#include <dsn/c/api_layer1.h>\n#include <dsn/utility/nth_element.h>\n#include <dsn/utility/smart_pointers.h>\n#include <dsn/utility/string_conv.h>\n#include <dsn/utility/strings.h>\n\n#include \"utils/test/nth_element_utils.h\"\n\nvoid print_usage(const char *cmd)\n{\n    fmt::print(\"USAGE: {} <num_operations> <array_size> [nths]\\n\", cmd);\n    fmt::print(\"Run a simple benchmark that executes all sorts of nth_element_finder.\\n\\n\");\n\n    fmt::print(\"    <num_operations>       the number of operations.\\n\");\n    fmt::print(\"    <array_size>           the size of array for each operation.\\n\");\n    fmt::print(\"    <range_size>           the size of range for each operation to \\n\"\n               \"                           generate the integers randomly.\\n\");\n    fmt::print(\"    [nths]                 the nth list for each operation, separated by \\n\"\n               \"                           comma(,) if more than one 
element, e.g., \\n\"\n               \"                           \\\"2,5\\\" means finding 2nd and 5th elements;\\n\"\n               \"                           if this arg is missing, nth list of \\n\"\n               \"                           perf_counter_number_percentile_atomic will be \\n\"\n               \"                           used, that is, P50, P90, P95, P99 and P999.\\n\");\n}\n\ntemplate <typename NthElementFinder>\nint64_t run_nth_element(const std::vector<int64_t> &expected_elements,\n                        NthElementFinder &finder,\n                        std::function<void()> exec)\n{\n    auto start = dsn_now_ns();\n    exec();\n    auto end = dsn_now_ns();\n\n    if (finder.elements() != expected_elements) {\n        fmt::print(\n            \"actual_elements != expected_elements\\nactual_elements = {}\\nexpected_elements: {}\\n\",\n            fmt::join(finder.elements(), \" \"),\n            fmt::join(expected_elements, \" \"));\n        ::exit(-1);\n    }\n\n    return static_cast<int64_t>(end - start);\n}\n\nint64_t run_stl_nth_element(const std::vector<int64_t> &array,\n                            const std::vector<int64_t> &expected_elements,\n                            dsn::stl_nth_element_finder<int64_t> &finder)\n{\n    auto start = dsn_now_ns();\n    std::vector<int64_t> container(array.size());\n    std::copy(array.begin(), array.end(), container.begin());\n    auto end = dsn_now_ns();\n\n    return static_cast<int64_t>(end - start) +\n           run_nth_element(expected_elements, finder, [&finder, &container]() {\n               finder(container.begin(), container.begin(), container.end());\n           });\n}\n\nvoid run_bench(size_t num_operations,\n               size_t array_size,\n               uint64_t range_size,\n               const std::vector<size_t> &nths)\n{\n    auto get_perf_counter_nths = [](size_t num) -> std::vector<size_t> {\n        return {static_cast<size_t>(num * 0.5),\n                
static_cast<size_t>(num * 0.9),\n                static_cast<size_t>(num * 0.95),\n                static_cast<size_t>(num * 0.99),\n                static_cast<size_t>(num * 0.999)};\n    };\n\n    dsn::perf_counter_nth_element_finder perf_counter_finder;\n    dsn::stl_nth_element_finder<int64_t> stl_finder;\n\n    std::map<std::string, int64_t> exec_time_map = {{\"perf_counter_nth_element\", 0},\n                                                    {\"stl_nth_element\", 0}};\n    for (size_t i = 0; i < num_operations; ++i) {\n        std::vector<size_t> real_nths;\n        if (nths.empty()) {\n            real_nths = get_perf_counter_nths(array_size);\n        } else {\n            real_nths = nths;\n        }\n\n        dsn::integral_nth_element_case_generator<int64_t> generator(\n            array_size, 0, range_size, real_nths);\n\n        std::vector<int64_t> array;\n        std::vector<int64_t> expected_elements;\n        generator(array, expected_elements);\n\n        // Once `nths` is empty, the comparison between stl_nth_element_finder and\n        // perf_counter_nth_element_finder will be launched.\n        if (nths.empty()) {\n            perf_counter_finder.load_data(array);\n            exec_time_map[\"perf_counter_nth_element\"] +=\n                run_nth_element(expected_elements, perf_counter_finder, [&perf_counter_finder]() {\n                    perf_counter_finder();\n                });\n        }\n\n        stl_finder.set_nths(real_nths);\n        exec_time_map[\"stl_nth_element\"] +=\n            run_stl_nth_element(array, expected_elements, stl_finder);\n    }\n\n    for (const auto &t : exec_time_map) {\n        if (t.second == 0) {\n            continue;\n        }\n\n        std::chrono::nanoseconds nano(t.second);\n        auto duration_s = std::chrono::duration_cast<std::chrono::duration<double>>(nano).count();\n        fmt::print(\"Running {} operations of {} with each array {} elements took {} seconds.\\n\",\n                   
num_operations,\n                   t.first,\n                   array_size,\n                   duration_s);\n    }\n}\n\nint main(int argc, char **argv)\n{\n    if (argc < 4) {\n        print_usage(argv[0]);\n        ::exit(-1);\n    }\n\n    uint64_t num_operations;\n    if (!dsn::buf2uint64(argv[1], num_operations)) {\n        fmt::print(stderr, \"Invalid num_operations: {}\\n\\n\", argv[1]);\n\n        print_usage(argv[0]);\n        ::exit(-1);\n    }\n    if (num_operations <= 0) {\n        fmt::print(stderr, \"num_operations should be > 0: {}\\n\\n\", num_operations);\n\n        print_usage(argv[0]);\n        ::exit(-1);\n    }\n\n    uint64_t array_size;\n    if (!dsn::buf2uint64(argv[2], array_size)) {\n        fmt::print(stderr, \"Invalid array_size: {}\\n\\n\", argv[2]);\n\n        print_usage(argv[0]);\n        ::exit(-1);\n    }\n    if (array_size <= 0 || array_size > MAX_QUEUE_LENGTH) {\n        fmt::print(\n            stderr, \"array_size({}) should be > 0 and <= {}\\n\\n\", array_size, MAX_QUEUE_LENGTH);\n\n        print_usage(argv[0]);\n        ::exit(-1);\n    }\n\n    uint64_t range_size;\n    if (!dsn::buf2uint64(argv[3], range_size)) {\n        fmt::print(stderr, \"Invalid range_size: {}\\n\\n\", argv[3]);\n\n        print_usage(argv[0]);\n        ::exit(-1);\n    }\n    if (range_size <= 0) {\n        fmt::print(stderr, \"range_size({}) should be > 0\\n\\n\", range_size);\n\n        print_usage(argv[0]);\n        ::exit(-1);\n    }\n\n    std::vector<size_t> nths;\n    if (argc >= 5) {\n        std::vector<std::string> nth_strs;\n        dsn::utils::split_args(argv[4], nth_strs, ',');\n        for (const auto &s : nth_strs) {\n            size_t nth;\n            if (!dsn::buf2uint64(s, nth)) {\n                fmt::print(stderr, \"Invalid nth number: {}\\n\\n\", s);\n\n                print_usage(argv[0]);\n                ::exit(-1);\n            }\n\n            if (nth >= array_size) {\n                fmt::print(stderr, \"nth({}) should 
be < array_size({})\\n\\n\", nth, array_size);\n\n                print_usage(argv[0]);\n                ::exit(-1);\n            }\n\n            nths.push_back(nth);\n        }\n    }\n\n    run_bench(num_operations, array_size, range_size, nths);\n\n    return 0;\n}\n"
  },
  {
    "path": "src/utils/test/nth_element_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/nth_element.h>\n\n#include <fmt/format.h>\n#include <gtest/gtest.h>\n\n#include \"nth_element_utils.h\"\n\nnamespace dsn {\n\ntemplate <typename NthElementFinder,\n          typename = typename std::enable_if<\n              std::is_integral<typename NthElementFinder::value_type>::value>::type>\nvoid run_integral_cases(const typename NthElementFinder::container_type &array,\n                        const typename NthElementFinder::nth_container_type &nths,\n                        const typename NthElementFinder::container_type &expected_elements)\n{\n    auto container = array;\n\n    NthElementFinder finder;\n    finder.set_nths(nths);\n    finder(container.begin(), container.begin(), container.end());\n\n    ASSERT_EQ(finder.elements(), expected_elements);\n}\n\ntemplate <typename NthElementFinder,\n          typename = typename std::enable_if<\n              std::is_integral<typename NthElementFinder::value_type>::value>::type>\nvoid run_basic_int64_cases()\n{\n    // Test cases:\n    // - both the array and the nth list are empty\n    // - the array has only one element, and the nth list is empty\n    // - the array 
has only one element, and the nth list has only one element\n    // - the array has only one element, and the nth list has duplicate elements\n    // - the array has only 2 identical elements, and the nth list has only one element\n    // - the array has only 2 identical elements, and the nth list has both elements\n    // - the array has only 2 identical elements, and the nth list has duplicate elements\n    // - the array has only 2 ordered elements, and the nth list has only one element\n    // - the array has only 2 ordered elements, and the nth list has both elements\n    // - the array has only 2 ordered elements, and the nth list has duplicate elements\n    // - the array has only 2 unordered elements, and the nth list has only one element\n    // - the array has only 2 unordered elements, and the nth list has both elements\n    // - the array has only 2 unordered elements, and the nth list has duplicate elements\n    // - the array contains identical elements, and the nth list has only one element\n    // - the array contains identical elements, and the nth list has all elements\n    // - the array contains identical elements, and the nth list has duplicate elements\n    // - all elements in the array are identical, and the nth list has 2 elements\n    // - all elements in the array are identical, and the nth list has all elements\n    // - all elements in the array are identical, and the nth list has duplicate elements\n    // - each element in the array is different from others, and the nth list has 3 elements\n    // - each element in the array is different from others, and the nth list has all elements\n    // - each element in the array is different from others, and the nth list has duplicate elements\n    struct test_case\n    {\n        typename NthElementFinder::container_type array;\n        typename NthElementFinder::nth_container_type nths;\n        typename NthElementFinder::container_type expected_elements;\n    } tests[] = {{{}, {}, {}},\n           
      {{1}, {}, {}},\n                 {{1}, {0}, {1}},\n                 {{1}, {0, 0}, {1, 1}},\n                 {{1, 1}, {1}, {1}},\n                 {{1, 1}, {0, 1}, {1, 1}},\n                 {{1, 1}, {1, 1}, {1, 1}},\n                 {{1, 2}, {1}, {2}},\n                 {{1, 2}, {0, 1}, {1, 2}},\n                 {{1, 2}, {1, 1}, {2, 2}},\n                 {{2, 1}, {1}, {2}},\n                 {{2, 1}, {0, 1}, {1, 2}},\n                 {{2, 1}, {0, 0}, {1, 1}},\n                 {{2, 1, 2, 3, 2}, {2}, {2}},\n                 {{2, 1, 2, 3, 2}, {0, 1, 2, 3, 4}, {1, 2, 2, 2, 3}},\n                 {{2, 1, 2, 3, 2}, {0, 0, 2, 2, 3, 3}, {1, 1, 2, 2, 2, 2}},\n                 {{2, 2, 2, 2, 2, 2}, {2, 3}, {2, 2}},\n                 {{2, 2, 2, 2, 2, 2}, {0, 1, 2, 3, 4, 5}, {2, 2, 2, 2, 2, 2}},\n                 {{2, 2, 2, 2, 2, 2}, {1, 1, 2, 2, 5, 5}, {2, 2, 2, 2, 2, 2}},\n                 {{5, 6, 2, 8, 1, 7}, {3, 4, 5}, {6, 7, 8}},\n                 {{5, 6, 2, 8, 1, 7}, {0, 1, 2, 3, 4, 5}, {1, 2, 5, 6, 7, 8}},\n                 {{5, 6, 2, 8, 1, 7}, {0, 0, 2, 2, 5, 5}, {1, 1, 5, 5, 8, 8}}};\n\n    for (const auto &test : tests) {\n        run_integral_cases<NthElementFinder>(test.array, test.nths, test.expected_elements);\n    }\n}\n\nTEST(nth_element_test, basic_int64) { run_basic_int64_cases<stl_nth_element_finder<int64_t>>(); }\n\ntemplate <typename NthElementFinder>\nvoid run_generated_int64_cases()\n{\n    // Test cases:\n    // - generate empty array with empty nth list\n    // - generate an array of only one element with the nth list of only one element\n    // - generate an array of 2 elements with the nth list of 2 elements\n    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 2\n    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 5\n    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 10\n    // - generate an array of 5000 elements with the 
nth list of 8 elements, at range size 100\n    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 10000\n    // - generate an array of 5000 elements with duplicate nth elements, at range size 10000\n    struct test_case\n    {\n        typename NthElementFinder::size_type array_size;\n        int64_t initial_value;\n        uint64_t range_size;\n        typename NthElementFinder::nth_container_type nths;\n    } tests[] = {{0, 0, 2, {}},\n                 {1, 0, 2, {0}},\n                 {2, 0, 2, {0, 1}},\n                 {5000, 0, 2, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},\n                 {5000, 0, 5, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},\n                 {5000, 0, 10, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},\n                 {5000, 0, 100, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},\n                 {5000, 0, 10000, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},\n                 {5000, 0, 10000, {999, 999, 2999, 2999, 3999, 3999, 4999, 4999}}};\n\n    for (const auto &test : tests) {\n        integral_nth_element_case_generator<int64_t> generator(\n            test.array_size, test.initial_value, test.range_size, test.nths);\n\n        integral_nth_element_case_generator<int64_t>::container_type array;\n        integral_nth_element_case_generator<int64_t>::container_type expected_elements;\n        generator(array, expected_elements);\n\n        run_integral_cases<NthElementFinder>(array, test.nths, expected_elements);\n    }\n}\n\nTEST(nth_element_test, generated_int64)\n{\n    run_generated_int64_cases<stl_nth_element_finder<int64_t>>();\n}\n\ntemplate <typename NthElementFinder,\n          typename = typename std::enable_if<\n              std::is_floating_point<typename NthElementFinder::value_type>::value>::type>\nvoid run_floating_cases(const typename NthElementFinder::container_type &array,\n                        const typename NthElementFinder::nth_container_type 
&nths,\n                        const typename NthElementFinder::container_type &expected_elements)\n{\n    auto container = array;\n\n    NthElementFinder finder;\n    finder.set_nths(nths);\n    finder(container.begin(), container.begin(), container.end());\n\n    ASSERT_EQ(finder.elements().size(), expected_elements.size());\n    for (typename NthElementFinder::size_type i = 0; i < finder.elements().size(); ++i) {\n        ASSERT_DOUBLE_EQ(finder.elements()[i], expected_elements[i]);\n    }\n}\n\ntemplate <typename NthElementFinder,\n          typename = typename std::enable_if<\n              std::is_floating_point<typename NthElementFinder::value_type>::value>::type>\nvoid run_basic_double_cases()\n{\n    // Test cases:\n    // - both the array and the nth list are empty\n    // - the array has only one element, and the nth list is empty\n    // - the array has only one element, and the nth list has only one element\n    // - the array has only one element, and the nth list has duplicate elements\n    // - the array has only 2 identical elements, and the nth list has only one element\n    // - the array has only 2 identical elements, and the nth list has both elements\n    // - the array has only 2 identical elements, and the nth list has duplicate elements\n    // - the array has only 2 ordered elements, and the nth list has only one element\n    // - the array has only 2 ordered elements, and the nth list has both elements\n    // - the array has only 2 ordered elements, and the nth list has duplicate elements\n    // - the array has only 2 unordered elements, and the nth list has only one element\n    // - the array has only 2 unordered elements, and the nth list has both elements\n    // - the array has only 2 unordered elements, and the nth list has duplicate elements\n    // - the array contains identical elements, and the nth list has only one element\n    // - the array contains identical elements, and the nth list has all elements\n    // - the array 
contains identical elements, and the nth list has duplicate elements\n    // - all elements in the array are identical, and the nth list has 2 elements\n    // - all elements in the array are identical, and the nth list has all elements\n    // - all elements in the array are identical, and the nth list has duplicate elements\n    // - each element in the array is different from others, and the nth list has 3 elements\n    // - each element in the array is different from others, and the nth list has all elements\n    // - each element in the array is different from others, and the nth list has duplicate elements\n    struct test_case\n    {\n        typename NthElementFinder::container_type array;\n        typename NthElementFinder::nth_container_type nths;\n        typename NthElementFinder::container_type expected_elements;\n    } tests[] = {\n        {{}, {}, {}},\n        {{1.23}, {}, {}},\n        {{1.23}, {0}, {1.23}},\n        {{1.23}, {0, 0}, {1.23, 1.23}},\n        {{1.23, 1.23}, {1}, {1.23}},\n        {{1.23, 1.23}, {0, 1}, {1.23, 1.23}},\n        {{1.23, 1.23}, {1, 1}, {1.23, 1.23}},\n        {{1.23, 2.34}, {1}, {2.34}},\n        {{1.23, 2.34}, {0, 1}, {1.23, 2.34}},\n        {{1.23, 2.34}, {1, 1}, {2.34, 2.34}},\n        {{2.34, 1.23}, {1}, {2.34}},\n        {{2.34, 1.23}, {0, 1}, {1.23, 2.34}},\n        {{2.34, 1.23}, {0, 0}, {1.23, 1.23}},\n        {{2.34, 1.23, 2.34, 3.56, 2.34}, {2}, {2.34}},\n        {{2.34, 1.23, 2.34, 3.56, 2.34}, {0, 1, 2, 3, 4}, {1.23, 2.34, 2.34, 2.34, 3.56}},\n        {{2.34, 1.23, 2.34, 3.56, 2.34}, {0, 0, 2, 2, 3, 3}, {1.23, 1.23, 2.34, 2.34, 2.34, 2.34}},\n        {{2.34, 2.34, 2.34, 2.34, 2.34, 2.34}, {2, 3}, {2.34, 2.34}},\n        {{2.34, 2.34, 2.34, 2.34, 2.34, 2.34},\n         {0, 1, 2, 3, 4, 5},\n         {2.34, 2.34, 2.34, 2.34, 2.34, 2.34}},\n        {{2.34, 2.34, 2.34, 2.34, 2.34, 2.34},\n         {1, 1, 2, 2, 5, 5},\n         {2.34, 2.34, 2.34, 2.34, 2.34, 2.34}},\n        {{5.67, 6.78, 2.34, 8.90, 1.23, 7.89}, {3, 4, 5}, {6.78, 7.89, 8.90}},\n        {{5.67, 6.78, 2.34, 8.90, 1.23, 7.89},\n         {0, 1, 2, 3, 4, 
5},\n         {1.23, 2.34, 5.67, 6.78, 7.89, 8.90}},\n        {{5.67, 6.78, 2.34, 8.90, 1.23, 7.89},\n         {0, 0, 2, 2, 5, 5},\n         {1.23, 1.23, 5.67, 5.67, 8.90, 8.90}}};\n\n    for (const auto &test : tests) {\n        run_floating_cases<NthElementFinder>(test.array, test.nths, test.expected_elements);\n    }\n}\n\nTEST(nth_element_test, basic_double) { run_basic_double_cases<stl_nth_element_finder<double>>(); }\n\ntemplate <typename NthElementFinder>\nvoid run_generated_double_cases()\n{\n    // Test cases:\n    // - generate empty array with empty nth list\n    // - generate an array of only one element with the nth list of only one element\n    // - generate an array of 2 elements with the nth list of 2 elements\n    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 2\n    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 5\n    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 10\n    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 100\n    // - generate an array of 5000 elements with the nth list of 8 elements, at range size 10000\n    // - generate an array of 5000 elements with duplicate nth elements, at range size 10000\n    struct test_case\n    {\n        typename NthElementFinder::size_type array_size;\n        double initial_value;\n        uint64_t range_size;\n        typename NthElementFinder::nth_container_type nths;\n    } tests[] = {{0, 0.0, 2, {}},\n                 {1, 0.0, 2, {0}},\n                 {2, 0.0, 2, {0, 1}},\n                 {5000, 0.0, 2, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},\n                 {5000, 0.0, 5, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},\n                 {5000, 0.0, 10, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},\n                 {5000, 0.0, 100, {999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},\n                 {5000, 0.0, 10000, 
{999, 1999, 2499, 2999, 3499, 3999, 4499, 4999}},\n                 {5000, 0.0, 10000, {999, 999, 2999, 2999, 3999, 3999, 4999, 4999}}};\n\n    for (const auto &test : tests) {\n        floating_nth_element_case_generator<double> generator(\n            test.array_size, test.initial_value, test.range_size, test.nths);\n\n        floating_nth_element_case_generator<double>::container_type array;\n        floating_nth_element_case_generator<double>::container_type expected_elements;\n        generator(array, expected_elements);\n\n        run_floating_cases<NthElementFinder>(array, test.nths, expected_elements);\n    }\n}\n\nTEST(nth_element_test, generated_double)\n{\n    run_generated_double_cases<stl_nth_element_finder<double>>();\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/nth_element_utils.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <algorithm>\n#include <cstdint>\n#include <memory>\n#include <type_traits>\n#include <utility>\n#include <vector>\n\n#include <fmt/format.h>\n\n#include <dsn/c/api_utilities.h>\n#include <dsn/dist/fmt_logging.h>\n#include <dsn/utility/ports.h>\n#include <dsn/utility/process_utils.h>\n#include <dsn/utility/rand.h>\n\n#include \"perf_counter/perf_counter_atomic.h\"\n\nnamespace dsn {\n\n// The generator is used to produce the test cases randomly for unit tests and benchmarks\n// of nth elements.\ntemplate <typename T,\n          typename Rand,\n          typename = typename std::enable_if<std::is_arithmetic<T>::value>::type>\nclass nth_element_case_generator\n{\npublic:\n    using value_type = T;\n    using container_type = typename std::vector<value_type>;\n    using size_type = typename container_type::size_type;\n    using nth_container_type = typename std::vector<size_type>;\n\n    nth_element_case_generator(size_type array_size,\n                               value_type initial_value,\n                               uint64_t range_size,\n                               const nth_container_type &nths)\n        : 
_array_size(array_size),\n          _initial_value(initial_value),\n          _range_size(range_size),\n          _nths(nths),\n          _rand(Rand())\n    {\n        dassert_f(std::is_sorted(_nths.begin(), _nths.end()),\n                  \"nth indexes({}) is not sorted\",\n                  fmt::join(_nths, \" \"));\n\n        for (const auto &nth : _nths) {\n            dassert_f(\n                nth >= 0 && nth < _array_size, \"nth should be in the range [0, {})\", _array_size);\n        }\n    }\n\n    ~nth_element_case_generator() = default;\n\n    // Generate an out-of-order `array` sized `_array_size`, and put nth elements of sorted\n    // `array` to `elements` in the order of `_nths` which must be sorted.\n    //\n    // The process has 2 stages:\n    // (1) Generate a sorted `array` from _initial_value. Always generate next element by current\n    // element plus _rand(_range_size). Once the index of an element belongs to nth indexes, it\n    // will be appended to `elements`.\n    // (2) After the sorted `array` is generated, it will be shuffled to be out-of-order.\n    void operator()(container_type &array, container_type &elements)\n    {\n        array.clear();\n        elements.clear();\n\n        auto value = _initial_value;\n        for (size_type i = 0, j = 0; i < _array_size; ++i) {\n            array.push_back(value);\n            for (; j < _nths.size() && _nths[j] == i; ++j) {\n                elements.push_back(value);\n            }\n\n            auto delta = _rand(_range_size);\n            value += delta;\n        }\n        std::random_shuffle(array.begin(), array.end());\n    }\n\nprivate:\n    const size_type _array_size;\n    const value_type _initial_value;\n    const uint64_t _range_size;\n    const nth_container_type _nths;\n    const Rand _rand;\n\n    DISALLOW_COPY_AND_ASSIGN(nth_element_case_generator);\n};\n\ntemplate <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>\nclass 
integral_rand_generator\n{\npublic:\n    T operator()(const uint64_t &upper) const { return static_cast<T>(rand::next_u64(upper)); }\n};\n\ntemplate <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>\nusing integral_nth_element_case_generator =\n    nth_element_case_generator<T, integral_rand_generator<T>>;\n\ntemplate <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>\nclass floating_rand_generator\n{\npublic:\n    T operator()(const uint64_t &upper) const\n    {\n        return static_cast<T>(rand::next_u64(upper)) +\n               static_cast<T>(rand::next_u64(upper)) / static_cast<T>(upper);\n    }\n};\n\ntemplate <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>\nusing floating_nth_element_case_generator =\n    nth_element_case_generator<T, floating_rand_generator<T>>;\n\n// Finder class based on perf_counter in comparison with other finders for multiple nth elements.\nclass perf_counter_nth_element_finder\n{\npublic:\n    using container_type = typename std::vector<int64_t>;\n    using size_type = typename container_type::size_type;\n\n    perf_counter_nth_element_finder()\n        : _perf_counter(\"benchmark\",\n                        \"perf_counter_number_percentile_atomic\",\n                        \"nth_element\",\n                        COUNTER_TYPE_NUMBER_PERCENTILES,\n                        \"nth_element implementation by perf_counter_number_percentile_atomic\",\n                        false),\n          _elements(COUNTER_PERCENTILE_COUNT, int64_t())\n    {\n    }\n\n    void load_data(const container_type &array)\n    {\n        _perf_counter._tail.store(0, std::memory_order_relaxed);\n        for (const auto &e : array) {\n            _perf_counter.set(e);\n        }\n    }\n\n    void operator()()\n    {\n        _perf_counter.calc(\n            boost::make_shared<dsn::perf_counter_number_percentile_atomic::compute_context>());\n        
std::copy(_perf_counter._results,\n                  _perf_counter._results + COUNTER_PERCENTILE_COUNT,\n                  _elements.begin());\n    }\n\n    const container_type &elements() const { return _elements; }\n\nprivate:\n    dsn::perf_counter_number_percentile_atomic _perf_counter;\n    container_type _elements;\n\n    DISALLOW_COPY_AND_ASSIGN(perf_counter_nth_element_finder);\n};\n\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/output_utils_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include \"dsn/utility/output_utils.h\"\n\n#include <gtest/gtest.h>\n\n#include <vector>\n#include <string>\n\nusing std::vector;\nusing std::string;\nusing dsn::utils::table_printer;\n\nnamespace dsn {\n\nconst vector<string>\n    single_column_tp_output({\"[tp1]\\n\"\n                             \"row1  : 1.23\\n\"\n                             \"row2  : 2345\\n\"\n                             \"row3  : 3456\\n\",\n                             R\"*(\"tp1\":{\"row1\":\"1.23\",\"row2\":\"2345\",\"row3\":\"3456\"})*\",\n                             R\"*(    \"tp1\": {)*\"\n                             \"\\n\"\n                             R\"*(        \"row1\": \"1.23\",)*\"\n                             \"\\n\"\n                     
        R\"*(        \"row2\": \"2345\",)*\"\n                             \"\\n\"\n                             R\"*(        \"row3\": \"3456\")*\"\n                             \"\\n\"\n                             \"    }\"});\n\nconst vector<string> multi_columns_tp_output(\n    {\"[tp2]\\n\"\n     \"multi_columns_test  col0    col1    col2    \\n\"\n     \"row0                data00  data01  data02  \\n\"\n     \"row1                data10  data11  data12  \\n\"\n     \"row2                data20  data21  data22  \\n\",\n     R\"*(\"tp2\":{\"row0\":{\"multi_columns_test\":\"row0\",\"col0\":\"data00\",\"col1\":\"data01\",\"col2\":\"data02\"},\"row1\":{\"multi_columns_test\":\"row1\",\"col0\":\"data10\",\"col1\":\"data11\",\"col2\":\"data12\"},\"row2\":{\"multi_columns_test\":\"row2\",\"col0\":\"data20\",\"col1\":\"data21\",\"col2\":\"data22\"}})*\",\n     R\"*(    \"tp2\": {)*\"\n     \"\\n\"\n     R\"*(        \"row0\": {)*\"\n     \"\\n\"\n     R\"*(            \"multi_columns_test\": \"row0\",)*\"\n     \"\\n\"\n     R\"*(            \"col0\": \"data00\",)*\"\n     \"\\n\"\n     R\"*(            \"col1\": \"data01\",)*\"\n     \"\\n\"\n     R\"*(            \"col2\": \"data02\")*\"\n     \"\\n\"\n     \"        },\\n\"\n     R\"*(        \"row1\": {)*\"\n     \"\\n\"\n     R\"*(            \"multi_columns_test\": \"row1\",)*\"\n     \"\\n\"\n     R\"*(            \"col0\": \"data10\",)*\"\n     \"\\n\"\n     R\"*(            \"col1\": \"data11\",)*\"\n     \"\\n\"\n     R\"*(            \"col2\": \"data12\")*\"\n     \"\\n\"\n     \"        },\\n\"\n     R\"*(        \"row2\": {)*\"\n     \"\\n\"\n     R\"*(            \"multi_columns_test\": \"row2\",)*\"\n     \"\\n\"\n     R\"*(            \"col0\": \"data20\",)*\"\n     \"\\n\"\n     R\"*(            \"col1\": \"data21\",)*\"\n     \"\\n\"\n     R\"*(            \"col2\": \"data22\")*\"\n     \"\\n\"\n     \"        }\\n\"\n     \"    }\"});\n\nutils::table_printer generate_single_column_tp()\n{\n    
utils::table_printer tp(\"tp1\", 2, 2);\n    tp.add_row_name_and_data(\"row1\", 1.234);\n    tp.add_row_name_and_data(\"row2\", 2345);\n    tp.add_row_name_and_data(\"row3\", \"3456\");\n    return tp;\n}\n\nutils::table_printer generate_multi_columns_tp()\n{\n    int kColumnCount = 3;\n    int kRowCount = 3;\n    utils::table_printer tp(\"tp2\", 2, 2);\n    tp.add_title(\"multi_columns_test\");\n    for (int i = 0; i < kColumnCount; i++) {\n        tp.add_column(\"col\" + std::to_string(i));\n    }\n    for (int i = 0; i < kRowCount; i++) {\n        tp.add_row(\"row\" + std::to_string(i));\n        for (int j = 0; j < kColumnCount; j++) {\n            tp.append_data(\"data\" + std::to_string(i) + std::to_string(j));\n        }\n    }\n    return tp;\n}\n\ntemplate <typename P>\nvoid check_output(const P &printer, const vector<string> &expect_output)\n{\n    static vector<table_printer::output_format> output_formats(\n        {table_printer::output_format::kTabular,\n         table_printer::output_format::kJsonCompact,\n         table_printer::output_format::kJsonPretty});\n    ASSERT_EQ(expect_output.size(), output_formats.size());\n    for (int i = 0; i < output_formats.size(); i++) {\n        std::ostringstream out;\n        printer.output(out, output_formats[i]);\n        ASSERT_EQ(expect_output[i], out.str());\n    }\n}\n\nTEST(table_printer_test, empty_content_test)\n{\n    utils::table_printer tp;\n    ASSERT_NO_FATAL_FAILURE(check_output(tp, {\"\", \"{}\\n\", \"{}\\n\"}));\n}\n\nTEST(table_printer_test, empty_name_test)\n{\n    utils::table_printer tp;\n    tp.add_row_name_and_data(\"row1\", 1.234);\n    ASSERT_NO_FATAL_FAILURE(check_output(tp,\n                                         {\"row1  : 1.23\\n\",\n                                          R\"*({\"row1\":\"1.23\"})*\"\n                                          \"\\n\",\n                                          \"{\\n\"\n                                          R\"*(    \"row1\": \"1.23\")*\"\n   
                                       \"\\n}\\n\"}));\n}\n\nTEST(table_printer_test, single_column_test)\n{\n    utils::table_printer tp(generate_single_column_tp());\n    ASSERT_NO_FATAL_FAILURE(check_output(tp,\n                                         {single_column_tp_output[0],\n                                          \"{\" + single_column_tp_output[1] + \"}\\n\",\n                                          \"{\\n\" + single_column_tp_output[2] + \"\\n}\\n\"}));\n}\n\nTEST(table_printer_test, multi_columns_test)\n{\n    utils::table_printer tp(generate_multi_columns_tp());\n    ASSERT_NO_FATAL_FAILURE(check_output(tp,\n                                         {multi_columns_tp_output[0],\n                                          \"{\" + multi_columns_tp_output[1] + \"}\\n\",\n                                          \"{\\n\" + multi_columns_tp_output[2] + \"\\n}\\n\"}));\n}\n\nTEST(multi_table_printer_test, empty_content_test)\n{\n    utils::multi_table_printer mtp;\n    ASSERT_NO_FATAL_FAILURE(check_output(mtp, {\"\", \"{}\\n\", \"{}\\n\"}));\n}\n\nTEST(multi_table_printer_test, single_empty_sub_test)\n{\n    utils::multi_table_printer mtp;\n    utils::table_printer tp;\n    mtp.add(std::move(tp));\n    ASSERT_NO_FATAL_FAILURE(check_output(mtp, {\"\\n\", \"{}\\n\", \"{}\\n\"}));\n}\n\nTEST(multi_table_printer_test, multi_sub_test)\n{\n    utils::multi_table_printer mtp;\n    mtp.add(generate_single_column_tp());\n    mtp.add(generate_multi_columns_tp());\n    ASSERT_NO_FATAL_FAILURE(check_output(\n        mtp,\n        {single_column_tp_output[0] + \"\\n\" + multi_columns_tp_output[0] + \"\\n\",\n         \"{\" + single_column_tp_output[1] + \",\" + multi_columns_tp_output[1] + \"}\\n\",\n         \"{\\n\" + single_column_tp_output[2] + \",\\n\" + multi_columns_tp_output[2] + \"\\n}\\n\"}));\n}\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/percentile_utils.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <memory>\n#include <set>\n#include <type_traits>\n#include <vector>\n\n#include <dsn/c/api_utilities.h>\n#include <dsn/utility/metrics.h>\n#include <dsn/dist/fmt_logging.h>\n\n#include \"nth_element_utils.h\"\n\nnamespace dsn {\n\n// The generator is used to produce the test cases randomly for unit tests and benchmarks of\n// percentile. 
This is implemented by converting kth percentiles to nth indexes, and calling\n// nth_element_case_generator to generate data and nth elements.\ntemplate <typename NthElementCaseGenerator,\n          typename = typename std::enable_if<\n              std::is_arithmetic<typename NthElementCaseGenerator::value_type>::value>::type>\nclass percentile_case_generator\n{\npublic:\n    using value_type = typename NthElementCaseGenerator::value_type;\n    using container_type = typename NthElementCaseGenerator::container_type;\n    using size_type = typename NthElementCaseGenerator::size_type;\n    using nth_container_type = typename NthElementCaseGenerator::nth_container_type;\n\n    percentile_case_generator(size_type data_size,\n                              value_type initial_value,\n                              uint64_t range_size,\n                              const std::set<kth_percentile_type> &kth_percentiles)\n        : _nth_element_gen()\n    {\n        nth_container_type nths;\n        nths.reserve(kth_percentiles.size());\n        for (const auto &kth : kth_percentiles) {\n            auto size = static_cast<size_t>(data_size);\n            auto nth = static_cast<size_type>(kth_percentile_to_nth_index(size, kth));\n            nths.push_back(nth);\n        }\n\n        _nth_element_gen.reset(\n            new NthElementCaseGenerator(data_size, initial_value, range_size, nths));\n    }\n\n    ~percentile_case_generator() = default;\n\n    // Call nth_element_case_generator internally to generate out-of-order `data` sized `data_size`\n    // and nth elements. 
See nth_element_case_generator for detailed implementations.\n    void operator()(container_type &data, container_type &elements)\n    {\n        (*_nth_element_gen)(data, elements);\n    }\n\nprivate:\n    std::unique_ptr<NthElementCaseGenerator> _nth_element_gen;\n\n    DISALLOW_COPY_AND_ASSIGN(percentile_case_generator);\n};\n\ntemplate <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>\nusing integral_percentile_case_generator =\n    percentile_case_generator<integral_nth_element_case_generator<T>>;\n\ntemplate <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>\nusing floating_percentile_case_generator =\n    percentile_case_generator<floating_nth_element_case_generator<T>>;\n\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/priority_queue.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Unit-test for priority_queue.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/utility/priority_queue.h>\n#include <gtest/gtest.h>\n#include <thread>\n\nusing namespace ::dsn::utils;\n\nstruct queue_data\n{\n    int32_t priority;\n    int32_t queue_index;\n\n    queue_data(int32_t pri, int32_t idx) : priority(pri), queue_index(idx) {}\n};\n\ntypedef priority_queue<queue_data *, 3> my_priority_queue;\nTEST(core, priority_queue)\n{\n    my_priority_queue q(\"my_priority_queue_name\");\n    ASSERT_EQ(\"my_priority_queue_name\", q.get_name());\n    ASSERT_EQ(0, q.count());\n    ASSERT_EQ(nullptr, 
q.dequeue());\n\n    std::vector<queue_data> datas;\n    datas.push_back(queue_data(0, 1));\n    datas.push_back(queue_data(2, 1));\n    datas.push_back(queue_data(1, 1));\n    datas.push_back(queue_data(1, 2));\n    datas.push_back(queue_data(2, 2));\n    datas.push_back(queue_data(0, 2));\n    datas.push_back(queue_data(1, 3));\n    datas.push_back(queue_data(0, 3));\n    datas.push_back(queue_data(2, 3));\n\n    for (int i = 0; i < datas.size(); ++i) {\n        ASSERT_EQ(i, q.count());\n        queue_data *d = &datas[i];\n        ASSERT_EQ(i + 1, q.enqueue(d, d->priority));\n        ASSERT_EQ(i + 1, q.count());\n    }\n\n    std::vector<queue_data> sort_datas(datas);\n    std::sort(sort_datas.begin(), sort_datas.end(), [](const queue_data &l, const queue_data &r) {\n        return l.priority > r.priority ||\n               (l.priority == r.priority && l.queue_index < r.queue_index);\n    });\n\n    int count = sort_datas.size();\n    for (int i = 0; i < count; ++i) {\n        ASSERT_EQ(count, q.count());\n        queue_data *d = nullptr;\n        if (i % 2 == 0) {\n            d = q.dequeue();\n        } else {\n            long ct;\n            d = q.dequeue(ct);\n            ASSERT_EQ(count - 1, ct);\n        }\n        ASSERT_EQ(sort_datas[i].priority, d->priority);\n        ASSERT_EQ(sort_datas[i].queue_index, d->queue_index);\n        ASSERT_EQ(count - 1, q.count());\n        count--;\n    }\n}\n\ntypedef blocking_priority_queue<queue_data *, 3> my_blocking_priority_queue;\nTEST(core, blocking_priority_queue)\n{\n    my_blocking_priority_queue q(\"my_blocking_priority_queue_name\");\n    ASSERT_EQ(\"my_blocking_priority_queue_name\", q.get_name());\n    ASSERT_EQ(0, q.count());\n\n    long ct;\n    ASSERT_EQ(nullptr, q.dequeue_with_timeout(ct, 10));\n    ASSERT_EQ(0, ct);\n\n    ASSERT_EQ(1, q.enqueue(new queue_data(0, 10), 0));\n    queue_data *d = q.dequeue_with_timeout(ct, 10);\n    ASSERT_NE(nullptr, d);\n    ASSERT_EQ(0, ct);\n    ASSERT_EQ(0, 
d->priority);\n    ASSERT_EQ(10, d->queue_index);\n    delete d;\n\n    bool flag = false;\n\n    std::thread t1([&q, &flag]() {\n        long ct;\n        queue_data *d = nullptr;\n\n        d = q.dequeue_with_timeout(ct, 10);\n        ASSERT_EQ(nullptr, d);\n        ASSERT_EQ(0, ct);\n\n        flag = true;\n\n        d = q.dequeue(ct);\n        ASSERT_NE(nullptr, d);\n        ASSERT_EQ(0, ct);\n        ASSERT_EQ(1, d->priority);\n        ASSERT_EQ(20, d->queue_index);\n        delete d;\n\n        d = q.dequeue_with_timeout(ct, 10);\n        ASSERT_EQ(nullptr, d);\n        ASSERT_EQ(0, ct);\n    });\n\n    std::thread t2([&q, &flag]() {\n        while (!flag)\n            std::this_thread::sleep_for(std::chrono::milliseconds(10));\n        ASSERT_EQ(1, q.enqueue(new queue_data(1, 20), 1));\n    });\n\n    t1.join();\n    t2.join();\n}\n"
  },
  {
    "path": "src/utils/test/rand_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utility/rand.h>\n#include <gtest/gtest.h>\n#include <thread>\n\nnamespace dsn {\n\nTEST(random, sanity)\n{\n    { // edge cases\n        ASSERT_EQ(rand::next_u64(0), 0);\n        ASSERT_EQ(rand::next_u64(0, 0), 0);\n        ASSERT_EQ(rand::next_u32(0), 0);\n        ASSERT_EQ(rand::next_u32(0, 0), 0);\n\n        ASSERT_EQ(rand::next_u64(12, 12), 12);\n        ASSERT_EQ(rand::next_u32(12, 12), 12);\n    }\n\n    constexpr int kTestSize = 1000;\n\n    { // 32-bit repeatability, uniqueness\n        rand::reseed_thread_local_rng(0xdeadbeef);\n        std::vector<uint32_t> vals(kTestSize);\n        for (int i = 0; i < kTestSize; ++i) {\n            vals[i] = rand::next_u32();\n        }\n\n        rand::reseed_thread_local_rng(0xdeadbeef);\n        for (int i = 0; i < kTestSize; ++i) {\n            ASSERT_EQ(rand::next_u32(), vals[i]);\n        }\n    }\n\n    { // 64-bit repeatability, uniqueness\n        rand::reseed_thread_local_rng(0xdeadbeef);\n        std::vector<uint64_t> vals(kTestSize);\n        for (int i = 0; i < kTestSize; ++i) {\n            vals[i] = rand::next_u64();\n        }\n\n        
rand::reseed_thread_local_rng(0xdeadbeef);\n        for (int i = 0; i < kTestSize; ++i) {\n            ASSERT_EQ(rand::next_u64(), vals[i]);\n        }\n    }\n}\n\nTEST(random, multi_threaded)\n{\n    const int n = 100;\n    std::vector<uint32_t> seeds(n);\n    std::vector<std::thread> threads;\n    for (int i = 0; i < n; ++i) {\n        threads.push_back(std::thread([i, &seeds] { seeds[i] = rand::next_u32(); }));\n    }\n    for (auto &t : threads) {\n        t.join();\n    }\n    std::sort(seeds.begin(), seeds.end());\n    for (int i = 0; i < n - 1; ++i) {\n        EXPECT_LT(seeds[i], seeds[i + 1]);\n    }\n}\n\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/run.sh",
    "content": "#!/bin/sh\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nif [ -z \"${REPORT_DIR}\" ]; then\n    REPORT_DIR=\".\"\nfi\n\n./clear.sh\noutput_xml=\"${REPORT_DIR}/dsn_utils_tests.xml\"\nGTEST_OUTPUT=\"xml:${output_xml}\" ./dsn_utils_tests\n"
  },
  {
    "path": "src/utils/test/sema.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     Unit-test for semaphore.\n *\n * Revision history:\n *     Nov., 2015, @qinzuoyan (Zuoyan Qin), first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/utility/hpc_locks/sema.h>\n#include <gtest/gtest.h>\n#include <thread>\n\nTEST(core, Semaphore)\n{\n    Semaphore s;\n\n    ASSERT_FALSE(s.wait(10));\n\n    s.signal();\n    ASSERT_TRUE(s.wait(10));\n    ASSERT_FALSE(s.wait(10));\n\n    s.signal(2);\n    ASSERT_TRUE(s.wait(10));\n    ASSERT_TRUE(s.wait(10));\n    ASSERT_FALSE(s.wait(10));\n\n    std::thread t1([&s]() {\n        ASSERT_FALSE(s.wait(10));\n\n        s.wait();\n    });\n\n    std::thread t2([&s]() {\n        
std::this_thread::sleep_for(std::chrono::milliseconds(50));\n\n        s.signal(2);\n\n        s.wait();\n    });\n\n    t1.join();\n    t2.join();\n}\n\nTEST(core, LightweightSemaphore)\n{\n    LightweightSemaphore s;\n\n    ASSERT_FALSE(s.wait(10));\n\n    s.signal();\n    ASSERT_TRUE(s.wait(10));\n    ASSERT_FALSE(s.tryWait());\n    ASSERT_FALSE(s.wait(10));\n\n    s.signal(2);\n    ASSERT_TRUE(s.tryWait());\n    ASSERT_TRUE(s.wait(10));\n    ASSERT_FALSE(s.tryWait());\n    ASSERT_FALSE(s.wait(10));\n\n    bool flag = false;\n\n    std::thread t1([&s, &flag]() {\n        ASSERT_FALSE(s.tryWait());\n        ASSERT_FALSE(s.wait(10));\n\n        flag = true;\n\n        s.wait();\n    });\n\n    std::thread t2([&s, &flag]() {\n        while (!flag)\n            std::this_thread::sleep_for(std::chrono::milliseconds(10));\n\n        s.signal(2);\n\n        s.wait();\n    });\n\n    t1.join();\n    t2.join();\n}\n"
  },
  {
    "path": "src/utils/test/smart_pointers_test.cpp",
    "content": "// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//      http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <dsn/utility/smart_pointers.h>\n\n#include <gtest/gtest.h>\n\nusing namespace dsn;\n\nTEST(MakeUniqueTest, Basic)\n{\n    std::unique_ptr<std::string> p = make_unique<std::string>();\n    EXPECT_EQ(\"\", *p);\n    p = make_unique<std::string>(\"hi\");\n    EXPECT_EQ(\"hi\", *p);\n}\n\nstruct MoveOnly\n{\n    MoveOnly() = default;\n    explicit MoveOnly(int i1) : ip1{new int{i1}} {}\n    MoveOnly(int i1, int i2) : ip1{new int{i1}}, ip2{new int{i2}} {}\n    std::unique_ptr<int> ip1;\n    std::unique_ptr<int> ip2;\n};\n\nstruct AcceptMoveOnly\n{\n    explicit AcceptMoveOnly(MoveOnly m) : m_(std::move(m)) {}\n    MoveOnly m_;\n};\n\nTEST(MakeUniqueTest, MoveOnlyTypeAndValue)\n{\n    using ExpectedType = std::unique_ptr<MoveOnly>;\n    {\n        auto p = make_unique<MoveOnly>();\n        static_assert(std::is_same<decltype(p), ExpectedType>::value, \"unexpected return type\");\n        EXPECT_TRUE(!p->ip1);\n        EXPECT_TRUE(!p->ip2);\n    }\n    {\n        auto p = make_unique<MoveOnly>(1);\n        static_assert(std::is_same<decltype(p), ExpectedType>::value, \"unexpected return type\");\n        EXPECT_TRUE(p->ip1 && *p->ip1 == 1);\n        EXPECT_TRUE(!p->ip2);\n    }\n    {\n        auto p = make_unique<MoveOnly>(1, 2);\n        static_assert(std::is_same<decltype(p), ExpectedType>::value, \"unexpected return type\");\n        
EXPECT_TRUE(p->ip1 && *p->ip1 == 1);\n        EXPECT_TRUE(p->ip2 && *p->ip2 == 2);\n    }\n}\n\nTEST(MakeUniqueTest, AcceptMoveOnly)\n{\n    auto p = make_unique<AcceptMoveOnly>(MoveOnly());\n    p = std::unique_ptr<AcceptMoveOnly>(new AcceptMoveOnly(MoveOnly()));\n}\n\nstruct ArrayWatch\n{\n    void *operator new[](size_t n)\n    {\n        allocs().push_back(n);\n        return ::operator new[](n);\n    }\n    void operator delete[](void *p) { return ::operator delete[](p); }\n    static std::vector<size_t> &allocs()\n    {\n        static auto &v = *new std::vector<size_t>;\n        return v;\n    }\n};\n\nTEST(Make_UniqueTest, Array)\n{\n    // Ensure state is clean before we start so that these tests\n    // are order-agnostic.\n    ArrayWatch::allocs().clear();\n\n    auto p = make_unique<ArrayWatch[]>(5);\n    static_assert(std::is_same<decltype(p), std::unique_ptr<ArrayWatch[]>>::value,\n                  \"unexpected return type\");\n\n    // TODO(wutao1): fix this. EXPECT_THAT is not available since it's an gmock macro,\n    // but we do not depend on gmock.\n    // EXPECT_THAT(ArrayWatch::allocs(), ElementsAre(5 * sizeof(ArrayWatch)));\n}\n\nTEST(Make_UniqueTest, NotAmbiguousWithStdMakeUnique)\n{\n    // Ensure that make_unique is not ambiguous with std::make_unique.\n    // In C++14 mode, the below call to make_unique has both types as candidates.\n    struct TakesStdType\n    {\n        explicit TakesStdType(const std::vector<int> &vec) {}\n    };\n    make_unique<TakesStdType>(std::vector<int>());\n}\n"
  },
  {
    "path": "src/utils/test/string_conv_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/utility/string_conv.h>\n#include <gtest/gtest.h>\n\nTEST(string_conv, buf2bool)\n{\n    bool result = false;\n\n    ASSERT_TRUE(dsn::buf2bool(\"true\", result));\n    ASSERT_EQ(result, true);\n\n    ASSERT_TRUE(dsn::buf2bool(\"TrUe\", result));\n    ASSERT_EQ(result, true);\n\n    ASSERT_FALSE(dsn::buf2bool(\"TrUe\", result, false));\n\n    ASSERT_TRUE(dsn::buf2bool(\"false\", result));\n    ASSERT_EQ(result, false);\n\n    ASSERT_TRUE(dsn::buf2bool(\"FalSe\", result));\n    ASSERT_EQ(result, false);\n\n    ASSERT_FALSE(dsn::buf2bool(\"TrUe\", result, false));\n\n    std::string str(\"true\\0false\", 10);\n    ASSERT_FALSE(dsn::buf2bool(dsn::string_view(str.data(), 3), result));\n    
ASSERT_TRUE(dsn::buf2bool(dsn::string_view(str.data(), 4), result));\n    ASSERT_EQ(result, true);\n    ASSERT_FALSE(dsn::buf2bool(dsn::string_view(str.data(), 5), result));\n    ASSERT_FALSE(dsn::buf2bool(dsn::string_view(str.data(), 6), result));\n    ASSERT_FALSE(dsn::buf2bool(dsn::string_view(str.data() + 5, 4), result));\n    ASSERT_TRUE(dsn::buf2bool(dsn::string_view(str.data() + 5, 5), result));\n    ASSERT_EQ(result, false);\n}\n\nTEST(string_conv, buf2int32)\n{\n    int32_t result = -1;\n\n    ASSERT_TRUE(dsn::buf2int32(std::to_string(0), result));\n    ASSERT_EQ(result, 0);\n\n    ASSERT_TRUE(dsn::buf2int32(\"0xbeef\", result));\n    ASSERT_EQ(result, 0xbeef);\n\n    ASSERT_TRUE(dsn::buf2int32(\"0xBEEF\", result));\n    ASSERT_EQ(result, 0xbeef);\n\n    ASSERT_TRUE(dsn::buf2int32(std::to_string(42), result));\n    ASSERT_EQ(result, 42);\n\n    ASSERT_TRUE(dsn::buf2int32(std::to_string(-42), result));\n    ASSERT_EQ(result, -42);\n\n    ASSERT_TRUE(dsn::buf2int32(std::to_string(std::numeric_limits<int32_t>::min()), result));\n    ASSERT_EQ(result, std::numeric_limits<int32_t>::min());\n\n    ASSERT_TRUE(dsn::buf2int32(std::to_string(std::numeric_limits<int32_t>::max()), result));\n    ASSERT_EQ(result, std::numeric_limits<int32_t>::max());\n\n    ASSERT_FALSE(dsn::buf2int32(std::to_string(std::numeric_limits<int64_t>::max()), result));\n    ASSERT_FALSE(dsn::buf2int32(std::to_string(std::numeric_limits<int64_t>::min()), result));\n    ASSERT_FALSE(dsn::buf2int32(std::to_string(std::numeric_limits<uint64_t>::max()), result));\n\n    // \"\\045\" is \"%\", so the string length=5, otherwise(2th argument > 5) it will be reported\n    // \"global-buffer-overflow\" error under AddressSanitizer check\n    std::string str(\"123\\0456\", 5);\n    ASSERT_TRUE(dsn::buf2int32(dsn::string_view(str.data(), 2), result));\n    ASSERT_EQ(result, 12);\n    ASSERT_TRUE(dsn::buf2int32(dsn::string_view(str.data(), 3), result));\n    ASSERT_EQ(result, 123);\n    
ASSERT_FALSE(dsn::buf2int32(dsn::string_view(str.data(), 4), result));\n    ASSERT_FALSE(dsn::buf2int32(dsn::string_view(str.data(), 5), result));\n}\n\nTEST(string_conv, buf2int64)\n{\n    int64_t result = -1;\n\n    ASSERT_TRUE(dsn::buf2int64(std::to_string(0), result));\n    ASSERT_EQ(result, 0);\n\n    ASSERT_TRUE(dsn::buf2int64(\"0xdeadbeef\", result));\n    ASSERT_EQ(result, 0xdeadbeef);\n\n    ASSERT_TRUE(dsn::buf2int64(\"0xDEADBEEF\", result));\n    ASSERT_EQ(result, 0xdeadbeef);\n\n    ASSERT_TRUE(dsn::buf2int64(std::to_string(42), result));\n    ASSERT_EQ(result, 42);\n\n    ASSERT_TRUE(dsn::buf2int64(std::to_string(-42), result));\n    ASSERT_EQ(result, -42);\n\n    ASSERT_TRUE(dsn::buf2int64(std::to_string(std::numeric_limits<int32_t>::min()), result));\n    ASSERT_EQ(result, std::numeric_limits<int32_t>::min());\n\n    ASSERT_TRUE(dsn::buf2int64(std::to_string(std::numeric_limits<int32_t>::max()), result));\n    ASSERT_EQ(result, std::numeric_limits<int32_t>::max());\n\n    ASSERT_TRUE(dsn::buf2int64(std::to_string(std::numeric_limits<uint32_t>::max()), result));\n    ASSERT_EQ(result, std::numeric_limits<uint32_t>::max());\n\n    ASSERT_TRUE(dsn::buf2int64(std::to_string(std::numeric_limits<int64_t>::max()), result));\n    ASSERT_EQ(result, std::numeric_limits<int64_t>::max());\n\n    ASSERT_TRUE(dsn::buf2int64(std::to_string(std::numeric_limits<int64_t>::min()), result));\n    ASSERT_EQ(result, std::numeric_limits<int64_t>::min());\n\n    ASSERT_FALSE(dsn::buf2int64(std::to_string(std::numeric_limits<uint64_t>::max()), result));\n\n    // \"\\045\" is \"%\", so the string length=5, otherwise(2th argument > 5) it will be reported\n    // \"global-buffer-overflow\" error under AddressSanitizer check\n    std::string str(\"123\\0456\", 5);\n    ASSERT_TRUE(dsn::buf2int64(dsn::string_view(str.data(), 2), result));\n    ASSERT_EQ(result, 12);\n    ASSERT_TRUE(dsn::buf2int64(dsn::string_view(str.data(), 3), result));\n    ASSERT_EQ(result, 123);\n    
ASSERT_FALSE(dsn::buf2int64(dsn::string_view(str.data(), 4), result));\n    ASSERT_FALSE(dsn::buf2int64(dsn::string_view(str.data(), 5), result));\n}\n\nTEST(string_conv, buf2uint64)\n{\n    uint64_t result = 1;\n\n    ASSERT_TRUE(dsn::buf2uint64(std::to_string(0), result));\n    ASSERT_EQ(result, 0);\n\n    ASSERT_TRUE(dsn::buf2uint64(\"-0\", result));\n    ASSERT_EQ(result, 0);\n\n    ASSERT_FALSE(dsn::buf2uint64(\"-1\", result));\n\n    ASSERT_TRUE(dsn::buf2uint64(\"0xdeadbeef\", result));\n    ASSERT_EQ(result, 0xdeadbeef);\n\n    ASSERT_TRUE(dsn::buf2uint64(\"0xDEADBEEF\", result));\n    ASSERT_EQ(result, 0xdeadbeef);\n\n    ASSERT_TRUE(dsn::buf2uint64(std::to_string(42), result));\n    ASSERT_EQ(result, 42);\n\n    ASSERT_TRUE(dsn::buf2uint64(std::to_string(std::numeric_limits<int32_t>::max()), result));\n    ASSERT_EQ(result, std::numeric_limits<int32_t>::max());\n\n    ASSERT_TRUE(dsn::buf2uint64(std::to_string(std::numeric_limits<uint32_t>::max()), result));\n    ASSERT_EQ(result, std::numeric_limits<uint32_t>::max());\n\n    ASSERT_TRUE(dsn::buf2uint64(std::to_string(std::numeric_limits<uint64_t>::max()), result));\n    ASSERT_EQ(result, std::numeric_limits<uint64_t>::max());\n\n    ASSERT_TRUE(dsn::buf2uint64(std::to_string(std::numeric_limits<uint64_t>::min()), result));\n    ASSERT_EQ(result, std::numeric_limits<uint64_t>::min());\n\n    // \"\\045\" is \"%\", so the string length=5, otherwise(2th argument > 5) it will be reported\n    // \"global-buffer-overflow\" error under AddressSanitizer check\n    std::string str(\"123\\0456\", 5);\n    ASSERT_TRUE(dsn::buf2uint64(dsn::string_view(str.data(), 2), result));\n    ASSERT_EQ(result, 12);\n    ASSERT_TRUE(dsn::buf2uint64(dsn::string_view(str.data(), 3), result));\n    ASSERT_EQ(result, 123);\n    ASSERT_FALSE(dsn::buf2uint64(dsn::string_view(str.data(), 4), result));\n    ASSERT_FALSE(dsn::buf2uint64(dsn::string_view(str.data(), 5), result));\n}\n\nTEST(string_conv, buf2uint32)\n{\n    uint32_t 
result = 1;\n\n    ASSERT_TRUE(dsn::buf2uint32(std::to_string(0), result));\n    ASSERT_EQ(result, 0);\n\n    ASSERT_TRUE(dsn::buf2uint32(\"-0\", result));\n    ASSERT_EQ(result, 0);\n\n    ASSERT_FALSE(dsn::buf2uint32(\"-1\", result));\n\n    ASSERT_TRUE(dsn::buf2uint32(\"0xdeadbeef\", result));\n    ASSERT_EQ(result, 0xdeadbeef);\n\n    ASSERT_TRUE(dsn::buf2uint32(\"0xDEADBEEF\", result));\n    ASSERT_EQ(result, 0xdeadbeef);\n\n    ASSERT_TRUE(dsn::buf2uint32(std::to_string(42), result));\n    ASSERT_EQ(result, 42);\n\n    ASSERT_TRUE(dsn::buf2uint32(std::to_string(std::numeric_limits<int16_t>::max()), result));\n    ASSERT_EQ(result, std::numeric_limits<int16_t>::max());\n\n    ASSERT_TRUE(dsn::buf2uint32(std::to_string(std::numeric_limits<uint16_t>::max()), result));\n    ASSERT_EQ(result, std::numeric_limits<uint16_t>::max());\n\n    ASSERT_TRUE(dsn::buf2uint32(std::to_string(std::numeric_limits<uint32_t>::max()), result));\n    ASSERT_EQ(result, std::numeric_limits<uint32_t>::max());\n\n    ASSERT_TRUE(dsn::buf2uint32(std::to_string(std::numeric_limits<uint32_t>::min()), result));\n    ASSERT_EQ(result, std::numeric_limits<uint32_t>::min());\n\n    ASSERT_FALSE(dsn::buf2uint32(std::to_string(std::numeric_limits<uint64_t>::max()), result));\n\n    // \"\\045\" is \"%\", so the string length=5, otherwise(2th argument > 5) it will be reported\n    // \"global-buffer-overflow\" error under AddressSanitizer check\n    std::string str(\"123\\0456\", 5);\n    ASSERT_TRUE(dsn::buf2uint32(dsn::string_view(str.data(), 2), result));\n    ASSERT_EQ(result, 12);\n    ASSERT_TRUE(dsn::buf2uint32(dsn::string_view(str.data(), 3), result));\n    ASSERT_EQ(result, 123);\n    ASSERT_FALSE(dsn::buf2uint32(dsn::string_view(str.data(), 4), result));\n    ASSERT_FALSE(dsn::buf2uint32(dsn::string_view(str.data(), 5), result));\n}\n\nTEST(string_conv, int64_partial)\n{\n    int64_t result = 0;\n    ASSERT_FALSE(dsn::buf2int64(\"\", result));\n    ASSERT_FALSE(dsn::buf2int64(\" \", 
result)) << result;\n    ASSERT_FALSE(dsn::buf2int64(\"-\", result)) << result;\n    ASSERT_FALSE(dsn::buf2int64(\"123@@@\", result));\n    ASSERT_FALSE(dsn::buf2int64(\"@@@123\", result));\n    ASSERT_FALSE(dsn::buf2int64(\"0xdeadbeeg\", result));\n\n    const int64_t int64_min = std::numeric_limits<int64_t>::min();\n    const int64_t int64_max = std::numeric_limits<int64_t>::max();\n    ASSERT_FALSE(dsn::buf2int64(std::to_string(int64_min) + std::to_string(int64_max), result));\n    ASSERT_FALSE(dsn::buf2int64(std::to_string(int64_max) + std::to_string(int64_max), result));\n}\n\nTEST(string_conv, uint64_partial)\n{\n    uint64_t result = 0;\n    ASSERT_FALSE(dsn::buf2uint64(\"\", result));\n    ASSERT_FALSE(dsn::buf2uint64(\" \", result)) << result;\n    ASSERT_FALSE(dsn::buf2uint64(\"-\", result)) << result;\n    ASSERT_FALSE(dsn::buf2uint64(\"123@@@\", result));\n    ASSERT_FALSE(dsn::buf2uint64(\"@@@123\", result));\n    ASSERT_FALSE(dsn::buf2uint64(\"0xdeadbeeg\", result));\n\n    ASSERT_FALSE(dsn::buf2uint64(std::to_string(-1), result));\n    ASSERT_FALSE(\n        dsn::buf2uint64(std::to_string(std::numeric_limits<uint64_t>::max()).append(\"0\"), result));\n}\n\nTEST(string_conv, buf2double)\n{\n    double result = 0;\n\n    ASSERT_TRUE(dsn::buf2double(\"1.1\", result));\n    ASSERT_DOUBLE_EQ(result, 1.1);\n\n    ASSERT_TRUE(dsn::buf2double(\"0.0\", result));\n    ASSERT_DOUBLE_EQ(result, 0.0);\n    ASSERT_TRUE(dsn::buf2double(\"-0.0\", result));\n    ASSERT_DOUBLE_EQ(result, 0.0);\n\n    ASSERT_TRUE(dsn::buf2double(\"-1.1\", result));\n    ASSERT_DOUBLE_EQ(result, -1.1);\n\n    ASSERT_TRUE(dsn::buf2double(\"1.2e3\", result));\n    ASSERT_DOUBLE_EQ(result, 1200.0);\n\n    ASSERT_TRUE(dsn::buf2double(\"1.2E3\", result));\n    ASSERT_DOUBLE_EQ(result, 1200.0);\n\n    ASSERT_TRUE(dsn::buf2double(\"1e0\", result));\n    ASSERT_DOUBLE_EQ(result, 1.0);\n\n    ASSERT_TRUE(dsn::buf2double(\"0x1.2p3\", result));\n    ASSERT_DOUBLE_EQ(result, 0x1.2p3);\n    
ASSERT_DOUBLE_EQ(result, 9.0);\n\n    ASSERT_TRUE(dsn::buf2double(\"0X1.2P3\", result));\n    ASSERT_DOUBLE_EQ(result, 0x1.2p3);\n    ASSERT_DOUBLE_EQ(result, 9.0);\n\n    /// bad case\n    ASSERT_FALSE(dsn::buf2double(\"nan\", result));\n    ASSERT_FALSE(dsn::buf2double(\"NaN\", result));\n    ASSERT_FALSE(dsn::buf2double(\"-nan\", result));\n    ASSERT_FALSE(dsn::buf2double(\"-NAN\", result));\n    ASSERT_FALSE(dsn::buf2double(\"inf\", result));\n    ASSERT_FALSE(dsn::buf2double(\"-INF\", result));\n    ASSERT_FALSE(dsn::buf2double(\"INFINITY\", result));\n    ASSERT_FALSE(dsn::buf2double(\"abc\", result));\n    ASSERT_FALSE(dsn::buf2double(\"1.18973e+4932\", result));\n}\n"
  },
  {
    "path": "src/utils/test/string_view_test.cpp",
    "content": "// Copyright 2017 The Abseil Authors.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n//      http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\n#include <dsn/utility/string_view.h>\n\n#include <gtest/gtest.h>\n\nnamespace {\n\n// Separated from STL1() because some compilers produce an overly\n// large stack frame for the combined function.\nTEST(StringViewTest, STL2)\n{\n    const dsn::string_view a(\"abcdefghijklmnopqrstuvwxyz\");\n    const dsn::string_view b(\"abc\");\n    const dsn::string_view c(\"xyz\");\n    dsn::string_view d(\"foobar\");\n    const dsn::string_view e;\n    const dsn::string_view f(\"123\"\n                             \"\\0\"\n                             \"456\",\n                             7);\n\n    d = dsn::string_view();\n    EXPECT_EQ(d.size(), 0);\n    EXPECT_TRUE(d.empty());\n    EXPECT_TRUE(d.data() == nullptr);\n    EXPECT_TRUE(d.begin() == d.end());\n\n    EXPECT_EQ(a.find(b), 0);\n    EXPECT_EQ(a.find(b, 1), dsn::string_view::npos);\n    EXPECT_EQ(a.find(c), 23);\n    EXPECT_EQ(a.find(c, 9), 23);\n    EXPECT_EQ(a.find(c, dsn::string_view::npos), dsn::string_view::npos);\n    EXPECT_EQ(b.find(c), dsn::string_view::npos);\n    EXPECT_EQ(b.find(c, dsn::string_view::npos), dsn::string_view::npos);\n    EXPECT_EQ(a.find(d), 0);\n    EXPECT_EQ(a.find(e), 0);\n    EXPECT_EQ(a.find(d, 12), 12);\n    EXPECT_EQ(a.find(e, 17), 17);\n    dsn::string_view g(\"xx not found bb\");\n    EXPECT_EQ(a.find(g), dsn::string_view::npos);\n    // empty std::string 
nonsense\n    EXPECT_EQ(d.find(b), dsn::string_view::npos);\n    EXPECT_EQ(e.find(b), dsn::string_view::npos);\n    EXPECT_EQ(d.find(b, 4), dsn::string_view::npos);\n    EXPECT_EQ(e.find(b, 7), dsn::string_view::npos);\n\n    size_t empty_search_pos = std::string().find(std::string());\n    EXPECT_EQ(d.find(d), empty_search_pos);\n    EXPECT_EQ(d.find(e), empty_search_pos);\n    EXPECT_EQ(e.find(d), empty_search_pos);\n    EXPECT_EQ(e.find(e), empty_search_pos);\n    EXPECT_EQ(d.find(d, 4), std::string().find(std::string(), 4));\n    EXPECT_EQ(d.find(e, 4), std::string().find(std::string(), 4));\n    EXPECT_EQ(e.find(d, 4), std::string().find(std::string(), 4));\n    EXPECT_EQ(e.find(e, 4), std::string().find(std::string(), 4));\n}\n\n// Continued from STL2\nTEST(StringViewTest, STL2Substr)\n{\n    const dsn::string_view a(\"abcdefghijklmnopqrstuvwxyz\");\n    const dsn::string_view b(\"abc\");\n    const dsn::string_view c(\"xyz\");\n    dsn::string_view d(\"foobar\");\n    const dsn::string_view e;\n\n    d = dsn::string_view();\n    EXPECT_EQ(a.substr(0, 3), b);\n    EXPECT_EQ(a.substr(23), c);\n    EXPECT_EQ(a.substr(23, 3), c);\n    EXPECT_EQ(a.substr(23, 99), c);\n    EXPECT_EQ(a.substr(0), a);\n    EXPECT_EQ(a.substr(3, 2), \"de\");\n    // empty std::string nonsense\n    EXPECT_EQ(d.substr(0, 99), e);\n    // use of npos\n    EXPECT_EQ(a.substr(0, dsn::string_view::npos), a);\n    EXPECT_EQ(a.substr(23, dsn::string_view::npos), c);\n\n    EXPECT_THROW(a.substr(99, 2), std::out_of_range);\n}\n\nTEST(StringViewTest, Ctor)\n{\n    {\n        // Null.\n        dsn::string_view s10;\n        EXPECT_TRUE(s10.data() == nullptr);\n        EXPECT_EQ(0, s10.length());\n    }\n\n    {\n        // const char* without length.\n        const char *hello = \"hello\";\n        dsn::string_view s20(hello);\n        EXPECT_TRUE(s20.data() == hello);\n        EXPECT_EQ(5, s20.length());\n\n        // const char* with length.\n        dsn::string_view s21(hello, 4);\n        
EXPECT_TRUE(s21.data() == hello);\n        EXPECT_EQ(4, s21.length());\n\n        // Not recommended, but valid C++\n        dsn::string_view s22(hello, 6);\n        EXPECT_TRUE(s22.data() == hello);\n        EXPECT_EQ(6, s22.length());\n    }\n\n    {\n        // std::string.\n        std::string hola = \"hola\";\n        dsn::string_view s30(hola);\n        EXPECT_TRUE(s30.data() == hola.data());\n        EXPECT_EQ(4, s30.length());\n\n        // std::string with embedded '\\0'.\n        hola.push_back('\\0');\n        hola.append(\"h2\");\n        hola.push_back('\\0');\n        dsn::string_view s31(hola);\n        EXPECT_TRUE(s31.data() == hola.data());\n        EXPECT_EQ(8, s31.length());\n    }\n}\n\nTEST(StringViewTest, Swap)\n{\n    dsn::string_view a(\"a\");\n    dsn::string_view b(\"bbb\");\n    EXPECT_TRUE(noexcept(a.swap(b)));\n    a.swap(b);\n    EXPECT_EQ(a, \"bbb\");\n    EXPECT_EQ(b, \"a\");\n    a.swap(b);\n    EXPECT_EQ(a, \"a\");\n    EXPECT_EQ(b, \"bbb\");\n}\n\n#define EXPECT_COMPARE_TRUE(op, x, y)                                                              \\\n    EXPECT_TRUE(dsn::string_view((x)) op dsn::string_view((y)));                                   \\\n    EXPECT_TRUE(dsn::string_view((x)).compare(dsn::string_view((y))) op 0)\n\n#define EXPECT_COMPARE_FALSE(op, x, y)                                                             \\\n    EXPECT_FALSE(dsn::string_view((x)) op dsn::string_view((y)));                                  \\\n    EXPECT_FALSE(dsn::string_view((x)).compare(dsn::string_view((y))) op 0)\n\nTEST(StringViewTest, ComparisonOperators)\n{\n    EXPECT_COMPARE_FALSE(==, \"a\", \"\");\n    EXPECT_COMPARE_FALSE(==, \"\", \"a\");\n    EXPECT_COMPARE_FALSE(==, \"a\", \"b\");\n    EXPECT_COMPARE_FALSE(==, \"a\", \"aa\");\n    EXPECT_COMPARE_FALSE(==, \"aa\", \"a\");\n\n    EXPECT_COMPARE_TRUE(==, \"\", \"\");\n    EXPECT_COMPARE_TRUE(==, \"\", dsn::string_view());\n    EXPECT_COMPARE_TRUE(==, dsn::string_view(), \"\");\n    
EXPECT_COMPARE_TRUE(==, \"a\", \"a\");\n    EXPECT_COMPARE_TRUE(==, \"aa\", \"aa\");\n\n    EXPECT_COMPARE_FALSE(!=, \"\", \"\");\n    EXPECT_COMPARE_FALSE(!=, \"a\", \"a\");\n    EXPECT_COMPARE_FALSE(!=, \"aa\", \"aa\");\n\n    EXPECT_COMPARE_TRUE(!=, \"a\", \"\");\n    EXPECT_COMPARE_TRUE(!=, \"\", \"a\");\n    EXPECT_COMPARE_TRUE(!=, \"a\", \"b\");\n    EXPECT_COMPARE_TRUE(!=, \"a\", \"aa\");\n    EXPECT_COMPARE_TRUE(!=, \"aa\", \"a\");\n}\n\nTEST(StringViewTest, STL1)\n{\n    const dsn::string_view a(\"abcdefghijklmnopqrstuvwxyz\");\n    const dsn::string_view b(\"abc\");\n    const dsn::string_view c(\"xyz\");\n    const dsn::string_view d(\"foobar\");\n    const dsn::string_view e;\n    std::string temp(\"123\");\n    temp += '\\0';\n    temp += \"456\";\n    const dsn::string_view f(temp);\n\n    EXPECT_EQ(a[6], 'g');\n    EXPECT_EQ(b[0], 'a');\n    EXPECT_EQ(c[2], 'z');\n    EXPECT_EQ(f[3], '\\0');\n    EXPECT_EQ(f[5], '5');\n\n    EXPECT_EQ(*d.data(), 'f');\n    EXPECT_EQ(d.data()[5], 'r');\n    EXPECT_TRUE(e.data() == nullptr);\n\n    EXPECT_EQ(*a.begin(), 'a');\n    EXPECT_EQ(*(b.begin() + 2), 'c');\n    EXPECT_EQ(*(c.end() - 1), 'z');\n\n    EXPECT_EQ(*a.rbegin(), 'z');\n    EXPECT_EQ(*(b.rbegin() + 2), 'a');\n    EXPECT_EQ(*(c.rend() - 1), 'x');\n    EXPECT_TRUE(a.rbegin() + 26 == a.rend());\n\n    EXPECT_EQ(a.size(), 26);\n    EXPECT_EQ(b.size(), 3);\n    EXPECT_EQ(c.size(), 3);\n    EXPECT_EQ(d.size(), 6);\n    EXPECT_EQ(e.size(), 0);\n    EXPECT_EQ(f.size(), 7);\n\n    EXPECT_TRUE(!d.empty());\n    EXPECT_TRUE(d.begin() != d.end());\n    EXPECT_TRUE(d.begin() + 6 == d.end());\n\n    EXPECT_TRUE(e.empty());\n    EXPECT_TRUE(e.begin() == e.end());\n}\n\nTEST(StringViewTest, Remove)\n{\n    dsn::string_view a(\"foobar\");\n    std::string s1(\"123\");\n    s1 += '\\0';\n    s1 += \"456\";\n    dsn::string_view b(s1);\n    dsn::string_view e;\n    std::string s2;\n\n    // remove_prefix\n    dsn::string_view c(a);\n    c.remove_prefix(3);\n    
EXPECT_EQ(c, \"bar\");\n    c = a;\n    c.remove_prefix(0);\n    EXPECT_EQ(c, a);\n    c.remove_prefix(c.size());\n    EXPECT_EQ(c, e);\n\n    // remove_suffix\n    c = a;\n    c.remove_suffix(3);\n    EXPECT_EQ(c, \"foo\");\n    c = a;\n    c.remove_suffix(0);\n    EXPECT_EQ(c, a);\n    c.remove_suffix(c.size());\n    EXPECT_EQ(c, e);\n}\n\nTEST(StringViewTest, Set)\n{\n    dsn::string_view a(\"foobar\");\n    dsn::string_view empty;\n    dsn::string_view b;\n\n    // set\n    b = dsn::string_view(\"foobar\", 6);\n    EXPECT_EQ(b, a);\n    b = dsn::string_view(\"foobar\", 0);\n    EXPECT_EQ(b, empty);\n    b = dsn::string_view(\"foobar\", 7);\n    EXPECT_NE(b, a);\n\n    b = dsn::string_view(\"foobar\");\n    EXPECT_EQ(b, a);\n}\n\nTEST(StringViewTest, FrontBack)\n{\n    static const char arr[] = \"abcd\";\n    const dsn::string_view csp(arr, 4);\n    EXPECT_EQ(&arr[0], &csp.front());\n    EXPECT_EQ(&arr[3], &csp.back());\n}\n\nTEST(StringViewTest, FrontBackSingleChar)\n{\n    static const char c = 'a';\n    const dsn::string_view csp(&c, 1);\n    EXPECT_EQ(&c, &csp.front());\n    EXPECT_EQ(&c, &csp.back());\n}\n\nTEST(StringViewTest, NULLInput)\n{\n    dsn::string_view s;\n    EXPECT_EQ(s.data(), nullptr);\n    EXPECT_EQ(s.size(), 0);\n\n    s = dsn::string_view(nullptr);\n    EXPECT_EQ(s.data(), nullptr);\n    EXPECT_EQ(s.size(), 0);\n\n    EXPECT_EQ(\"\", std::string(s));\n}\n\nTEST(StringViewTest, ExplicitConversionOperator)\n{\n    dsn::string_view sp = \"hi\";\n    EXPECT_EQ(sp, std::string(sp));\n}\n\nTEST(StringViewTest, Noexcept)\n{\n    EXPECT_TRUE((std::is_nothrow_constructible<dsn::string_view, const std::string &>::value));\n    EXPECT_TRUE((std::is_nothrow_constructible<dsn::string_view, const std::string &>::value));\n    EXPECT_TRUE(std::is_nothrow_constructible<dsn::string_view>::value);\n    constexpr dsn::string_view sp;\n    EXPECT_TRUE(noexcept(sp.begin()));\n    EXPECT_TRUE(noexcept(sp.end()));\n    EXPECT_TRUE(noexcept(sp.cbegin()));\n    
EXPECT_TRUE(noexcept(sp.cend()));\n    EXPECT_TRUE(noexcept(sp.rbegin()));\n    EXPECT_TRUE(noexcept(sp.rend()));\n    EXPECT_TRUE(noexcept(sp.crbegin()));\n    EXPECT_TRUE(noexcept(sp.crend()));\n    EXPECT_TRUE(noexcept(sp.size()));\n    EXPECT_TRUE(noexcept(sp.length()));\n    EXPECT_TRUE(noexcept(sp.empty()));\n    EXPECT_TRUE(noexcept(sp.data()));\n    EXPECT_TRUE(noexcept(sp.compare(sp)));\n    EXPECT_TRUE(noexcept(sp.find(sp)));\n}\n\nTEST(StringViewTest, HeterogenousStringViewEquals)\n{\n    EXPECT_EQ(dsn::string_view(\"hello\"), std::string(\"hello\"));\n    EXPECT_EQ(\"hello\", dsn::string_view(\"hello\"));\n}\n\nTEST(StringViewTest, FindConformance)\n{\n    struct\n    {\n        std::string haystack;\n        std::string needle;\n    } specs[] = {\n        {\"\", \"\"},\n        {\"\", \"a\"},\n        {\"a\", \"\"},\n        {\"a\", \"a\"},\n        {\"a\", \"b\"},\n        {\"aa\", \"\"},\n        {\"aa\", \"a\"},\n        {\"aa\", \"b\"},\n        {\"ab\", \"a\"},\n        {\"ab\", \"b\"},\n        {\"abcd\", \"\"},\n        {\"abcd\", \"a\"},\n        {\"abcd\", \"d\"},\n        {\"abcd\", \"ab\"},\n        {\"abcd\", \"bc\"},\n        {\"abcd\", \"cd\"},\n        {\"abcd\", \"abcd\"},\n    };\n    for (const auto &s : specs) {\n        SCOPED_TRACE(s.haystack);\n        SCOPED_TRACE(s.needle);\n        std::string st = s.haystack;\n        dsn::string_view sp = s.haystack;\n        for (size_t i = 0; i <= sp.size(); ++i) {\n            size_t pos = (i == sp.size()) ? 
dsn::string_view::npos : i;\n            SCOPED_TRACE(pos);\n            EXPECT_EQ(sp.find(s.needle, pos), st.find(s.needle, pos));\n        }\n    }\n}\n\nclass StringViewStreamTest : public ::testing::Test\n{\npublic:\n    // Set negative 'width' for right justification.\n    template <typename T>\n    std::string Pad(const T &s, int width, char fill = 0)\n    {\n        std::ostringstream oss;\n        if (fill != 0) {\n            oss << std::setfill(fill);\n        }\n        if (width < 0) {\n            width = -width;\n            oss << std::right;\n        }\n        oss << std::setw(width) << s;\n        return oss.str();\n    }\n};\n\nTEST_F(StringViewStreamTest, Padding)\n{\n    std::string s(\"hello\");\n    dsn::string_view sp(s);\n    for (int w = -64; w < 64; ++w) {\n        SCOPED_TRACE(w);\n        EXPECT_EQ(Pad(s, w), Pad(sp, w));\n    }\n    for (int w = -64; w < 64; ++w) {\n        SCOPED_TRACE(w);\n        EXPECT_EQ(Pad(s, w, '#'), Pad(sp, w, '#'));\n    }\n}\n\nTEST_F(StringViewStreamTest, ResetsWidth)\n{\n    // Width should reset after one formatted write.\n    // If we weren't resetting width after formatting the string_view,\n    // we'd have width=5 carrying over to the printing of the \"]\",\n    // creating \"[###hi####]\".\n    std::string s = \"hi\";\n    dsn::string_view sp = s;\n    {\n        std::ostringstream oss;\n        oss << \"[\" << std::setfill('#') << std::setw(5) << s << \"]\";\n        ASSERT_EQ(\"[###hi]\", oss.str());\n    }\n    {\n        std::ostringstream oss;\n        oss << \"[\" << std::setfill('#') << std::setw(5) << sp << \"]\";\n        EXPECT_EQ(\"[###hi]\", oss.str());\n    }\n}\n\n} // namespace\n"
  },
  {
    "path": "src/utils/test/time_utils_test.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <dsn/utils/time_utils.h>\n#include <gtest/gtest.h>\n#include <dsn/c/api_layer1.h>\n\nnamespace dsn {\nnamespace utils {\n\nTEST(time_utils, hh_mm_to_seconds)\n{\n    ASSERT_EQ(hh_mm_to_seconds(\"00:00\"), 0);\n    ASSERT_EQ(hh_mm_to_seconds(\"23:59\"), 86340);\n    ASSERT_EQ(hh_mm_to_seconds(\"1:1\"), 3660);\n    ASSERT_EQ(hh_mm_to_seconds(\"01:1\"), 3660);\n    ASSERT_EQ(hh_mm_to_seconds(\"1:01\"), 3660);\n    ASSERT_EQ(hh_mm_to_seconds(\"01:01\"), 3660);\n\n    ASSERT_EQ(hh_mm_to_seconds(\"23\"), -1);\n    ASSERT_EQ(hh_mm_to_seconds(\"23:\"), -1);\n    ASSERT_EQ(hh_mm_to_seconds(\":59\"), -1);\n    ASSERT_EQ(hh_mm_to_seconds(\"-1:00\"), -1);\n    ASSERT_EQ(hh_mm_to_seconds(\"24:00\"), -1);\n    
ASSERT_EQ(hh_mm_to_seconds(\"01:-1\"), -1);\n    ASSERT_EQ(hh_mm_to_seconds(\"01:60\"), -1);\n    ASSERT_EQ(hh_mm_to_seconds(\"a:00\"), -1);\n    ASSERT_EQ(hh_mm_to_seconds(\"01:b\"), -1);\n    ASSERT_EQ(hh_mm_to_seconds(\"01b\"), -1);\n}\n\nTEST(time_utils, get_unix_sec_today_midnight)\n{\n    ASSERT_LT(0, get_unix_sec_today_midnight());\n    ASSERT_LE(get_unix_sec_today_midnight(), time(nullptr));\n    ASSERT_GE(time(nullptr) - get_unix_sec_today_midnight(), 0);\n    ASSERT_LT(time(nullptr) - get_unix_sec_today_midnight(), 86400);\n}\n\nTEST(time_utils, hh_mm_today_to_unix_sec)\n{\n    ASSERT_EQ(get_unix_sec_today_midnight() + hh_mm_to_seconds(\"0:0\"),\n              hh_mm_today_to_unix_sec(\"0:0\"));\n    ASSERT_EQ(get_unix_sec_today_midnight() + hh_mm_to_seconds(\"23:59\"),\n              hh_mm_today_to_unix_sec(\"23:59\"));\n\n    ASSERT_EQ(hh_mm_today_to_unix_sec(\"23\"), -1);\n    ASSERT_EQ(hh_mm_today_to_unix_sec(\"23:\"), -1);\n    ASSERT_EQ(hh_mm_today_to_unix_sec(\":59\"), -1);\n    ASSERT_EQ(hh_mm_today_to_unix_sec(\"-1:00\"), -1);\n    ASSERT_EQ(hh_mm_today_to_unix_sec(\"24:00\"), -1);\n    ASSERT_EQ(hh_mm_today_to_unix_sec(\"01:-1\"), -1);\n    ASSERT_EQ(hh_mm_today_to_unix_sec(\"01:60\"), -1);\n    ASSERT_EQ(hh_mm_today_to_unix_sec(\"a:00\"), -1);\n    ASSERT_EQ(hh_mm_today_to_unix_sec(\"01:b\"), -1);\n    ASSERT_EQ(hh_mm_today_to_unix_sec(\"01b\"), -1);\n}\n\nTEST(time_utils, get_current_physical_time_ns)\n{\n    int64_t ts_ns = get_current_physical_time_ns();\n    ASSERT_LT(0, ts_ns);\n    ASSERT_GE(get_current_physical_time_ns() - ts_ns, 0);\n    ASSERT_LT(get_current_physical_time_ns() - ts_ns, 1e7); // < 10 ms\n}\n\nTEST(time_utils, time_ms_to_string)\n{\n    char buf[64];\n    time_ms_to_string(1605091506136, buf);\n    // The formatted time differs between time zones.\n    // The timestamp is 2020-11-11 18:45:06.136 in UTC+8,\n    // so (for whole-hour UTC offsets) it must be 2020-11-1x xx:45:06.136.\n    ASSERT_EQ(std::string(buf).substr(0, 9), \"2020-11-1\");\n    
ASSERT_EQ(std::string(buf).substr(13, 10), \":45:06.136\");\n}\n\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/token_bucket_throttling_controller_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <dsn/utils/token_bucket_throttling_controller.h>\n#include <gtest/gtest.h>\n\nnamespace dsn {\nnamespace utils {\n\n#define INVALIDATE_SITUATION_CHECK(env)                                                            \\\n    do {                                                                                           \\\n        std::string old_value, parse_err;                                                          \\\n        bool env_changed_result = false;                                                           \\\n        ASSERT_FALSE(cntl.parse_from_env(env, 4, parse_err, env_changed_result, old_value));       \\\n        ASSERT_EQ(env_changed_result, false);                                                      \\\n        ASSERT_EQ(parse_err, \"wrong format, you can set like 20000 or 20K\");                       \\\n        ASSERT_EQ(cntl._enabled, true);                                                            \\\n        ASSERT_EQ(old_value, old_env);                                                             \\\n        ASSERT_EQ(cntl._env_value, old_env);                                                       \\\n    
} while (0)\n\n#define VALIDATE_SITUATION_CHECK(                                                                  \\\n    env, partition_count, throttle_size, enabled, env_changed, old_env)                            \\\n    do {                                                                                           \\\n        bool env_changed_result = false;                                                           \\\n        std::string old_value, parse_err;                                                          \\\n        int32_t partitioned_throttle_size = throttle_size / partition_count;                       \\\n        ASSERT_TRUE(                                                                               \\\n            cntl.parse_from_env(env, partition_count, parse_err, env_changed_result, old_value));  \\\n        ASSERT_EQ(cntl._env_value, env);                                                           \\\n        ASSERT_EQ(cntl._partition_count, partition_count);                                         \\\n        ASSERT_EQ(cntl._burstsize, partitioned_throttle_size);                                     \\\n        ASSERT_EQ(cntl._rate, partitioned_throttle_size);                                          \\\n        ASSERT_EQ(cntl._enabled, enabled);                                                         \\\n        ASSERT_EQ(env_changed_result, env_changed);                                                \\\n        ASSERT_EQ(old_value, old_env);                                                             \\\n        ASSERT_EQ(parse_err, \"\");                                                                  \\\n    } while (0)\n\nclass token_bucket_throttling_controller_test : public ::testing::Test\n{\npublic:\n    void test_parse_env_basic_token_bucket_throttling()\n    {\n        token_bucket_throttling_controller cntl;\n\n        // token_bucket_throttling_controller doesn't support delay only\n        
VALIDATE_SITUATION_CHECK(\"20000*delay*100\", 4, 0, false, true, \"\");\n        VALIDATE_SITUATION_CHECK(\"200K\", 4, 200000, true, true, \"20000*delay*100\");\n        VALIDATE_SITUATION_CHECK(\"20000*delay*100,20000*reject*100\", 4, 20000, true, true, \"200K\");\n        VALIDATE_SITUATION_CHECK(\"20K*delay*100,20K*reject*100\",\n                                 4,\n                                 20000,\n                                 true,\n                                 true,\n                                 \"20000*delay*100,20000*reject*100\");\n        VALIDATE_SITUATION_CHECK(\n            \"20000*reject*100\", 4, 20000, true, true, \"20K*delay*100,20K*reject*100\");\n\n        // invalid arguments: each must be rejected, leaving the old env value unchanged\n        std::string old_env = \"20000*reject*100\";\n        INVALIDATE_SITUATION_CHECK(\"0\");\n        INVALIDATE_SITUATION_CHECK(\"*deldday*100\");\n        INVALIDATE_SITUATION_CHECK(\"\");\n        INVALIDATE_SITUATION_CHECK(\"*reject\");\n        INVALIDATE_SITUATION_CHECK(\"*reject*\");\n        INVALIDATE_SITUATION_CHECK(\"reject*\");\n        INVALIDATE_SITUATION_CHECK(\"reject\");\n        INVALIDATE_SITUATION_CHECK(\"200g\");\n        INVALIDATE_SITUATION_CHECK(\"200G\");\n        INVALIDATE_SITUATION_CHECK(\"M\");\n        INVALIDATE_SITUATION_CHECK(\"K\");\n        INVALIDATE_SITUATION_CHECK(\"-1K\");\n        INVALIDATE_SITUATION_CHECK(\"1aK\");\n        INVALIDATE_SITUATION_CHECK(\"pegNo1\");\n        INVALIDATE_SITUATION_CHECK(\"-20\");\n        INVALIDATE_SITUATION_CHECK(\"12KM\");\n        INVALIDATE_SITUATION_CHECK(\"1K2M\");\n        INVALIDATE_SITUATION_CHECK(\"2000K0*reject*100\");\n    }\n\n    void throttle_test()\n    {\n        auto cntl = std::make_unique<token_bucket_throttling_controller>();\n        std::string parse_err;\n        bool env_changed = false;\n        std::string old_value;\n        const int partition_count = 4;\n\n        int throttle_limit = 200000;\n        cntl->parse_from_env(\n            
std::to_string(throttle_limit), partition_count, parse_err, env_changed, old_value);\n\n        auto token_bucket = std::make_unique<DynamicTokenBucket>();\n        int fail_count = 0;\n        for (int i = 0; i < 100000; i++) {\n            token_bucket->consumeWithBorrowAndWait(\n                1, throttle_limit / partition_count * 0.8, throttle_limit / partition_count * 1.0);\n            cntl->consume_token(1);\n            if (!cntl->available()) {\n                fail_count++;\n            }\n        }\n        ASSERT_EQ(fail_count, 0);\n\n        sleep(1);\n\n        fail_count = 0;\n        for (int i = 0; i < 100000; i++) {\n            token_bucket->consumeWithBorrowAndWait(\n                1, throttle_limit / partition_count * 1.2, throttle_limit / partition_count * 1.5);\n            cntl->consume_token(1);\n            if (!cntl->available()) {\n                fail_count++;\n            }\n        }\n        ASSERT_GT(fail_count, 10000);\n\n        sleep(1);\n\n        fail_count = 0;\n        int fail_count1 = 0;\n        for (int i = 0; i < 200000; i++) {\n            if (i < 100000) {\n                token_bucket->consumeWithBorrowAndWait(1,\n                                                       throttle_limit / partition_count * 1.2,\n                                                       throttle_limit / partition_count * 1.5);\n                fail_count1 = fail_count;\n            } else {\n                token_bucket->consumeWithBorrowAndWait(1,\n                                                       throttle_limit / partition_count * 0.2,\n                                                       throttle_limit / partition_count * 0.3);\n            }\n            if (!cntl->consume_token(1)) {\n                fail_count++;\n            }\n        }\n        ASSERT_GT(fail_count1, 10000);\n        ASSERT_LE(fail_count, fail_count1 * 1.2);\n    }\n};\n\nTEST_F(token_bucket_throttling_controller_test, 
test_parse_env_basic_token_bucket_throttling)\n{\n    test_parse_env_basic_token_bucket_throttling();\n}\n\nTEST_F(token_bucket_throttling_controller_test, throttle_test) { throttle_test(); }\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/token_buckets_test.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include <gtest/gtest.h>\n#include <dsn/utils/token_buckets.h>\n\nnamespace dsn {\nnamespace utils {\n\nclass token_buckets_test : public testing::Test\n{\npublic:\n    std::unique_ptr<dsn::utils::token_buckets> _token_buckets_wrapper;\n\n    void SetUp() override\n    {\n        _token_buckets_wrapper = std::make_unique<dsn::utils::token_buckets>();\n    }\n\n    std::unordered_map<std::string, std::shared_ptr<folly::DynamicTokenBucket>>\n    token_buckets() const\n    {\n        return _token_buckets_wrapper->_token_buckets;\n    }\n};\n\nTEST_F(token_buckets_test, test_token_buckets)\n{\n    auto token1 = _token_buckets_wrapper->get_token_bucket(\"test1\");\n    auto token2 = _token_buckets_wrapper->get_token_bucket(\"test1\");\n    ASSERT_EQ(token_buckets().size(), 1);\n    ASSERT_EQ(token1, token2);\n\n    auto token3 = _token_buckets_wrapper->get_token_bucket(\"test2\");\n    ASSERT_EQ(token_buckets().size(), 2);\n    ASSERT_NE(token1, token3);\n}\n\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/test/utils.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/utility/utils.h>\n#include <dsn/utility/strings.h>\n#include <dsn/utility/binary_reader.h>\n#include <dsn/utility/binary_writer.h>\n#include <dsn/utility/link.h>\n#include <dsn/utility/crc.h>\n#include <dsn/utility/autoref_ptr.h>\n#include <dsn/c/api_layer1.h>\n#include <gtest/gtest.h>\n#include <dsn/utility/rand.h>\n\nusing namespace ::dsn;\nusing namespace ::dsn::utils;\n\nTEST(core, get_last_component)\n{\n    ASSERT_EQ(\"a\", get_last_component(\"a\", \"/\"));\n    ASSERT_EQ(\"b\", get_last_component(\"a/b\", \"/\"));\n    ASSERT_EQ(\"b\", 
get_last_component(\"a//b\", \"/\"));\n    ASSERT_EQ(\"\", get_last_component(\"a/\", \"/\"));\n    ASSERT_EQ(\"c\", get_last_component(\"a/b_c\", \"/_\"));\n}\n\nTEST(core, crc)\n{\n    char buffer[24];\n    for (int i = 0; i < sizeof(buffer) / sizeof(char); i++) {\n        buffer[i] = rand::next_u32(0, 200);\n    }\n\n    auto c1 = dsn::utils::crc32_calc(buffer, 12, 0);\n    auto c2 = dsn::utils::crc32_calc(buffer + 12, 12, c1);\n    auto c3 = dsn::utils::crc32_calc(buffer, 24, 0);\n    auto c4 = dsn::utils::crc32_concat(0, 0, c1, 12, c1, c2, 12);\n    EXPECT_TRUE(c3 == c4);\n}\n\nTEST(core, binary_io)\n{\n    int value = 0xdeadbeef;\n    binary_writer writer;\n    writer.write(value);\n\n    auto buf = writer.get_buffer();\n    binary_reader reader(buf);\n    int value3;\n    reader.read(value3);\n\n    EXPECT_TRUE(value3 == value);\n}\n\nTEST(core, split_args)\n{\n    std::string value = \"a ,b, c \";\n    std::vector<std::string> sargs;\n    std::list<std::string> sargs2;\n    ::dsn::utils::split_args(value.c_str(), sargs, ',');\n    ::dsn::utils::split_args(value.c_str(), sargs2, ',');\n\n    EXPECT_EQ(sargs.size(), 3);\n    EXPECT_EQ(sargs[0], \"a\");\n    EXPECT_EQ(sargs[1], \"b\");\n    EXPECT_EQ(sargs[2], \"c\");\n\n    EXPECT_EQ(sargs2.size(), 3);\n    auto it = sargs2.begin();\n    EXPECT_EQ(*it++, \"a\");\n    EXPECT_EQ(*it++, \"b\");\n    EXPECT_EQ(*it++, \"c\");\n\n    std::unordered_set<std::string> sargs_set;\n    dsn::utils::split_args(value.c_str(), sargs_set, ',');\n    EXPECT_EQ(sargs_set.size(), 3);\n\n    // test value = \"\"\n    value = \"\";\n    sargs.clear();\n    dsn::utils::split_args(value.c_str(), sargs, ',');\n    EXPECT_EQ(sargs.size(), 0);\n\n    sargs2.clear();\n    dsn::utils::split_args(value.c_str(), sargs2, ',');\n    EXPECT_EQ(sargs2.size(), 0);\n\n    sargs_set.clear();\n    dsn::utils::split_args(value.c_str(), sargs_set, ',');\n    EXPECT_EQ(sargs_set.size(), 0);\n}\n\nTEST(core, split_args_keep_place_holder)\n{\n    
std::string value = \"a ,b, c \";\n    std::vector<std::string> sargs;\n    ::dsn::utils::split_args(value.c_str(), sargs, ',', true);\n\n    EXPECT_EQ(sargs.size(), 3);\n    EXPECT_EQ(sargs[0], \"a\");\n    EXPECT_EQ(sargs[1], \"b\");\n    EXPECT_EQ(sargs[2], \"c\");\n\n    value = \" ,  a ,b, c \";\n    sargs.clear();\n    ::dsn::utils::split_args(value.c_str(), sargs, ',', true);\n\n    EXPECT_EQ(sargs.size(), 4);\n    EXPECT_EQ(sargs[0], \"\");\n    EXPECT_EQ(sargs[1], \"a\");\n    EXPECT_EQ(sargs[2], \"b\");\n    EXPECT_EQ(sargs[3], \"c\");\n\n    value = \"a ,b, , c\";\n    sargs.clear();\n    ::dsn::utils::split_args(value.c_str(), sargs, ',', true);\n\n    EXPECT_EQ(sargs.size(), 4);\n    EXPECT_EQ(sargs[0], \"a\");\n    EXPECT_EQ(sargs[1], \"b\");\n    EXPECT_EQ(sargs[2], \"\");\n    EXPECT_EQ(sargs[3], \"c\");\n\n    value = \"a ,b, c , \";\n    sargs.clear();\n    ::dsn::utils::split_args(value.c_str(), sargs, ',', true);\n\n    EXPECT_EQ(sargs.size(), 4);\n    EXPECT_EQ(sargs[0], \"a\");\n    EXPECT_EQ(sargs[1], \"b\");\n    EXPECT_EQ(sargs[2], \"c\");\n    EXPECT_EQ(sargs[3], \"\");\n\n    value = \", a ,b, ,c , \";\n    sargs.clear();\n    ::dsn::utils::split_args(value.c_str(), sargs, ',', true);\n\n    EXPECT_EQ(sargs.size(), 6);\n    EXPECT_EQ(sargs[0], \"\");\n    EXPECT_EQ(sargs[1], \"a\");\n    EXPECT_EQ(sargs[2], \"b\");\n    EXPECT_EQ(sargs[3], \"\");\n    EXPECT_EQ(sargs[4], \"c\");\n    EXPECT_EQ(sargs[5], \"\");\n}\n\nTEST(core, trim_string)\n{\n    std::string value = \" x x x x \";\n    auto r = trim_string((char *)value.c_str());\n    EXPECT_EQ(std::string(r), \"x x x x\");\n}\n\nTEST(core, dlink)\n{\n    dlink links[10];\n    dlink hdr;\n\n    for (int i = 0; i < 10; i++)\n        links[i].insert_before(&hdr);\n\n    int count = 0;\n    dlink *p = hdr.next();\n    while (p != &hdr) {\n        count++;\n        p = p->next();\n    }\n\n    EXPECT_EQ(count, 10);\n\n    p = hdr.next();\n    while (p != &hdr) {\n        auto p1 = p;\n       
 p = p->next();\n        p1->remove();\n        count--;\n    }\n\n    EXPECT_TRUE(hdr.is_alone());\n    EXPECT_TRUE(count == 0);\n}\n\nclass foo : public ::dsn::ref_counter\n{\npublic:\n    foo(int &count) : _count(count) { _count++; }\n\n    ~foo() { _count--; }\n\nprivate:\n    int &_count;\n};\n\ntypedef ::dsn::ref_ptr<foo> foo_ptr;\n\nTEST(core, ref_ptr)\n{\n    int count = 0;\n    foo_ptr x = nullptr;\n    auto y = new foo(count);\n    x = y;\n    EXPECT_TRUE(x->get_count() == 1);\n    EXPECT_TRUE(count == 1);\n    x = new foo(count);\n    EXPECT_TRUE(x->get_count() == 1);\n    EXPECT_TRUE(count == 1);\n    x = nullptr;\n    EXPECT_TRUE(count == 0);\n\n    std::map<int, foo_ptr> xs;\n    x = new foo(count);\n    EXPECT_TRUE(x->get_count() == 1);\n    EXPECT_TRUE(count == 1);\n    xs.insert(std::make_pair(1, x));\n    EXPECT_TRUE(x->get_count() == 2);\n    EXPECT_TRUE(count == 1);\n    x = nullptr;\n    EXPECT_TRUE(count == 1);\n    xs.clear();\n    EXPECT_TRUE(count == 0);\n\n    x = new foo(count);\n    EXPECT_TRUE(count == 1);\n    xs[2] = x;\n    EXPECT_TRUE(x->get_count() == 2);\n    x = nullptr;\n    EXPECT_TRUE(count == 1);\n    xs.clear();\n    EXPECT_TRUE(count == 0);\n\n    y = new foo(count);\n    EXPECT_TRUE(count == 1);\n    xs.insert(std::make_pair(1, y));\n    EXPECT_TRUE(count == 1);\n    EXPECT_TRUE(y->get_count() == 1);\n    xs.clear();\n    EXPECT_TRUE(count == 0);\n\n    y = new foo(count);\n    EXPECT_TRUE(count == 1);\n    xs[2] = y;\n    EXPECT_TRUE(count == 1);\n    EXPECT_TRUE(y->get_count() == 1);\n    xs.clear();\n    EXPECT_TRUE(count == 0);\n\n    foo_ptr z = new foo(count);\n    EXPECT_TRUE(count == 1);\n    z = foo_ptr();\n    EXPECT_TRUE(count == 0);\n}\n\nTEST(core, flip_map)\n{\n    std::map<int, int> source;\n    source.emplace(3, 1);\n    source.emplace(2, 1);\n    source.emplace(1, 1);\n\n    auto target = flip_map(source);\n    ASSERT_EQ(target.size(), 3);\n    ASSERT_EQ(target.count(1), 3);\n    ASSERT_EQ(target.count(2), 
0);\n    ASSERT_EQ(target.count(3), 0);\n    std::string values;\n    for (auto it = target.equal_range(1); it.first != it.second; it.first++) {\n        values += std::to_string(it.first->second);\n    }\n    ASSERT_EQ(values, \"123\");\n}\n\nTEST(core, get_intersection)\n{\n    std::set<int> set1;\n    set1.insert(1);\n    set1.insert(2);\n    set1.insert(3);\n\n    std::set<int> set2;\n    set2.insert(3);\n    set2.insert(4);\n    set2.insert(5);\n\n    auto intersection = utils::get_intersection(set1, set2);\n    ASSERT_EQ(intersection.size(), 1);\n    ASSERT_EQ(*intersection.begin(), 3);\n}\n"
  },
  {
    "path": "src/utils/thread_access_checker.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n#include <dsn/utility/process_utils.h>\n#include <dsn/tool-api/thread_access_checker.h>\n#include <dsn/c/api_utilities.h>\n\nnamespace dsn {\n\nthread_access_checker::thread_access_checker() { _access_thread_id_inited = false; }\n\nthread_access_checker::~thread_access_checker() { _access_thread_id_inited = false; }\n\nvoid thread_access_checker::only_one_thread_access()\n{\n    if (_access_thread_id_inited) {\n        dassert(::dsn::utils::get_current_tid() == _access_thread_id,\n                \"the service is assumed to be accessed by one thread only!\");\n    } else {\n        _access_thread_id = ::dsn::utils::get_current_tid();\n        _access_thread_id_inited = true;\n    }\n}\n}\n"
  },
  {
    "path": "src/utils/throttling_controller.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"throttling_controller.h\"\n\n#include <dsn/c/api_layer1.h>\n#include <dsn/utility/string_conv.h>\n#include <dsn/utility/strings.h>\n\nnamespace dsn {\nnamespace replication {\n\nthrottling_controller::throttling_controller()\n    : _enabled(false),\n      _partition_count(0),\n      _delay_units(0),\n      _delay_ms(0),\n      _reject_units(0),\n      _reject_delay_ms(0),\n      _last_request_time(0),\n      _cur_units(0)\n{\n}\n\nbool throttling_controller::parse_from_env(const std::string &env_value,\n                                           int partition_count,\n                                           std::string &parse_error,\n                                           bool &changed,\n                                           std::string &old_env_value)\n{\n    changed = false;\n    if (_enabled && env_value == _env_value && partition_count == _partition_count)\n        return true;\n    std::vector<std::string> sargs;\n    utils::split_args(env_value.c_str(), sargs, ',', true);\n    if (sargs.empty()) {\n        parse_error = \"empty env value\";\n        return false;\n    }\n    bool delay_parsed = false;\n    int64_t 
delay_units = 0;\n    int64_t delay_ms = 0;\n    bool reject_parsed = false;\n    int64_t reject_units = 0;\n    int64_t reject_delay_ms = 0;\n    for (std::string &s : sargs) {\n        std::vector<std::string> sargs1;\n        utils::split_args(s.c_str(), sargs1, '*', true);\n        if (sargs1.size() != 3) {\n            parse_error = \"invalid field count, should be 3\";\n            return false;\n        }\n\n        int64_t unit_multiplier = 1;\n        if (!sargs1[0].empty()) {\n            if (*sargs1[0].rbegin() == 'M') {\n                unit_multiplier = 1000 * 1000;\n            } else if (*sargs1[0].rbegin() == 'K') {\n                unit_multiplier = 1000;\n            }\n            if (unit_multiplier != 1) {\n                sargs1[0].pop_back();\n            }\n        }\n        int64_t units = 0;\n        if (!buf2int64(sargs1[0], units) || units < 0) {\n            parse_error = \"invalid units, should be non-negative int\";\n            return false;\n        }\n        units *= unit_multiplier;\n\n        int64_t ms = 0;\n        if (!buf2int64(sargs1[2], ms) || ms < 0) {\n            parse_error = \"invalid delay ms, should be non-negative int\";\n            return false;\n        }\n        if (sargs1[1] == \"delay\") {\n            if (delay_parsed) {\n                parse_error = \"duplicate delay config\";\n                return false;\n            }\n            delay_parsed = true;\n            delay_units = units / partition_count + 1;\n            delay_ms = ms;\n        } else if (sargs1[1] == \"reject\") {\n            if (reject_parsed) {\n                parse_error = \"duplicate reject config\";\n                return false;\n            }\n            reject_parsed = true;\n            reject_units = units / partition_count + 1;\n            reject_delay_ms = ms;\n        } else {\n            parse_error = \"invalid throttling type\";\n            return false;\n        }\n    }\n    changed = true;\n    old_env_value = 
_env_value;\n    _enabled = true;\n    _env_value = env_value;\n    _partition_count = partition_count;\n    _delay_units = delay_units;\n    _delay_ms = delay_ms;\n    _reject_units = reject_units;\n    _reject_delay_ms = reject_delay_ms;\n    return true;\n}\n\nvoid throttling_controller::reset(bool &changed, std::string &old_env_value)\n{\n    if (_enabled) {\n        changed = true;\n        old_env_value = _env_value;\n        _enabled = false;\n        _env_value.clear();\n        _partition_count = 0;\n        _delay_units = 0;\n        _delay_ms = 0;\n        _reject_units = 0;\n        _reject_delay_ms = 0;\n        _last_request_time = 0;\n        _cur_units = 0;\n    } else {\n        changed = false;\n    }\n}\n\nthrottling_controller::throttling_type throttling_controller::control(\n    const int64_t client_timeout_ms, int32_t request_units, int64_t &delay_ms)\n{\n    // return PASS if throttling controller is not enabled\n    if (!_enabled) {\n        return PASS;\n    }\n\n    int64_t now_s = dsn_now_s();\n    if (now_s != _last_request_time) {\n        _cur_units = 0;\n        _last_request_time = now_s;\n    }\n    _cur_units += request_units;\n    if (_reject_units > 0 && _cur_units > _reject_units) {\n        _cur_units -= request_units;\n        if (client_timeout_ms > 0) {\n            delay_ms = std::min(_reject_delay_ms, client_timeout_ms / 2);\n        } else {\n            delay_ms = _reject_delay_ms;\n        }\n        return REJECT;\n    }\n    if (_delay_units > 0 && _cur_units > _delay_units) {\n        if (client_timeout_ms > 0) {\n            delay_ms = std::min(_delay_ms, client_timeout_ms / 2);\n        } else {\n            delay_ms = _delay_ms;\n        }\n        return DELAY;\n    }\n    return PASS;\n}\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/throttling_controller.h",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#pragma once\n\n#include <stdint.h>\n#include <string>\n\nnamespace dsn {\n\nnamespace replication {\n\n// Used for replica throttling.\n// Different throttling strategies may use different 'request_units', which is\n// the cost of each request. 
For QPS-based throttling, request_units=1.\n// For size-based throttling, request_units is the bytes size of the incoming\n// request.\n//\n// not thread safe\nclass throttling_controller\n{\npublic:\n    enum throttling_type\n    {\n        PASS,\n        DELAY,\n        REJECT\n    };\n\npublic:\n    throttling_controller();\n\n    // Configures throttling strategy dynamically from app-envs.\n    // The result of `delay_units` and `reject_units` are ensured greater than 0.\n    // If user-given parameter is 0*delay*100, then delay_units=1, likewise for reject_units.\n    //\n    // return true if parse succeed.\n    // return false if parse failed for the reason of invalid env_value.\n    // if return false, the original value will not be changed.\n    // 'parse_error' is set when return false.\n    // 'changed' is set when return true.\n    // 'old_env_value' is set when 'changed' is set to true.\n    bool parse_from_env(const std::string &env_value,\n                        int partition_count,\n                        /*out*/ std::string &parse_error,\n                        /*out*/ bool &changed,\n                        /*out*/ std::string &old_env_value);\n\n    // reset to no throttling.\n    void reset(/*out*/ bool &changed, /*out*/ std::string &old_env_value);\n\n    // return the current env value.\n    const std::string &env_value() const { return _env_value; }\n\n    // do throttling control, return throttling type.\n    // 'delay_ms' is set when the return type is not PASS.\n    throttling_type\n    control(const int64_t client_timeout_ms, int32_t request_units, /*out*/ int64_t &delay_ms);\n\nprivate:\n    friend class throttling_controller_test;\n\n    bool _enabled;\n    std::string _env_value;\n    int32_t _partition_count;\n    int64_t _delay_units;     // should >= 0\n    int64_t _delay_ms;        // should >= 0\n    int64_t _reject_units;    // should >= 0\n    int64_t _reject_delay_ms; // should >= 0\n    int64_t _last_request_time;\n    
int64_t _cur_units;\n};\n\n} // namespace replication\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/time_utils.cpp",
    "content": "/*\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements.  See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership.  The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License.  You may obtain a copy of the License at\n *\n *   http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing,\n * software distributed under the License is distributed on an\n * \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n * KIND, either express or implied.  See the License for the\n * specific language governing permissions and limitations\n * under the License.\n */\n\n#include <dsn/utils/time_utils.h>\n#include <fmt/chrono.h>\n#if FMT_VERSION < 60000\n#include <fmt/time.h> // time.h was removed from fmtlib >=6.x\n#endif\n#include <fmt/printf.h>\n\nnamespace dsn {\nnamespace utils {\n\n/*extern*/ void time_ms_to_string(uint64_t ts_ms, char *str)\n{\n    struct tm tmp;\n    auto ret = get_localtime(ts_ms, &tmp);\n    fmt::format_to(str, \"{:%Y-%m-%d %H:%M:%S}.{}\", *ret, static_cast<uint32_t>(ts_ms % 1000));\n}\n\n/*extern*/ void time_ms_to_string(uint64_t ts_ms, std::string &str)\n{\n    str.clear();\n    struct tm tmp;\n    auto ret = get_localtime(ts_ms, &tmp);\n    fmt::format_to(std::back_inserter(str),\n                   \"{:%Y-%m-%d %H:%M:%S}.{}\",\n                   *ret,\n                   static_cast<uint32_t>(ts_ms % 1000));\n}\n\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/token_bucket_throttling_controller.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"dsn/utils/token_bucket_throttling_controller.h\"\n\n#include <dsn/c/api_utilities.h>\n#include <dsn/utility/string_conv.h>\n\nnamespace dsn {\nnamespace utils {\n\ntoken_bucket_throttling_controller::token_bucket_throttling_controller()\n    : _enabled(false), _partition_count(0), _rate(0), _burstsize(0)\n{\n    _token_bucket = std::make_unique<DynamicTokenBucket>();\n}\n\nbool token_bucket_throttling_controller::consume_token(int32_t request_units)\n{\n    if (!_enabled) {\n        return true;\n    }\n    auto res =\n        _token_bucket->consumeWithBorrowNonBlocking((double)request_units, _rate, _burstsize);\n\n    return (res.get_value_or(0) == 0);\n}\n\nbool token_bucket_throttling_controller::available() const\n{\n    if (!_enabled) {\n        return true;\n    }\n\n    return _token_bucket->available(_rate, _burstsize) > 0;\n}\n\nvoid token_bucket_throttling_controller::reset(bool &changed, std::string &old_env_value)\n{\n    if (_enabled) {\n        changed = true;\n        old_env_value = _env_value;\n        _enabled = false;\n        _env_value.clear();\n        _partition_count = 0;\n        _rate = 0;\n        
_burstsize = 0;\n    } else {\n        changed = false;\n    }\n}\n\n// return the current env value.\nconst std::string &token_bucket_throttling_controller::env_value() const { return _env_value; }\n\nbool token_bucket_throttling_controller::parse_from_env(const std::string &env_value,\n                                                        int32_t partition_count,\n                                                        std::string &parse_error,\n                                                        bool &changed,\n                                                        std::string &old_env_value)\n{\n    old_env_value = _env_value;\n    changed = false;\n\n    if (_enabled && dsn_likely(env_value == _env_value) &&\n        dsn_likely(partition_count == _partition_count)) {\n        return true;\n    }\n\n    int64_t reject_size_value;\n    bool enabled;\n    if (!transform_env_string(env_value, reject_size_value, enabled, parse_error)) {\n        return false;\n    }\n\n    changed = true;\n\n    _enabled = enabled;\n    _env_value = env_value;\n    _partition_count = partition_count;\n    _rate = reject_size_value / std::max(partition_count, 1);\n    _burstsize = _rate;\n    return true;\n}\n\nbool token_bucket_throttling_controller::string_to_value(std::string str, int64_t &value)\n{\n    int64_t unit_multiplier = 1;\n    if (*str.rbegin() == 'M') {\n        unit_multiplier = 1000 * 1000;\n    } else if (*str.rbegin() == 'K') {\n        unit_multiplier = 1000;\n    }\n    if (unit_multiplier != 1) {\n        str.pop_back();\n    }\n    if (!buf2int64(str, value) || value < 0) {\n        return false;\n    }\n    value *= unit_multiplier;\n    return true;\n}\n\nbool token_bucket_throttling_controller::validate(const std::string &env, std::string &hint_message)\n{\n    int64_t temp;\n    bool temp_bool;\n    bool validated = transform_env_string(env, temp, temp_bool, hint_message);\n    return validated;\n};\n\nbool 
token_bucket_throttling_controller::transform_env_string(const std::string &env,\n                                                              int64_t &reject_size_value,\n                                                              bool &enabled,\n                                                              std::string &hint_message)\n{\n    // Translates the env string into the reject threshold `reject_size_value`.\n    // Returns false and fills `hint_message` when `env` matches none of the formats below.\n    enabled = true;\n\n    // string_to_value() inspects the last character of its argument, so an empty string must\n    // never be handed to it.\n    if (env.empty()) {\n        hint_message = \"wrong format, you can set like 20000 or 20K\";\n        return false;\n    }\n\n    // format like \"20000\"\n    if (buf2int64(env, reject_size_value) && reject_size_value > 0) {\n        return true;\n    }\n\n    // format like \"200K\"\n    if (string_to_value(env, reject_size_value) && reject_size_value > 0) {\n        return true;\n    }\n\n    // format like \"20000*delay*100\"\n    if (env.find(\"delay\") != std::string::npos && env.find(\"reject\") == std::string::npos) {\n        // rate must > 0 in TokenBucket.h\n        reject_size_value = 1;\n        enabled = false;\n\n        dinfo(\"token_bucket_throttling_controller doesn't support delay method, so throttling \"\n              \"controller is disabled now\");\n        return true;\n    }\n\n    // format like \"20000*delay*100,20000*reject*100\"\n    // Without a comma, comma_index is npos and `comma_index + 1` wraps around to 0, so a lone\n    // \"20000*reject*100\" is searched and sliced from the start of the string.\n    auto comma_index = env.find(\",\");\n    auto star_index = env.find(\"*reject\", comma_index + 1);\n    // find() signals 'not found' with the unsigned npos, so a `< 0` test could never fire.\n    if (star_index == std::string::npos) {\n        hint_message = \"wrong format, you can set like 20000 or 20K\";\n        return false;\n    }\n    auto reject_size = env.substr(comma_index + 1, star_index - comma_index - 1);\n\n    if (!reject_size.empty() && string_to_value(reject_size, reject_size_value) &&\n        reject_size_value > 0) {\n        return true;\n    }\n\n    hint_message = \"wrong format, you can set like 20000 or 20K\";\n    return false;\n}\n\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/token_buckets.cpp",
    "content": "// Licensed to the Apache Software Foundation (ASF) under one\n// or more contributor license agreements.  See the NOTICE file\n// distributed with this work for additional information\n// regarding copyright ownership.  The ASF licenses this file\n// to you under the Apache License, Version 2.0 (the\n// \"License\"); you may not use this file except in compliance\n// with the License.  You may obtain a copy of the License at\n//\n//   http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing,\n// software distributed under the License is distributed on an\n// \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n// KIND, either express or implied.  See the License for the\n// specific language governing permissions and limitations\n// under the License.\n\n#include \"dsn/utils/token_buckets.h\"\n\nnamespace dsn {\nnamespace utils {\n\nstd::shared_ptr<folly::DynamicTokenBucket> token_buckets::get_token_bucket(const std::string &name)\n{\n    {\n        utils::auto_read_lock l(_buckets_lock);\n        auto iter = _token_buckets.find(name);\n        if (iter != _token_buckets.end()) {\n            return iter->second;\n        }\n    }\n\n    utils::auto_write_lock l(_buckets_lock);\n    auto iter = _token_buckets.find(name);\n    if (iter != _token_buckets.end()) {\n        return iter->second;\n    }\n\n    auto token = std::make_shared<folly::DynamicTokenBucket>();\n    _token_buckets.emplace(name, token);\n    return token;\n}\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/utils.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     What is this file about?\n *\n * Revision history:\n *     xxxx-xx-xx, author, first version\n *     xxxx-xx-xx, author, fix bug about xxx\n */\n\n#include <dsn/utility/utils.h>\n\n#include <arpa/inet.h>\n#include <ifaddrs.h>\n#include <netdb.h>\n#include <netinet/in.h>\n#include <sys/socket.h>\n\n#include <array>\n#include <fstream>\n#include <iostream>\n#include <memory>\n#include <random>\n\n#include <dsn/service_api_cpp.h>\n#include <dsn/utility/singleton.h>\n#include <sys/stat.h>\n#include <sys/types.h>\n\n#if defined(__linux__)\n#include <sys/syscall.h>\n#include <unistd.h>\n#elif defined(__FreeBSD__)\n#include <sys/thr.h>\n#elif defined(__APPLE__)\n#include <pthread.h>\n#endif\n\nnamespace dsn 
{\nnamespace utils {\n\nbool hostname_from_ip(uint32_t ip, std::string *hostname_result)\n{\n    struct sockaddr_in addr_in;\n    addr_in.sin_family = AF_INET;\n    addr_in.sin_port = 0;\n    addr_in.sin_addr.s_addr = ip;\n    char hostname[256];\n    int err = getnameinfo((struct sockaddr *)(&addr_in),\n                          sizeof(struct sockaddr),\n                          hostname,\n                          sizeof(hostname),\n                          nullptr,\n                          0,\n                          NI_NAMEREQD);\n    if (err != 0) {\n        struct in_addr net_addr;\n        net_addr.s_addr = ip;\n        char ip_str[256];\n        inet_ntop(AF_INET, &net_addr, ip_str, sizeof(ip_str));\n        if (err == EAI_SYSTEM) {\n            dwarn(\"got error %s when try to resolve %s\", strerror(errno), ip_str);\n        } else {\n            dwarn(\"return error(%s) when try to resolve %s\", gai_strerror(err), ip_str);\n        }\n        return false;\n    } else {\n        *hostname_result = std::string(hostname);\n        return true;\n    }\n}\n\nbool hostname_from_ip(const char *ip, std::string *hostname_result)\n{\n    uint32_t ip_addr;\n    if (inet_pton(AF_INET, ip, &ip_addr) != 1) {\n        // inet_pton() returns 1 on success (network address was successfully converted)\n        *hostname_result = ip;\n        return false;\n    }\n    if (!hostname_from_ip(ip_addr, hostname_result)) {\n        *hostname_result = ip;\n        return false;\n    }\n    return true;\n}\n\nbool hostname_from_ip_port(const char *ip_port, std::string *hostname_result)\n{\n    dsn::rpc_address addr;\n    if (!addr.from_string_ipv4(ip_port)) {\n        dwarn(\"invalid ip_port(%s)\", ip_port);\n        *hostname_result = ip_port;\n        return false;\n    }\n    if (!hostname(addr, hostname_result)) {\n        *hostname_result = ip_port;\n        return false;\n    }\n    return true;\n}\n\nbool hostname(const rpc_address &address, std::string 
*hostname_result)\n{\n    if (address.type() != HOST_TYPE_IPV4) {\n        return false;\n    }\n    if (hostname_from_ip(htonl(address.ip()), hostname_result)) {\n        *hostname_result += \":\" + std::to_string(address.port());\n        return true;\n    }\n    return false;\n}\n\n// Resolves every entry of the comma-separated `ip_list` via hostname_from_ip() and stores the\n// comma-joined result in `hostname_result_list`; entries that cannot be resolved are kept as\n// given. Returns true only if every entry was resolved.\nbool list_hostname_from_ip(const char *ip_list, std::string *hostname_result_list)\n{\n    std::vector<std::string> splitted_ip;\n    dsn::utils::split_args(ip_list, splitted_ip, ',');\n\n    if (splitted_ip.empty()) {\n        dwarn(\"invalid ip_list(%s)\", ip_list);\n        // hand back the whole input; `*ip_list` would assign only its first character\n        *hostname_result_list = ip_list;\n        return false;\n    }\n\n    std::string temp;\n    std::stringstream result;\n    bool all_ok = true;\n    for (size_t i = 0; i < splitted_ip.size(); ++i) {\n        result << (i ? \",\" : \"\");\n        if (hostname_from_ip(splitted_ip[i].c_str(), &temp)) {\n            result << temp;\n        } else {\n            result << splitted_ip[i].c_str();\n            all_ok = false;\n        }\n    }\n    *hostname_result_list = result.str();\n    return all_ok;\n}\n\n// Same contract as list_hostname_from_ip(), but every entry of `ip_port_list` is resolved with\n// hostname_from_ip_port().\nbool list_hostname_from_ip_port(const char *ip_port_list, std::string *hostname_result_list)\n{\n    std::vector<std::string> splitted_ip_port;\n    dsn::utils::split_args(ip_port_list, splitted_ip_port, ',');\n\n    if (splitted_ip_port.empty()) {\n        dwarn(\"invalid ip_list(%s)\", ip_port_list);\n        // hand back the whole input; `*ip_port_list` would assign only its first character\n        *hostname_result_list = ip_port_list;\n        return false;\n    }\n\n    std::string temp;\n    std::stringstream result;\n    bool all_ok = true;\n    for (size_t i = 0; i < splitted_ip_port.size(); ++i) {\n        result << (i ? \",\" : \"\");\n        if (hostname_from_ip_port(splitted_ip_port[i].c_str(), &temp)) {\n            result << temp;\n        } else {\n            result << splitted_ip_port[i].c_str();\n            all_ok = false;\n        }\n    }\n    *hostname_result_list = result.str();\n    return all_ok;\n}\n} // namespace utils\n} // namespace dsn\n"
  },
  {
    "path": "src/utils/zlock_provider.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#pragma once\n\n#include <dsn/utility/extensible_object.h>\n#include <dsn/utility/factory_store.h>\n#include <dsn/tool-api/zlocks.h>\n\nnamespace dsn {\n\nclass ilock\n{\npublic:\n    virtual ~ilock() {}\n    virtual void lock() = 0;\n    virtual bool try_lock() = 0;\n    virtual void unlock() = 0;\n};\n\nclass lock_provider : public ilock, public extensible_object<lock_provider, 4>\n{\npublic:\n    template <typename T>\n    static lock_provider *create(lock_provider *inner_provider)\n    {\n        return new T(inner_provider);\n    }\n    typedef lock_provider *(*factory)(lock_provider *);\n\n    template <typename T>\n    static void register_component(const char *name)\n    {\n        
utils::factory_store<lock_provider>::register_factory(name, create<T>, PROVIDER_TYPE_MAIN);\n    }\n\npublic:\n    lock_provider(lock_provider *inner_provider) { _inner_provider = inner_provider; }\n    virtual ~lock_provider()\n    {\n        if (nullptr != _inner_provider)\n            delete _inner_provider;\n    }\n    lock_provider *get_inner_provider() const { return _inner_provider; }\n\nprivate:\n    lock_provider *_inner_provider;\n};\n\nclass lock_nr_provider : public ilock, public extensible_object<lock_nr_provider, 4>\n{\npublic:\n    template <typename T>\n    static lock_nr_provider *create(lock_nr_provider *inner_provider)\n    {\n        return new T(inner_provider);\n    }\n\n    typedef lock_nr_provider *(*factory)(lock_nr_provider *);\n\n    template <typename T>\n    static void register_component(const char *name)\n    {\n        utils::factory_store<lock_nr_provider>::register_factory(\n            name, create<T>, PROVIDER_TYPE_MAIN);\n    }\n\npublic:\n    lock_nr_provider(lock_nr_provider *inner_provider) { _inner_provider = inner_provider; }\n    virtual ~lock_nr_provider()\n    {\n        if (nullptr != _inner_provider)\n            delete _inner_provider;\n    }\n    lock_nr_provider *get_inner_provider() const { return _inner_provider; }\n\nprivate:\n    lock_nr_provider *_inner_provider;\n};\n\nclass rwlock_nr_provider : public extensible_object<rwlock_nr_provider, 4>\n{\npublic:\n    template <typename T>\n    static rwlock_nr_provider *create(rwlock_nr_provider *inner_provider)\n    {\n        return new T(inner_provider);\n    }\n\n    typedef rwlock_nr_provider *(*factory)(rwlock_nr_provider *);\n\n    template <typename T>\n    static void register_component(const char *name)\n    {\n        utils::factory_store<rwlock_nr_provider>::register_factory(\n            name, create<T>, PROVIDER_TYPE_MAIN);\n    }\n\npublic:\n    rwlock_nr_provider(rwlock_nr_provider *inner_provider) { _inner_provider = inner_provider; }\n    virtual 
~rwlock_nr_provider()\n    {\n        if (nullptr != _inner_provider)\n            delete _inner_provider;\n    }\n\n    virtual void lock_read() = 0;\n    virtual void unlock_read() = 0;\n    virtual bool try_lock_read() = 0;\n\n    virtual void lock_write() = 0;\n    virtual void unlock_write() = 0;\n    virtual bool try_lock_write() = 0;\n\n    rwlock_nr_provider *get_inner_provider() const { return _inner_provider; }\n\nprivate:\n    rwlock_nr_provider *_inner_provider;\n};\n\nclass semaphore_provider : public extensible_object<semaphore_provider, 4>\n{\npublic:\n    template <typename T>\n    static semaphore_provider *create(int initCount, semaphore_provider *inner_provider)\n    {\n        return new T(initCount, inner_provider);\n    }\n\n    typedef semaphore_provider *(*factory)(int, semaphore_provider *);\n\n    template <typename T>\n    static void register_component(const char *name)\n    {\n        utils::factory_store<semaphore_provider>::register_factory(\n            name, create<T>, PROVIDER_TYPE_MAIN);\n    }\n\npublic:\n    semaphore_provider(int initial_count, semaphore_provider *inner_provider)\n    {\n        _inner_provider = inner_provider;\n    }\n    virtual ~semaphore_provider()\n    {\n        if (nullptr != _inner_provider)\n            delete _inner_provider;\n    }\n\npublic:\n    virtual void signal(int count) = 0;\n    virtual bool wait(int timeout_milliseconds = TIME_MS_MAX) = 0;\n\n    semaphore_provider *get_inner_provider() const { return _inner_provider; }\n\nprivate:\n    semaphore_provider *_inner_provider;\n};\n}\n"
  },
  {
    "path": "src/zookeeper/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn.replication.zookeeper_provider)\n\n\n# Source files under CURRENT project directory will be automatically included.\n# You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"\")\n\n# Search mode for source files under CURRENT project directory?\n# \"GLOB_RECURSE\" for recursive search\n# \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS zookeeper hashtable ssl crypto)\n\n# Extra files that will be installed\nset(MY_BINPLACES \"\")\n\ndsn_add_static_library()\n\nadd_definitions(-DHAVE_CYRUS_SASL_H)\n\nadd_subdirectory(test)\n"
  },
  {
    "path": "src/zookeeper/distributed_lock_service_zookeeper.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     distributed lock service implemented with zookeeper\n *\n * Revision history:\n *     2015-12-04, @shengofsun (sunweijie@xiaomi.com)\n */\n#include <dsn/tool-api/async_calls.h>\n#include <dsn/dist/replication/replication.codes.h>\n\n#include <zookeeper/zookeeper.h>\n#include <boost/lexical_cast.hpp>\n#include <functional>\n#include <algorithm>\n#include <utility>\n\n#include \"zookeeper_session.h\"\n#include \"distributed_lock_service_zookeeper.h\"\n#include \"lock_struct.h\"\n#include \"lock_types.h\"\n\n#include \"zookeeper_error.h\"\n\nnamespace dsn {\nnamespace dist {\n\nstd::string distributed_lock_service_zookeeper::LOCK_NODE_PREFIX = 
\"LOCKNODE\";\n\ndistributed_lock_service_zookeeper::distributed_lock_service_zookeeper() : ref_counter()\n{\n    _first_call = true;\n}\n\ndistributed_lock_service_zookeeper::~distributed_lock_service_zookeeper()\n{\n    if (_session) {\n        std::vector<lock_struct_ptr> handle_vec;\n        {\n            utils::auto_write_lock l(_service_lock);\n            for (auto &kv : _zookeeper_locks)\n                handle_vec.push_back(kv.second);\n            _zookeeper_locks.clear();\n        }\n        for (lock_struct_ptr &ptr : handle_vec)\n            _session->detach(ptr.get());\n        _session->detach(this);\n\n        _session = nullptr;\n    }\n}\n\nerror_code distributed_lock_service_zookeeper::finalize()\n{\n    release_ref();\n    return ERR_OK;\n}\n\nvoid distributed_lock_service_zookeeper::erase(const lock_key &key)\n{\n    utils::auto_write_lock l(_service_lock);\n    _zookeeper_locks.erase(key);\n}\n\nerror_code distributed_lock_service_zookeeper::initialize(const std::vector<std::string> &args)\n{\n    if (args.empty()) {\n        derror(\"need parameters: <lock_root>\");\n        return ERR_INVALID_PARAMETERS;\n    }\n    const char *lock_root = args[0].c_str();\n\n    _session =\n        zookeeper_session_mgr::instance().get_session(service_app::current_service_app_info());\n    _zoo_state = _session->attach(this,\n                                  std::bind(&distributed_lock_service_zookeeper::on_zoo_session_evt,\n                                            lock_srv_ptr(this),\n                                            std::placeholders::_1));\n    if (_zoo_state != ZOO_CONNECTED_STATE) {\n        _waiting_attach.wait_for(zookeeper_session_mgr::instance().timeout());\n        if (_zoo_state != ZOO_CONNECTED_STATE) {\n            dwarn(\n                \"attach to zookeeper session timeout, distributed lock service initialized failed\");\n            return ERR_TIMEOUT;\n        }\n    }\n\n    std::vector<std::string> slices;\n    
utils::split_args(lock_root, slices, '/');\n    std::string current = \"\";\n    for (auto &str : slices) {\n        utils::notify_event e;\n        int zerr;\n        current = current + \"/\" + str;\n        zookeeper_session::zoo_opcontext *op = zookeeper_session::create_context();\n        op->_optype = zookeeper_session::ZOO_CREATE;\n        op->_input._path = current;\n        op->_callback_function = [&e, &zerr](zookeeper_session::zoo_opcontext *op) mutable {\n            zerr = op->_output.error;\n            e.notify();\n        };\n\n        _session->visit(op);\n        e.wait();\n        if (zerr != ZOK && zerr != ZNODEEXISTS) {\n            derror(\"create zk node failed, path = %s, err = %s\", current.c_str(), zerror(zerr));\n            return from_zerror(zerr);\n        }\n    }\n    _lock_root = current.empty() ? \"/\" : current;\n\n    ddebug(\"init distributed_lock_service_zookeeper succeed, lock_root = %s\", _lock_root.c_str());\n    // Notice: this reference is released in the finalize\n    add_ref();\n    return ERR_OK;\n}\n\nstd::pair<task_ptr, task_ptr>\ndistributed_lock_service_zookeeper::lock(const std::string &lock_id,\n                                         const std::string &myself_id,\n                                         task_code lock_cb_code,\n                                         const lock_callback &lock_cb,\n                                         task_code lease_expire_code,\n                                         const lock_callback &lease_expire_callback,\n                                         const lock_options &opt)\n{\n    lock_struct_ptr handle;\n    {\n        utils::auto_write_lock l(_service_lock);\n        auto id_pair = std::make_pair(lock_id, myself_id);\n        auto iter = _zookeeper_locks.find(id_pair);\n        if (iter == _zookeeper_locks.end()) {\n            if (!opt.create_if_not_exist) {\n                task_ptr tsk = tasking::enqueue(\n                    lock_cb_code, nullptr, 
std::bind(lock_cb, ERR_OBJECT_NOT_FOUND, \"\", -1));\n                return std::make_pair(tsk, nullptr);\n            } else {\n                handle = new lock_struct(lock_srv_ptr(this));\n                handle->initialize(lock_id, myself_id);\n                _zookeeper_locks[id_pair] = handle;\n            }\n        } else\n            handle = iter->second;\n    }\n\n    lock_future_ptr lock_tsk(new lock_future(lock_cb_code, lock_cb, 0));\n    lock_future_ptr expire_tsk(new lock_future(lease_expire_code, lease_expire_callback, 0));\n\n    tasking::enqueue(TASK_CODE_DLOCK,\n                     nullptr,\n                     std::bind(&lock_struct::try_lock, handle, lock_tsk, expire_tsk),\n                     handle->hash());\n    return std::make_pair(lock_tsk, expire_tsk);\n}\n\ntask_ptr distributed_lock_service_zookeeper::unlock(const std::string &lock_id,\n                                                    const std::string &myself_id,\n                                                    bool destroy,\n                                                    task_code cb_code,\n                                                    const err_callback &cb)\n{\n    lock_struct_ptr handle;\n    {\n        utils::auto_read_lock l(_service_lock);\n        auto iter = _zookeeper_locks.find(std::make_pair(lock_id, myself_id));\n        if (iter == _zookeeper_locks.end())\n            return tasking::enqueue(cb_code, nullptr, std::bind(cb, ERR_OBJECT_NOT_FOUND));\n        handle = iter->second;\n    }\n    error_code_future_ptr unlock_tsk(new error_code_future(cb_code, cb, 0));\n    tasking::enqueue(TASK_CODE_DLOCK,\n                     nullptr,\n                     std::bind(&lock_struct::unlock, handle, unlock_tsk),\n                     handle->hash());\n    return unlock_tsk;\n}\n\ntask_ptr distributed_lock_service_zookeeper::cancel_pending_lock(const std::string &lock_id,\n                                                                 const std::string 
&myself_id,\n                                                                 task_code cb_code,\n                                                                 const lock_callback &cb)\n{\n    lock_struct_ptr handle;\n    {\n        utils::auto_read_lock l(_service_lock);\n        auto iter = _zookeeper_locks.find(std::make_pair(lock_id, myself_id));\n        if (iter == _zookeeper_locks.end())\n            return tasking::enqueue(cb_code, nullptr, std::bind(cb, ERR_OBJECT_NOT_FOUND, \"\", -1));\n        handle = iter->second;\n    }\n    lock_future_ptr cancel_tsk(new lock_future(cb_code, cb, 0));\n    tasking::enqueue(TASK_CODE_DLOCK,\n                     nullptr,\n                     std::bind(&lock_struct::cancel_pending_lock, handle, cancel_tsk),\n                     handle->hash());\n    return cancel_tsk;\n}\n\ntask_ptr distributed_lock_service_zookeeper::query_lock(const std::string &lock_id,\n                                                        task_code cb_code,\n                                                        const lock_callback &cb)\n{\n    std::string owner = \"\";\n    uint64_t version = -1;\n    error_code ec = query_cache(lock_id, owner, version);\n    return tasking::enqueue(cb_code, nullptr, std::bind(cb, ec, owner, version));\n}\n\nerror_code distributed_lock_service_zookeeper::query_cache(const std::string &lock_id,\n                                                           /*out*/ std::string &owner,\n                                                           /*out*/ uint64_t &version)\n{\n    utils::auto_read_lock l(_service_lock);\n    auto iter = _lock_cache.find(lock_id);\n    if (_lock_cache.end() == iter)\n        return ERR_OBJECT_NOT_FOUND;\n    else {\n        owner = iter->second.first;\n        version = iter->second.second;\n        return ERR_OK;\n    }\n}\n\nvoid distributed_lock_service_zookeeper::refresh_lock_cache(const std::string &lock_id,\n                                                            const 
std::string &owner,\n                                                            uint64_t version)\n{\n    utils::auto_write_lock l(_service_lock);\n    _lock_cache[lock_id] = std::make_pair(owner, version);\n}\n\nvoid distributed_lock_service_zookeeper::dispatch_zookeeper_session_expire()\n{\n    utils::auto_read_lock l(_service_lock);\n    for (auto &kv : _zookeeper_locks)\n        tasking::enqueue(TASK_CODE_DLOCK,\n                         nullptr,\n                         std::bind(&lock_struct::lock_expired, kv.second),\n                         kv.second->hash());\n}\n\n/*static*/\n/* this function runs in zookeeper do-completion thread */\nvoid distributed_lock_service_zookeeper::on_zoo_session_evt(lock_srv_ptr _this, int zoo_state)\n{\n    // TODO: better policy of zookeeper session response\n    _this->_zoo_state = zoo_state;\n\n    if (_this->_first_call && ZOO_CONNECTED_STATE == zoo_state) {\n        _this->_first_call = false;\n        _this->_waiting_attach.notify();\n        return;\n    }\n\n    if (ZOO_EXPIRED_SESSION_STATE == zoo_state || ZOO_AUTH_FAILED_STATE == zoo_state) {\n        derror(\"get zoo state: %s, which means the session is expired\",\n               zookeeper_session::string_zoo_state(zoo_state));\n        _this->dispatch_zookeeper_session_expire();\n    } else {\n        dwarn(\"get zoo state: %s, ignore it\", zookeeper_session::string_zoo_state(zoo_state));\n    }\n}\n}\n}\n"
  },
  {
    "path": "src/zookeeper/distributed_lock_service_zookeeper.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     distributed lock service implemented with zookeeper\n *\n * Revision history:\n *     2015-12-04, @shengofsun (sunweijie@xiaomi.com)\n */\n#pragma once\n\n#include <dsn/dist/distributed_lock_service.h>\n#include <unordered_map>\n#include \"lock_types.h\"\n\nnamespace dsn {\nnamespace dist {\n\nclass zookeeper_session;\nclass distributed_lock_service_zookeeper : public distributed_lock_service, public ref_counter\n{\npublic:\n    explicit distributed_lock_service_zookeeper();\n    virtual ~distributed_lock_service_zookeeper() override;\n\n    // lock_root = argv[0]\n    virtual error_code initialize(const std::vector<std::string> &args) override;\n    virtual error_code finalize() override;\n\n    //\n    
// distributed lock service implemented by zk.\n    // lock_cb is called when the lock is acquired\n    //\n    // lease_expire_callback is called when the session-expire's zk-event is encountered\n    // user should exit the process when lease expires\n    //\n    virtual std::pair<task_ptr, task_ptr> lock(const std::string &lock_id,\n                                               const std::string &myself_id,\n                                               task_code lock_cb_code,\n                                               const lock_callback &lock_cb,\n                                               task_code lease_expire_code,\n                                               const lock_callback &lease_expire_callback,\n                                               const lock_options &opt) override;\n\n    virtual task_ptr cancel_pending_lock(const std::string &lock_id,\n                                         const std::string &myself_id,\n                                         task_code cb_code,\n                                         const lock_callback &cb) override;\n    virtual task_ptr unlock(const std::string &lock_id,\n                            const std::string &myself_id,\n                            bool destroy,\n                            task_code cb_code,\n                            const err_callback &cb) override;\n    virtual task_ptr\n    query_lock(const std::string &lock_id, task_code cb_code, const lock_callback &cb) override;\n    virtual error_code query_cache(const std::string &lock_id,\n                                   /*out*/ std::string &owner,\n                                   /*out*/ uint64_t &version);\n\n    void refresh_lock_cache(const std::string &lock_id, const std::string &owner, uint64_t version);\n\nprivate:\n    static std::string LOCK_NODE_PREFIX;\n\n    typedef std::pair<std::string, std::string> lock_key;\n    struct pair_hash\n    {\n        template <typename T, typename U>\n        std::size_t 
operator()(const std::pair<T, U> &key) const\n        {\n            return std::hash<T>()(key.first) + std::hash<U>()(key.second);\n        }\n    };\n\n    std::string _lock_root; // lock path: ${lock_root}/${lock_id}/${LOCK_NODE_PREFIX}${i}\n\n    typedef std::unordered_map<lock_key, lock_struct_ptr, pair_hash> lock_map;\n    typedef std::map<std::string, std::pair<std::string, uint64_t>> cache_map;\n\n    utils::rw_lock_nr _service_lock;\n    lock_map _zookeeper_locks;\n    cache_map _lock_cache;\n\n    zookeeper_session *_session;\n    int _zoo_state;\n    bool _first_call;\n    utils::notify_event _waiting_attach;\n\n    void erase(const lock_key &key);\n    void dispatch_zookeeper_session_expire();\n    zookeeper_session *session() { return _session; }\n\n    static void on_zoo_session_evt(lock_srv_ptr _this, int zoo_state);\n\n    friend class lock_struct;\n};\n}\n}\n"
  },
  {
    "path": "src/zookeeper/lock_struct.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     distributed lock service implemented with zookeeper, the implementation\n *     of lock_structure\n *\n * Revision history:\n *     2015-12-04, @shengofsun (sunweijie@xiaomi.com)\n */\n#include <functional>\n#include <string>\n#include <memory>\n#include <vector>\n\n#include <boost/lexical_cast.hpp>\n#include <boost/algorithm/string.hpp>\n\n#include <dsn/tool-api/thread_access_checker.h>\n#include <dsn/tool-api/async_calls.h>\n\n#include \"distributed_lock_service_zookeeper.h\"\n#include \"lock_struct.h\"\n#include \"lock_types.h\"\n#include \"zookeeper_session.h\"\n#include \"zookeeper_error.h\"\n\nnamespace dsn {\nnamespace dist {\n\nstatic const char *states[] = {\n    \"uninitialized\", \"pending\", 
\"locked\", \"expired\", \"cancelled\", \"unlocking\"};\n\nstatic inline const char *string_state(lock_state state)\n{\n    dassert(state < lock_state::state_count, \"state = %d\", (int)(state));\n    return states[state];\n}\n\nstatic bool is_zookeeper_timeout(int zookeeper_error)\n{\n    return zookeeper_error == ZCONNECTIONLOSS || zookeeper_error == ZOPERATIONTIMEOUT;\n}\n\n#define __check_code(code, allow_list, allow_list_size, code_str)                                  \\\n    do {                                                                                           \\\n        int i = 0;                                                                                 \\\n        for (; i != allow_list_size; ++i) {                                                        \\\n            if (code == allow_list[i])                                                             \\\n                break;                                                                             \\\n        }                                                                                          \\\n        dassert(i < allow_list_size, \"invalid code(%s)\", code_str);                                \\\n    } while (0)\n\n#define __execute(cb, _this) tasking::enqueue(TASK_CODE_DLOCK, nullptr, cb, _this->hash())\n\n#define __add_ref_and_delay_call(op, _this)                                                        \\\n    dwarn(\"operation %s on %s encounter error, retry later\",                                       \\\n          zookeeper_session::string_zoo_operation(op->_optype),                                    \\\n          op->_input._path.c_str());                                                               \\\n    zookeeper_session::add_ref(op);                                                                \\\n    tasking::enqueue(TASK_CODE_DLOCK,                                                              \\\n                     nullptr,                                
                                      \\\n                     [_this, op]() { _this->_dist_lock_service->session()->visit(op); },           \\\n                     _this->hash(),                                                                \\\n                     std::chrono::seconds(1));\n\n#define IGNORE_CALLBACK true\n#define DONT_IGNORE_CALLBACK false\n#define REMOVE_FOR_UNLOCK true\n#define REMOVE_FOR_CANCEL false\n\nlock_struct::lock_struct(lock_srv_ptr srv) : ref_counter()\n{\n    _dist_lock_service = srv;\n    clear();\n    _state = lock_state::uninitialized;\n\n    _hash = 0;\n}\n\nvoid lock_struct::initialize(std::string lock_id, std::string myself_id)\n{\n    _lock_id = lock_id;\n    _myself._node_value = myself_id;\n\n    _hash = std::hash<std::string>()(lock_id) + std::hash<std::string>()(myself_id);\n}\n\nvoid lock_struct::clear()\n{\n    _lock_callback = nullptr;\n    _lease_expire_callback = nullptr;\n    _cancel_callback = nullptr;\n    _unlock_callback = nullptr;\n\n    _lock_id = _lock_dir = \"\";\n    _myself._node_value = _owner._node_value = \"\";\n    _myself._node_seq_name = _owner._node_seq_name = \"\";\n    _myself._sequence_id = _owner._sequence_id = -1;\n}\n\nvoid lock_struct::remove_lock()\n{\n    _checker.only_one_thread_access();\n\n    if (_dist_lock_service != nullptr) {\n        _dist_lock_service->erase(std::make_pair(_lock_id, _myself._node_value));\n        _dist_lock_service->session()->detach(this);\n        _dist_lock_service = nullptr;\n    }\n}\n\nvoid lock_struct::on_operation_timeout()\n{\n    ddebug(\"zookeeper operation times out, removing the current watching\");\n    _state = lock_state::uninitialized;\n    _dist_lock_service->session()->detach(this);\n    _lock_callback->enqueue_with(ERR_TIMEOUT, _owner._node_value, _owner._sequence_id);\n}\n\nvoid lock_struct::on_expire()\n{\n    if (_state == lock_state::expired)\n        return;\n    _state = lock_state::expired;\n    remove_lock();\n    
_lease_expire_callback->enqueue_with(ERR_EXPIRED, _owner._node_value, _owner._sequence_id);\n    clear();\n}\n\nint64_t lock_struct::parse_seq_path(const std::string &path)\n{\n    int64_t ans = 0;\n    int64_t power = 1;\n    int i = ((int)path.size()) - 1;\n    for (; i >= 0 && isdigit(path[i]); --i) {\n        ans = ans + (path[i] - '0') * power;\n        power *= 10;\n    }\n    const std::string &match = distributed_lock_service_zookeeper::LOCK_NODE_PREFIX;\n    int j = ((int)match.size()) - 1;\n    for (; i >= 0 && j >= 0 && path[i] == match[j]; --i, --j)\n        ;\n    if (power == 1 || j >= 0) {\n        dwarn(\"invalid path: %s\", path.c_str());\n        return -1;\n    }\n    return ans;\n}\n\n/*static*/\nvoid lock_struct::my_lock_removed(lock_struct_ptr _this, int zoo_event)\n{\n    static const lock_state allow_state[] = {\n        lock_state::locked, lock_state::unlocking, lock_state::expired};\n    _this->_checker.only_one_thread_access();\n    __check_code(_this->_state, allow_state, 3, string_state(_this->_state));\n\n    if (_this->_state == lock_state::unlocking || _this->_state == lock_state::expired) {\n        return;\n    } else {\n        _this->on_expire();\n    }\n}\n/*static*/\nvoid lock_struct::owner_change(lock_struct_ptr _this, int zoo_event)\n{\n    static const lock_state allow_state[] = {\n        lock_state::uninitialized, lock_state::pending, lock_state::cancelled, lock_state::expired};\n    _this->_checker.only_one_thread_access();\n    __check_code(_this->_state, allow_state, 3, string_state(_this->_state));\n\n    if (_this->_state == lock_state::uninitialized) {\n        dwarn(\"this is mainly due to a timeout happens before, just ignore the event %s\",\n              zookeeper_session::string_zoo_event(zoo_event));\n        return;\n    }\n    if (_this->_state == lock_state::cancelled || _this->_state == lock_state::expired) {\n        return;\n    }\n    if (ZOO_DELETED_EVENT == zoo_event) {\n        
_this->_owner._sequence_id = -1;\n        _this->_owner._node_seq_name.clear();\n        _this->_owner._node_value.clear();\n        _this->get_lockdir_nodes();\n    } else if (ZOO_NOTWATCHING_EVENT == zoo_event)\n        _this->get_lock_owner(false);\n    else\n        dassert(false, \"unexpected event\");\n}\n/*static*/\nvoid lock_struct::after_remove_duplicated_locknode(lock_struct_ptr _this,\n                                                   int ec,\n                                                   std::shared_ptr<std::string> path)\n{\n    static const int allow_ec[] = {\n        ZOK,\n        ZNONODE,      // ok\n        ZINVALIDSTATE // operation timeout\n    };\n    static const int allow_state[] = {\n        lock_state::pending, lock_state::cancelled, lock_state::expired, lock_state::locked};\n    _this->_checker.only_one_thread_access();\n    __check_code(ec, allow_ec, 3, zerror(ec));\n    __check_code(_this->_state, allow_state, 4, string_state(_this->_state));\n\n    if (_this->_state == lock_state::cancelled || _this->_state == lock_state::expired) {\n        return;\n    }\n\n    if (_this->_state == lock_state::locked) {\n        // why the state may be locked:\n        // 1. send request to remove duplicated node\n        // 2. the node_delete_watcher is notified\n        // 3. check the children and found myself get the lock\n        // 4. 
request in 1 is responded\n        ddebug(\"the state is locked mainly because owner changed watcher is triggered first\");\n    }\n\n    if (ZOK == ec || ZNONODE == ec) {\n        ddebug(\"lock(%s) remove duplicated node(%s), rely on delete watcher to be actived\",\n               _this->_lock_id.c_str(),\n               path->c_str());\n    } else {\n        derror(\"lock struct(%s), myself(%s) got session expire\",\n               _this->_lock_dir.c_str(),\n               _this->_myself._node_seq_name.c_str());\n        _this->on_expire();\n    }\n}\n\nvoid lock_struct::remove_duplicated_locknode(std::string &&znode_path)\n{\n    lock_struct_ptr _this = this;\n    dwarn(\n        \"duplicated value(%s) ephe/seq node(%s and %s) create on zookeeper, remove the smaller one\",\n        _myself._node_value.c_str(),\n        _owner._node_seq_name.c_str(),\n        _myself._node_seq_name.c_str());\n\n    auto delete_callback_wrapper = [_this](zookeeper_session::zoo_opcontext *op) {\n        if (is_zookeeper_timeout(op->_output.error)) {\n            __add_ref_and_delay_call(op, _this);\n        } else {\n            __execute(std::bind(&lock_struct::after_remove_duplicated_locknode,\n                                _this,\n                                op->_output.error,\n                                std::shared_ptr<std::string>(\n                                    new std::string(std::move(op->_input._path)))),\n                      _this);\n        }\n    };\n    zookeeper_session::zoo_opcontext *op = zookeeper_session::create_context();\n    op->_optype = zookeeper_session::ZOO_DELETE;\n    op->_callback_function = delete_callback_wrapper;\n    op->_input._path = std::move(znode_path);\n    _dist_lock_service->session()->visit(op);\n}\n/*static*/\nvoid lock_struct::after_get_lock_owner(lock_struct_ptr _this,\n                                       int ec,\n                                       std::shared_ptr<std::string> value)\n{\n    static const int 
allow_ec[] = {\n        ZOK,          // OK\n        ZNONODE,      // owner session removed\n        ZINVALIDSTATE // operation timeout\n    };\n    static const int allow_state[] = {\n        lock_state::pending, lock_state::cancelled, lock_state::expired};\n    _this->_checker.only_one_thread_access();\n    __check_code(ec, allow_ec, 3, zerror(ec));\n    __check_code(_this->_state, allow_state, 3, string_state(_this->_state));\n\n    if (_this->_state == lock_state::cancelled || _this->_state == lock_state::expired) {\n        return;\n    }\n    if (ZNONODE == ec) {\n        // lock owner removed\n        ddebug(\"the lock(%s) old owner(%s:%s) has removed, myself(%s:%s) try to get the lock for \"\n               \"another turn\",\n               _this->_lock_id.c_str(),\n               _this->_owner._node_seq_name.c_str(),\n               _this->_owner._node_value.c_str(),\n               _this->_myself._node_seq_name.c_str(),\n               _this->_myself._node_value.c_str());\n        _this->_owner._sequence_id = -1;\n        _this->_owner._node_seq_name.clear();\n        _this->get_lockdir_nodes();\n        return;\n    }\n    if (ZOK == ec) {\n        _this->_owner._node_value = std::move(*value);\n        if (_this->_myself._node_value == _this->_owner._node_value)\n            _this->remove_duplicated_locknode(_this->_lock_dir + \"/\" +\n                                              _this->_owner._node_seq_name);\n        else {\n            _this->_dist_lock_service->refresh_lock_cache(\n                _this->_lock_id, _this->_owner._node_value, _this->_owner._sequence_id);\n            ddebug(\"wait the lock(%s) owner(%s:%s) to remove, myself(%s:%s)\",\n                   _this->_lock_id.c_str(),\n                   _this->_owner._node_seq_name.c_str(),\n                   _this->_owner._node_value.c_str(),\n                   _this->_myself._node_seq_name.c_str(),\n                   _this->_myself._node_value.c_str());\n        }\n    } else {\n      
  derror(\"lock_dir(%s), myself(%s), sessin expired\",\n               _this->_lock_dir.c_str(),\n               _this->_myself._node_seq_name.c_str());\n        _this->on_expire();\n    }\n}\n/*static*/\nvoid lock_struct::after_self_check(lock_struct_ptr _this,\n                                   int ec,\n                                   std::shared_ptr<std::string> value)\n{\n    static const int allow_ec[] = {\n        ZOK,          // OK\n        ZNONODE,      // removed by unlock, or session expired\n        ZINVALIDSTATE // session expired\n    };\n    static const lock_state allow_state[] = {\n        lock_state::locked, lock_state::unlocking, lock_state::expired};\n    _this->_checker.only_one_thread_access();\n    __check_code(ec, allow_ec, 3, zerror(ec));\n    __check_code(_this->_state, allow_state, 3, string_state(_this->_state));\n\n    if (_this->_state == lock_state::unlocking || _this->_state == lock_state::expired) {\n        ddebug(\"skip lock(%s) owner self check, do nothing, myself(%s:%s)\",\n               _this->_lock_id.c_str(),\n               _this->_myself._node_seq_name.c_str(),\n               _this->_myself._node_value.c_str());\n        return;\n    }\n    if (ZNONODE == ec || ZINVALIDSTATE == ec) {\n        ddebug(\"lock(%s) session expired, error reason(%s), myself(%s:%s)\",\n               _this->_lock_id.c_str(),\n               zerror(ec),\n               _this->_myself._node_seq_name.c_str(),\n               _this->_myself._node_value.c_str());\n        _this->on_expire();\n        return;\n    }\n    dassert(*value == _this->_myself._node_value,\n            \"lock(%s) get wrong value, local myself(%s), from zookeeper(%s)\",\n            _this->_lock_id.c_str(),\n            _this->_myself._node_value.c_str(),\n            value->c_str());\n}\n\nvoid lock_struct::get_lock_owner(bool watch_myself)\n{\n    lock_struct_ptr _this = this;\n    auto watcher_callback_wrapper = [_this, watch_myself](int event) {\n        ddebug(\"get 
watcher callback, event type(%s)\", zookeeper_session::string_zoo_event(event));\n        if (watch_myself)\n            __execute(std::bind(&lock_struct::my_lock_removed, _this, event), _this);\n        else\n            __execute(std::bind(&lock_struct::owner_change, _this, event), _this);\n    };\n\n    auto after_get_owner_wrapper = [_this, watch_myself](zookeeper_session::zoo_opcontext *op) {\n        zookeeper_session::zoo_output &output = op->_output;\n        std::function<void(int, std::shared_ptr<std::string>)> cb;\n        if (!watch_myself)\n            cb = std::bind(&lock_struct::after_get_lock_owner,\n                           _this,\n                           std::placeholders::_1,\n                           std::placeholders::_2);\n        else\n            cb = std::bind(&lock_struct::after_self_check,\n                           _this,\n                           std::placeholders::_1,\n                           std::placeholders::_2);\n\n        if (is_zookeeper_timeout(output.error)) {\n            _this->_dist_lock_service->session()->detach(\n                _this.get()); // before retry, first we need to remove the watcher\n            __add_ref_and_delay_call(op, _this);\n        } else if (output.error != ZOK)\n            __execute(std::bind(cb, output.error, nullptr), _this);\n        else {\n            std::shared_ptr<std::string> buf(\n                new std::string(output.get_op.value, output.get_op.value_length));\n            __execute(std::bind(cb, ZOK, buf), _this);\n        }\n    };\n\n    zookeeper_session::zoo_opcontext *op = zookeeper_session::create_context();\n    op->_optype = zookeeper_session::ZOO_GET;\n    op->_callback_function = after_get_owner_wrapper;\n    op->_input._path = _lock_dir + \"/\" + _owner._node_seq_name;\n\n    op->_input._is_set_watch = 1;\n    op->_input._owner = this;\n    op->_input._watcher_callback = watcher_callback_wrapper;\n\n    
_dist_lock_service->session()->visit(op);\n}\n/*static*/\nvoid lock_struct::after_get_lockdir_nodes(lock_struct_ptr _this,\n                                          int ec,\n                                          std::shared_ptr<std::vector<std::string>> children)\n{\n    static const int allow_ec[] = {\n        ZOK,          // succeed\n        ZINVALIDSTATE // session expired\n    };\n    static const int allow_state[] = {\n        lock_state::pending, lock_state::cancelled, lock_state::expired};\n\n    _this->_checker.only_one_thread_access();\n    __check_code(ec, allow_ec, 2, zerror(ec));\n    __check_code(_this->_state, allow_state, 3, string_state(_this->_state));\n\n    if (_this->_state == lock_state::cancelled || _this->_state == lock_state::expired) {\n        return;\n    }\n    if (ZINVALIDSTATE == ec) {\n        derror(\"get lockdir(%s) children got session expired\", _this->_lock_dir.c_str());\n        _this->on_expire();\n        return;\n    }\n\n    int64_t min_seq = -1, min_pos = -1, my_pos = -1;\n    int64_t myself_seq = _this->_myself._sequence_id;\n\n    for (int i = 0; i != (int)children->size(); ++i) {\n        std::string &child = (*children)[i];\n        int64_t seq = parse_seq_path(child);\n        if (seq == -1) {\n            dwarn(\"an invalid node(%s) in lockdir(%s), ignore\",\n                  child.c_str(),\n                  _this->_lock_dir.c_str());\n            continue;\n        }\n        if (min_pos == -1 || min_seq > seq)\n            min_seq = seq, min_pos = i;\n        if (myself_seq == seq)\n            my_pos = i;\n    }\n\n    ddebug(\"min sequece number(%lld) in lockdir(%s)\", min_seq, _this->_lock_dir.c_str());\n    if (my_pos == -1) {\n        // znode removed on zookeeper, may timeout or removed by other procedure\n        dwarn(\"sequence and ephemeral node(%s/%s) removed when get_children, this is abnormal, \"\n              \"try to reaquire the lock\",\n              _this->_lock_dir.c_str(),\n              
_this->_myself._node_seq_name.c_str());\n        _this->_myself._node_seq_name.clear();\n        _this->_myself._sequence_id = -1;\n        _this->create_locknode();\n        return;\n    } else {\n        _this->_owner._sequence_id = min_seq;\n        _this->_owner._node_seq_name = std::move((*children)[min_pos]);\n        bool watch_myself = false;\n        if (min_seq == myself_seq) {\n            // i am the smallest one, so i get the lock :-)\n            dassert(min_pos == my_pos,\n                    \"same sequence node number on zookeeper, dir(%s), number(%d)\",\n                    _this->_lock_dir.c_str(),\n                    myself_seq);\n            _this->_state = lock_state::locked;\n            _this->_owner._node_value = _this->_myself._node_value;\n            _this->_dist_lock_service->refresh_lock_cache(\n                _this->_lock_id, _this->_owner._node_value, _this->_owner._sequence_id);\n\n            watch_myself = true;\n            ddebug(\"got the lock(%s), myself(%s:%s)\",\n                   _this->_lock_id.c_str(),\n                   _this->_myself._node_seq_name.c_str(),\n                   _this->_myself._node_value.c_str());\n            _this->_lock_callback->enqueue_with(\n                ERR_OK, _this->_myself._node_value, _this->_myself._sequence_id);\n        }\n        _this->get_lock_owner(watch_myself);\n    }\n}\n\nvoid lock_struct::get_lockdir_nodes()\n{\n    lock_struct_ptr _this = this;\n    auto result_wrapper = [_this](zookeeper_session::zoo_opcontext *op) {\n        if (is_zookeeper_timeout(op->_output.error)) {\n            __add_ref_and_delay_call(op, _this);\n        } else if (op->_output.error != ZOK) {\n            __execute(\n                std::bind(&lock_struct::after_get_lockdir_nodes, _this, op->_output.error, nullptr),\n                _this);\n        } else {\n            const String_vector *vec = op->_output.getchildren_op.strings;\n            std::shared_ptr<std::vector<std::string>> 
children(\n                new std::vector<std::string>(vec->count));\n            for (int i = 0; i != vec->count; ++i)\n                (*children)[i].assign(vec->data[i]);\n            __execute(\n                std::bind(\n                    &lock_struct::after_get_lockdir_nodes, _this, op->_output.error, children),\n                _this);\n        }\n    };\n\n    zookeeper_session::zoo_opcontext *op = zookeeper_session::create_context();\n    op->_optype = zookeeper_session::ZOO_GETCHILDREN;\n    op->_callback_function = result_wrapper;\n    op->_input._path = _lock_dir;\n    op->_input._is_set_watch = 0;\n    _dist_lock_service->session()->visit(op);\n}\n/*static*/\nvoid lock_struct::after_create_locknode(lock_struct_ptr _this,\n                                        int ec,\n                                        std::shared_ptr<std::string> path)\n{\n    // as we create an ephe|seq node, so ZNODEEXISTS is not allowed\n    static const int allow_ec[] = {\n        ZOK,          // succeed\n        ZINVALIDSTATE // operation timeout\n    };\n    static const int allow_state[] = {\n        lock_state::pending, lock_state::cancelled, lock_state::expired};\n\n    _this->_checker.only_one_thread_access();\n    __check_code(ec, allow_ec, 2, zerror(ec));\n    __check_code(_this->_state, allow_state, 3, string_state(_this->_state));\n\n    dinfo(\"after create seq and ephe node, error(%s), path(%s)\", zerror(ec), path->c_str());\n    if (_this->_state == lock_state::cancelled || _this->_state == lock_state::expired) {\n        ddebug(\"current state(%s), ignore event create lockdir(%s)\", _this->_lock_dir.c_str());\n        if (ZOK == ec && _this->_state == lock_state::cancelled) {\n            _this->remove_my_locknode(std::move(*path), IGNORE_CALLBACK, REMOVE_FOR_CANCEL);\n        }\n        return;\n    }\n    if (ZINVALIDSTATE == ec) {\n        derror(\"create seq/ephe node (%s) in dir(%s) got session expired\",\n               
distributed_lock_service_zookeeper::LOCK_NODE_PREFIX.c_str(),\n               _this->_lock_dir.c_str());\n        _this->on_expire();\n        return;\n    }\n\n    char splitter[] = {'/', 0};\n    _this->_myself._node_seq_name = utils::get_last_component(*path, splitter);\n    _this->_myself._sequence_id = parse_seq_path(_this->_myself._node_seq_name);\n    dassert(_this->_myself._sequence_id != -1, \"invalid seq path created\");\n    ddebug(\"create seq/ephe node in dir(%s) ok, my_sequence_id(%d)\",\n           _this->_lock_dir.c_str(),\n           _this->_myself._sequence_id);\n    _this->get_lockdir_nodes();\n}\n\nvoid lock_struct::create_locknode()\n{\n    // create an ZOO_EPHEMERAL|ZOO_SEQUENCE node\n    zookeeper_session::zoo_opcontext *op = zookeeper_session::create_context();\n    op->_optype = zookeeper_session::ZOO_OPERATION::ZOO_CREATE;\n\n    lock_struct_ptr _this = this;\n    auto result_wrapper = [_this](zookeeper_session::zoo_opcontext *op) {\n        if (is_zookeeper_timeout(op->_output.error)) {\n            __add_ref_and_delay_call(op, _this);\n        } else if (op->_output.error != ZOK) {\n            __execute(\n                std::bind(&lock_struct::after_create_locknode, _this, op->_output.error, nullptr),\n                _this);\n        } else {\n            std::shared_ptr<std::string> path(new std::string(op->_output.create_op._created_path));\n            __execute(std::bind(&lock_struct::after_create_locknode, _this, ZOK, path), _this);\n        }\n    };\n\n    zookeeper_session::zoo_input &input = op->_input;\n    input._path = _lock_dir + \"/\" + distributed_lock_service_zookeeper::LOCK_NODE_PREFIX;\n    input._value.assign(_myself._node_value.c_str(), 0, _myself._node_value.length());\n    input._flags = ZOO_EPHEMERAL | ZOO_SEQUENCE;\n    op->_callback_function = result_wrapper;\n    _dist_lock_service->session()->visit(op);\n}\n/*static*/\nvoid lock_struct::after_create_lockdir(lock_struct_ptr _this, int ec)\n{\n    
_this->_checker.only_one_thread_access();\n    static const int allow_ec[] = {\n        ZOK,\n        ZNODEEXISTS,  // succeed state\n        ZINVALIDSTATE // session expire\n    };\n    static const lock_state allow_state[] = {\n        lock_state::pending, lock_state::cancelled, lock_state::expired};\n    __check_code(ec, allow_ec, 3, zerror(ec));\n    __check_code(_this->_state, allow_state, 3, string_state(_this->_state));\n\n    if (_this->_state == lock_state::cancelled || _this->_state == lock_state::expired) {\n        ddebug(\"current state(%s), ignore event create lockdir(%s)\",\n               string_state(_this->_state),\n               _this->_lock_dir.c_str());\n        return;\n    }\n    if (ZINVALIDSTATE == ec) {\n        derror(\"create lock dir failed got session expire, _path(%s)\", _this->_lock_dir.c_str());\n        _this->_lock_dir.clear();\n        _this->on_expire();\n        return;\n    }\n    _this->create_locknode();\n}\n/*static*/\nvoid lock_struct::try_lock(lock_struct_ptr _this,\n                           lock_future_ptr lock_callback,\n                           lock_future_ptr expire_callback)\n{\n    _this->_checker.only_one_thread_access();\n\n    if (_this->_state != lock_state::uninitialized) {\n        lock_callback->enqueue_with(ERR_RECURSIVE_LOCK, \"\", -1);\n        return;\n    }\n\n    _this->_lock_callback = lock_callback;\n    _this->_lease_expire_callback = expire_callback;\n    _this->_state = lock_state::pending;\n\n    if (_this->_lock_dir.empty()) {\n        _this->_lock_dir = _this->_dist_lock_service->_lock_root + \"/\" + _this->_lock_id;\n        auto result_wrapper = [_this](zookeeper_session::zoo_opcontext *op) {\n            if (is_zookeeper_timeout(op->_output.error)) {\n                __add_ref_and_delay_call(op, _this);\n            } else {\n                __execute(std::bind(&lock_struct::after_create_lockdir, _this, op->_output.error),\n                          _this);\n            }\n        };\n   
     zookeeper_session::zoo_opcontext *op = zookeeper_session::create_context();\n        op->_optype = zookeeper_session::ZOO_CREATE;\n        op->_input._path = _this->_lock_dir;\n        op->_callback_function = result_wrapper;\n        _this->_dist_lock_service->session()->visit(op);\n    } else if (_this->_myself._sequence_id == -1)\n        _this->create_locknode();\n    else if (_this->_owner._sequence_id == -1)\n        _this->get_lockdir_nodes();\n    else\n        _this->get_lock_owner(false);\n}\n\nvoid lock_struct::after_remove_my_locknode(lock_struct_ptr _this, int ec, bool remove_for_unlock)\n{\n    static const int allow_ec[] = {\n        ZOK,\n        ZNONODE,      // ok\n        ZINVALIDSTATE // operation timeout\n    };\n    static const int allow_state[] = {\n        lock_state::cancelled, lock_state::unlocking, lock_state::expired};\n    _this->_checker.only_one_thread_access();\n    __check_code(ec, allow_ec, 3, zerror(ec));\n    __check_code(_this->_state, allow_state, 3, string_state(_this->_state));\n\n    error_code dsn_ec;\n    if (lock_state::expired == _this->_state) {\n        ddebug(\"during unlock/cancel lock(%s), encountered expire, owner(%s:%s), myself(%s:%s)\",\n               _this->_lock_id.c_str(),\n               _this->_owner._node_seq_name.c_str(),\n               _this->_owner._node_value.c_str(),\n               _this->_myself._node_seq_name.c_str(),\n               _this->_myself._node_value.c_str());\n        dsn_ec = ERR_INVALID_STATE;\n    } else {\n        if (ZINVALIDSTATE == ec) {\n            _this->on_expire(); // when expire, only expire_callback is called, the unlock/cancel's\n                                // callback is ignored\n            return;\n        } else\n            dsn_ec = ERR_OK;\n    }\n\n    if (dsn_ec == ERR_OK)\n        _this->remove_lock();\n\n    if (REMOVE_FOR_UNLOCK == remove_for_unlock)\n        _this->_unlock_callback->enqueue_with(dsn_ec);\n    else {\n        
_this->_cancel_callback->enqueue_with(\n            dsn_ec, _this->_owner._node_value, _this->_owner._sequence_id);\n    }\n\n    if (dsn_ec == ERR_OK) {\n        _this->clear();\n    }\n}\n\nvoid lock_struct::remove_my_locknode(std::string &&znode_path,\n                                     bool ignore_callback,\n                                     bool remove_for_unlock)\n{\n    lock_struct_ptr _this = this;\n    auto result_wrapper = [_this, ignore_callback, remove_for_unlock](\n        zookeeper_session::zoo_opcontext *op) {\n        ddebug(\"delete node %s, result(%s)\", op->_input._path.c_str(), zerror(op->_output.error));\n        if (is_zookeeper_timeout(op->_output.error)) {\n            __add_ref_and_delay_call(op, _this);\n            return;\n        }\n\n        if (IGNORE_CALLBACK != ignore_callback) {\n            __execute(std::bind(&lock_struct::after_remove_my_locknode,\n                                _this,\n                                op->_output.error,\n                                remove_for_unlock),\n                      _this);\n        }\n    };\n    zookeeper_session::zoo_opcontext *op = zookeeper_session::create_context();\n    op->_optype = zookeeper_session::ZOO_DELETE;\n    op->_input._path = std::move(znode_path);\n    op->_callback_function = result_wrapper;\n    _dist_lock_service->session()->visit(op);\n}\n\n/*static*/\nvoid lock_struct::cancel_pending_lock(lock_struct_ptr _this, lock_future_ptr cancel_callback)\n{\n    _this->_checker.only_one_thread_access();\n    if (_this->_state != lock_state::uninitialized && _this->_state != lock_state::pending &&\n        _this->_state != lock_state::cancelled) {\n        cancel_callback->enqueue_with(ERR_INVALID_PARAMETERS, \"\", _this->_owner._sequence_id);\n        return;\n    }\n\n    _this->_state = lock_state::cancelled;\n    _this->_cancel_callback = cancel_callback;\n    if (!_this->_myself._node_seq_name.empty())\n        _this->remove_my_locknode(_this->_lock_dir + 
\"/\" + _this->_myself._node_seq_name,\n                                  DONT_IGNORE_CALLBACK,\n                                  REMOVE_FOR_CANCEL);\n    else {\n        _this->remove_lock();\n        cancel_callback->enqueue_with(\n            ERR_OK, _this->_owner._node_value, _this->_owner._sequence_id);\n        _this->clear();\n    }\n}\n\n/*static*/\nvoid lock_struct::unlock(lock_struct_ptr _this, error_code_future_ptr unlock_callback)\n{\n    _this->_checker.only_one_thread_access();\n    if (_this->_state != lock_state::locked && _this->_state != lock_state::unlocking) {\n        ddebug(\"lock(%s) myself(%s) seqid(%lld) state(%s), just return\",\n               _this->_lock_id.c_str(),\n               _this->_myself._node_value.c_str(),\n               _this->_owner._sequence_id,\n               string_state(_this->_state));\n        unlock_callback->enqueue_with(ERR_INVALID_PARAMETERS);\n        return;\n    }\n\n    _this->_state = lock_state::unlocking;\n    _this->_unlock_callback = unlock_callback;\n    _this->remove_my_locknode(_this->_lock_dir + \"/\" + _this->_myself._node_seq_name,\n                              DONT_IGNORE_CALLBACK,\n                              REMOVE_FOR_UNLOCK);\n}\n\n/*static*/\nvoid lock_struct::lock_expired(lock_struct_ptr _this)\n{\n    _this->_checker.only_one_thread_access();\n    _this->on_expire();\n}\n}\n}\n"
  },
  {
    "path": "src/zookeeper/lock_struct.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     distributed lock service implemented with zookeeper, the definition of each lock structure\n *\n * Revision history:\n *     2015-12-04, @shengofsun (sunweijie@xiaomi.com)\n */\n#pragma once\n\n#include <dsn/tool-api/thread_access_checker.h>\n#include <dsn/dist/distributed_lock_service.h>\n#include <string>\n\n#include \"lock_types.h\"\n\nnamespace dsn {\nnamespace dist {\n\nenum lock_state\n{\n    uninitialized,\n    pending,\n    locked,\n    expired,\n    cancelled,\n    unlocking,\n    state_count\n};\n\nstruct zoolock_pair\n{\n    std::string _node_value;\n    std::string _node_seq_name;\n    int64_t _sequence_id;\n};\n\nclass lock_struct : public ref_counter\n{\npublic:\n    lock_struct(lock_srv_ptr 
srv);\n    void initialize(std::string lock_id, std::string myself_id);\n    const int hash() const { return _hash; }\n\n    static void\n    try_lock(lock_struct_ptr _this, lock_future_ptr lock_callback, lock_future_ptr expire_callback);\n    static void cancel_pending_lock(lock_struct_ptr _this, lock_future_ptr cancel_callback);\n    static void unlock(lock_struct_ptr _this, error_code_future_ptr unlock_callback);\n\n    static void lock_expired(lock_struct_ptr _this);\n\nprivate:\n    void create_locknode();\n    void get_lockdir_nodes();\n    void get_lock_owner(bool watch_myself);\n    void remove_duplicated_locknode(std::string &&znode_path);\n\n    void remove_my_locknode(std::string &&znode_path, bool ignore_callback, bool remove_for_unlock);\n\n    void clear();\n    void remove_lock();\n    void on_operation_timeout();\n    void on_expire();\n\n    static int64_t parse_seq_path(const std::string &path);\n    static void after_create_lockdir(lock_struct_ptr _this, int ec);\n    static void after_get_lockdir_nodes(lock_struct_ptr _this,\n                                        int ec,\n                                        std::shared_ptr<std::vector<std::string>> children);\n    static void\n    after_create_locknode(lock_struct_ptr _this, int ec, std::shared_ptr<std::string> path);\n    static void\n    after_get_lock_owner(lock_struct_ptr _this, int ec, std::shared_ptr<std::string> value);\n    static void after_self_check(lock_struct_ptr _this, int ec, std::shared_ptr<std::string> value);\n    static void after_remove_duplicated_locknode(lock_struct_ptr _this,\n                                                 int ec,\n                                                 std::shared_ptr<std::string> value);\n    static void after_remove_my_locknode(lock_struct_ptr _this, int ec, bool need_to_notify);\n\n    /*lock owner watch callback*/\n    static void owner_change(lock_struct_ptr _this, int zoo_event);\n    static void my_lock_removed(lock_struct_ptr 
_this, int zoo_event);\n\nprivate:\n    lock_future_ptr _lock_callback;\n    lock_future_ptr _lease_expire_callback;\n    lock_future_ptr _cancel_callback;\n    error_code_future_ptr _unlock_callback;\n\n    std::string _lock_id;\n    std::string _lock_dir; // ${lock_root}/${lock_id}\n    zoolock_pair _myself, _owner;\n    lock_state _state;\n    int _hash;\n\n    lock_srv_ptr _dist_lock_service;\n\n    thread_access_checker _checker;\n};\n}\n}\n"
  },
  {
    "path": "src/zookeeper/lock_types.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     distributed lock service implemented with zookeeper, shared type definitions\n *\n * Revision history:\n *     2015-12-04, @shengofsun (sunweijie@xiaomi.com)\n */\n#pragma once\n\n#include <dsn/utility/autoref_ptr.h>\n#include <dsn/tool-api/auto_codes.h>\n#include <dsn/dist/distributed_lock_service.h>\n\nnamespace dsn {\nnamespace dist {\n\nDEFINE_THREAD_POOL_CODE(THREAD_POOL_DLOCK)\nDEFINE_TASK_CODE(TASK_CODE_DLOCK, TASK_PRIORITY_HIGH, THREAD_POOL_DLOCK)\n\nclass distributed_lock_service_zookeeper;\nclass lock_struct;\ntypedef ref_ptr<distributed_lock_service_zookeeper> lock_srv_ptr;\ntypedef ref_ptr<lock_struct> lock_struct_ptr;\n}\n}\n"
  },
  {
    "path": "src/zookeeper/test/CMakeLists.txt",
    "content": "set(MY_PROJ_NAME dsn.zookeeper.tests)\n\n# Source files under CURRENT project directory will be automatically included.\n# You can manually set MY_PROJ_SRC to include source files under other directories.\nset(MY_PROJ_SRC \"\")\n\n#  Search mode for source files under CURRENT project directory?\n#  \"GLOB_RECURSE\" for recursive search\n#  \"GLOB\" for non-recursive search\nset(MY_SRC_SEARCH_MODE \"GLOB\")\n\nset(MY_PROJ_LIBS \n    dsn.replication.zookeeper_provider\n    dsn_runtime\n    zookeeper\n    hashtable\n    gtest\n    ssl\n    crypto\n    )\n\nset(MY_BOOST_LIBS Boost::system Boost::filesystem Boost::regex)\n\n# Extra files that will be installed\nset(MY_BINPLACES\n    \"${CMAKE_CURRENT_SOURCE_DIR}/run.sh\"\n    \"${CMAKE_CURRENT_SOURCE_DIR}/clear.sh\"\n    \"${CMAKE_CURRENT_SOURCE_DIR}/config-test.ini\"\n    )\n\ndsn_add_test()\n"
  },
  {
    "path": "src/zookeeper/test/clear.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nrm -rf core* log.* *.log data\n"
  },
  {
    "path": "src/zookeeper/test/config-test.ini",
    "content": "; The MIT License (MIT)\n;\n; Copyright (c) 2015 Microsoft Corporation\n;\n; -=- Robust Distributed System Nucleus (rDSN) -=-\n;\n; Permission is hereby granted, free of charge, to any person obtaining a copy\n; of this software and associated documentation files (the \"Software\"), to deal\n; in the Software without restriction, including without limitation the rights\n; to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n; copies of the Software, and to permit persons to whom the Software is\n; furnished to do so, subject to the following conditions:\n;\n; The above copyright notice and this permission notice shall be included in\n; all copies or substantial portions of the Software.\n;\n; THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n; AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n; LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n; THE SOFTWARE.\n\n[apps..default]\nrun = true\ncount = 1\n;network.client.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n;network.client.RPC_CHANNEL_UDP = dsn::tools::sim_network_provider, 65536\n;network.server.0.RPC_CHANNEL_TCP = dsn::tools::sim_network_provider, 65536\n\n[apps.client]\ntype = test\narguments = localhost 20101\nrun = true\nports =\ncount = 1\ndelay_seconds = 1\npools = THREAD_POOL_DEFAULT, THREAD_POOL_DLOCK, THREAD_POOL_REPLICATION, THREAD_POOL_REPLICATION_LONG,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.server]\ntype = test\narguments =\nports = 20101\nrun = false\ncount = 0\npools = THREAD_POOL_DEFAULT, THREAD_POOL_REPLICATION,THREAD_POOL_SLOG,THREAD_POOL_PLOG\n\n[apps.add_server]\ntype = adder\narguments =\nports = 20201\nrun = true\ncount = 3\npools 
= THREAD_POOL_DEFAULT, THREAD_POOL_DLOCK\n\n[core]\n;tool = simulator\ntool = nativerun\n\n;toollets = tracer, profiler\n;fault_injector\npause_on_start = false\n\nlogging_start_level = LOG_LEVEL_INFORMATION\nlogging_factory_name = dsn::tools::simple_logger\n\n\n[tools.simple_logger]\nfast_flush = true\nshort_header = false\nstderr_start_level = LOG_LEVEL_WARNING\n\n[tools.simulator]\nrandom_seed = 0\n\n[tools.screen_logger]\nshort_header = false\n\n[network]\n; how many network threads for network library (used by asio)\nio_service_worker_count = 2\n\n[task..default]\nis_trace = true\nis_profile = true\nallow_inline = false\nrpc_call_channel = RPC_CHANNEL_TCP\nrpc_message_header_format = dsn\nrpc_timeout_milliseconds = 5000\n\n[task.LPC_AIO_IMMEDIATE_CALLBACK]\nis_trace = false\nis_profile = false\nallow_inline = false\n\n[task.LPC_RPC_TIMEOUT]\nis_trace = false\nis_profile = false\n\n; specification for each thread pool\n[threadpool..default]\nworker_count = 2\n\n[threadpool.THREAD_POOL_DEFAULT]\npartitioned = false\nworker_priority = THREAD_xPRIORITY_NORMAL\n\n[threadpool.THREAD_POOL_TEST_SERVER]\npartitioned = false\n\n[threadpool.THREAD_POOL_BLOCK_SERVICE]\nworker_count = 8\n\n[threadpool.THREAD_POOL_DLOCK]\npartitioned = true\n\n[zookeeper]\nhosts_list = localhost:12181\ntimeout_ms = 30000\nlogfile = zoolog.log\n"
  },
  {
    "path": "src/zookeeper/test/distributed_lock_zookeeper.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <vector>\n#include <string>\n#include <functional>\n#include <thread>\n\n#include <boost/lexical_cast.hpp>\n#include <gtest/gtest.h>\n\n#include <dsn/service_api_cpp.h>\n\n#include \"zookeeper/distributed_lock_service_zookeeper.h\"\n#include \"zookeeper/lock_struct.h\"\n\nusing namespace dsn;\nusing namespace dsn::dist;\n\nDEFINE_TASK_CODE(DLOCK_CALLBACK, TASK_PRIORITY_HIGH, THREAD_POOL_DEFAULT)\n\nbool ss_start = false;\nbool ss_finish = false;\n\nstd::vector<int64_t> q;\nint pos = 0;\nint64_t result = 0;\n\nclass simple_adder_server : public dsn::service_app\n{\npublic:\n    simple_adder_server(const service_app_info *info) : ::dsn::service_app(info) {}\n\n    error_code start(const std::vector<std::string> &args)\n    {\n 
       ddebug(\"name: %s, argc=%u\", info().full_name.c_str(), args.size());\n        for (const std::string &s : args)\n            ddebug(\"argv: %s\", s.c_str());\n        while (!ss_start)\n            std::this_thread::sleep_for(std::chrono::seconds(1));\n\n        _dlock_service = new distributed_lock_service_zookeeper();\n        auto err = _dlock_service->initialize({\"/dsn/tests/simple_adder_server\"});\n        dassert(err == ERR_OK, \"err = %s\", err.to_string());\n\n        distributed_lock_service::lock_options opt = {true, true};\n        while (!ss_finish) {\n            std::pair<task_ptr, task_ptr> task_pair = _dlock_service->lock(\n                \"test_lock\",\n                info().full_name,\n                DLOCK_CALLBACK,\n                [this](error_code ec, const std::string &name, int version) {\n                    EXPECT_TRUE(ERR_OK == ec);\n                    EXPECT_TRUE(name == this->info().full_name);\n                    ddebug(\"lock: error_code: %s, name: %s, lock version: %d\",\n                           ec.to_string(),\n                           name.c_str(),\n                           version);\n                },\n                DLOCK_CALLBACK,\n                [](error_code, const std::string &, int) { dassert(false, \"session expired\"); },\n                opt);\n            task_pair.first->wait();\n            for (int i = 0; i < 1000; ++i) {\n                if (pos >= q.size()) {\n                    ss_finish = true;\n                    break;\n                }\n                result += q[pos++];\n            }\n            task_ptr unlock_task = _dlock_service->unlock(\n                \"test_lock\", info().full_name, true, DLOCK_CALLBACK, [](error_code ec) {\n                    EXPECT_TRUE(ERR_OK == ec);\n                    ddebug(\"unlock, error code: %s\", ec.to_string());\n                });\n            unlock_task->wait();\n            task_pair.second->cancel(false);\n        }\n\n        return 
ERR_OK;\n    }\n\n    error_code stop(bool cleanup) override { return ERR_OK; }\n\nprivate:\n    ref_ptr<distributed_lock_service_zookeeper> _dlock_service;\n};\n\nTEST(distributed_lock_service_zookeeper, simple_lock_unlock)\n{\n    pos = 0;\n    result = 0;\n    ss_start = false;\n    ss_finish = false;\n    q.clear();\n\n    srand(time(0));\n    q.reserve(100000);\n    for (int i = 0; i != 100000; ++i) {\n        int64_t rand1 = rand() % 10000;\n        int64_t rand2 = rand() % 10000;\n        q.push_back(rand1 * rand2);\n    }\n\n    int64_t expect_reuslt = 0;\n    for (int64_t i : q)\n        expect_reuslt += i;\n\n    ss_start = true;\n    while (!ss_finish)\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n\n    ddebug(\"actual result: %lld, expect_result:%lld\", result, expect_reuslt);\n    EXPECT_TRUE(result == expect_reuslt);\n}\n\nTEST(distributed_lock_service_zookeeper, abnormal_api_call)\n{\n    ref_ptr<distributed_lock_service_zookeeper> dlock_svc(new distributed_lock_service_zookeeper());\n    ASSERT_EQ(ERR_OK, dlock_svc->initialize({\"/dsn/tests/simple_adder_server\"}));\n\n    std::string lock_id = \"test_lock2\";\n    std::string my_id = \"test_myid\";\n    std::string my_id2 = \"test_myid2\";\n\n    distributed_lock_service::lock_options opt = {false, true};\n    std::pair<task_ptr, task_ptr> cb_pair = dlock_svc->lock(\n        lock_id,\n        my_id,\n        DLOCK_CALLBACK,\n        [](error_code ec, const std::string &, int) { ASSERT_TRUE(ERR_OBJECT_NOT_FOUND == ec); },\n        DLOCK_CALLBACK,\n        nullptr,\n        opt);\n    ASSERT_TRUE(cb_pair.first != nullptr && cb_pair.second == nullptr);\n    cb_pair.first->wait();\n\n    opt.create_if_not_exist = true;\n    cb_pair =\n        dlock_svc->lock(lock_id,\n                        my_id,\n                        DLOCK_CALLBACK,\n                        [](error_code ec, const std::string &, int) { ASSERT_TRUE(ec == ERR_OK); },\n                        DLOCK_CALLBACK,\n      
                  nullptr,\n                        opt);\n    ASSERT_TRUE(cb_pair.first != nullptr && cb_pair.second != nullptr);\n    cb_pair.first->wait();\n\n    // recursive lock\n    std::pair<task_ptr, task_ptr> cb_pair2 = dlock_svc->lock(\n        lock_id,\n        my_id,\n        DLOCK_CALLBACK,\n        [](error_code ec, const std::string &, int) { ASSERT_TRUE(ec == ERR_RECURSIVE_LOCK); },\n        DLOCK_CALLBACK,\n        nullptr,\n        opt);\n    ASSERT_TRUE(cb_pair2.first != nullptr && cb_pair2.second != nullptr);\n    cb_pair2.first->wait();\n    cb_pair2.second->cancel(false);\n\n    // try to cancel an locked lock\n    task_ptr tsk = dlock_svc->cancel_pending_lock(\n        lock_id, my_id, DLOCK_CALLBACK, [](error_code ec, const std::string &, int) {\n            ASSERT_TRUE(ec == ERR_INVALID_PARAMETERS);\n        });\n    tsk->wait();\n\n    // try to cancel an non-exist lock\n    tsk = dlock_svc->cancel_pending_lock(\n        lock_id, \"non-exist-myself\", DLOCK_CALLBACK, [](error_code ec, const std::string &, int) {\n            ASSERT_TRUE(ec == ERR_OBJECT_NOT_FOUND);\n        });\n    tsk->wait();\n\n    tsk = dlock_svc->query_lock(\n        lock_id, DLOCK_CALLBACK, [my_id](error_code ec, const std::string &name, int) {\n            ASSERT_TRUE(ec == ERR_OK);\n            ASSERT_TRUE(name == my_id);\n        });\n    tsk->wait();\n\n    cb_pair2 = dlock_svc->lock(lock_id,\n                               my_id2,\n                               DLOCK_CALLBACK,\n                               [my_id2](error_code ec, const std::string &name, int) {\n                                   ASSERT_TRUE(ec == ERR_OK);\n                                   ASSERT_TRUE(name == my_id2);\n                               },\n                               DLOCK_CALLBACK,\n                               nullptr,\n                               opt);\n\n    bool result = cb_pair2.first->wait(2000);\n    ASSERT_FALSE(result);\n\n    tsk = dlock_svc->unlock(\n      
  lock_id, my_id, true, DLOCK_CALLBACK, [](error_code ec) { ASSERT_TRUE(ec == ERR_OK); });\n\n    tsk->wait();\n\n    // the pending lock[lock_id, my_id2] will get the lock after\n    // [lock_id, my_id](the next step) unlocked\n    cb_pair2.first->wait();\n    tsk = dlock_svc->unlock(\n        lock_id, my_id2, true, DLOCK_CALLBACK, [](error_code ec) { ASSERT_TRUE(ec == ERR_OK); });\n\n    tsk->wait();\n}\n\nvoid lock_test_init() { dsn::service_app::register_factory<simple_adder_server>(\"adder\"); }\n"
  },
  {
    "path": "src/zookeeper/test/main.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n#include <iostream>\n\n#ifndef _WIN32\n#include <sys/types.h>\n#include <signal.h>\n#endif\n\n#include <gtest/gtest.h>\n#include <dsn/service_api_cpp.h>\n\nint g_test_count = 0;\nint g_test_ret = 0;\n\nextern void lock_test_init();\n\nclass test_client : public ::dsn::service_app\n{\npublic:\n    test_client(const dsn::service_app_info *info) : ::dsn::service_app(info) {}\n\n    ::dsn::error_code start(const std::vector<std::string> &args)\n    {\n        int argc = args.size();\n        char *argv[20];\n        for (int i = 0; i < argc; ++i) {\n            argv[i] = (char *)(args[i].c_str());\n        }\n        testing::InitGoogleTest(&argc, argv);\n        g_test_ret = RUN_ALL_TESTS();\n        g_test_count = 1;\n        return 
::dsn::ERR_OK;\n    }\n\n    ::dsn::error_code stop(bool cleanup = false) { return ::dsn::ERR_OK; }\n};\n\nGTEST_API_ int main(int argc, char **argv)\n{\n    testing::InitGoogleTest(&argc, argv);\n\n    // register all possible services\n    dsn::service_app::register_factory<test_client>(\"test\");\n    lock_test_init();\n\n    // specify what services and tools will run in config file, then run\n    if (argc < 2)\n        dsn_run_config(\"config-test.ini\", false);\n    else\n        dsn_run_config(argv[1], false);\n\n    while (g_test_count == 0) {\n        std::this_thread::sleep_for(std::chrono::seconds(1));\n    }\n\n#ifndef ENABLE_GCOV\n    dsn_exit(g_test_ret);\n#endif\n    return g_test_ret;\n}\n"
  },
  {
    "path": "src/zookeeper/test/run.sh",
    "content": "#!/bin/bash\n# The MIT License (MIT)\n#\n# Copyright (c) 2015 Microsoft Corporation\n#\n# -=- Robust Distributed System Nucleus (rDSN) -=-\n#\n# Permission is hereby granted, free of charge, to any person obtaining a copy\n# of this software and associated documentation files (the \"Software\"), to deal\n# in the Software without restriction, including without limitation the rights\n# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n# copies of the Software, and to permit persons to whom the Software is\n# furnished to do so, subject to the following conditions:\n#\n# The above copyright notice and this permission notice shall be included in\n# all copies or substantial portions of the Software.\n#\n# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n# THE SOFTWARE.\n\n\nif [ -z \"${REPORT_DIR}\" ]; then\n    REPORT_DIR=\".\"\nfi\n\n./clear.sh\n\noutput_xml=\"${REPORT_DIR}/dsn.zookeeper.tests.xml\"\nGTEST_OUTPUT=\"xml:${output_xml}\" ./dsn.zookeeper.tests config-test.ini\n\nif [ $? -ne 0 ]; then\n    echo \"run dsn.zookeeper.tests failed\"\n    echo \"---- ls ----\"\n    ls -l\n    if find . -name log.1.txt; then\n        echo \"---- tail -n 100 log.1.txt ----\"\n        tail -n 100 `find . -name log.1.txt`\n    fi\n    if [ -f core ]; then\n        echo \"---- gdb ./dsn.zookeeper.tests core ----\"\n        gdb ./dsn.zookeeper.tests core -ex \"thread apply all bt\" -ex \"set pagination 0\" -batch\n    fi\n    exit 1\nfi\n"
  },
  {
    "path": "src/zookeeper/zookeeper_error.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     transform zookeeper error code to dsn error code, implementation\n *\n * Revision history:\n *     2015-12-04, @shengofsun (sunweijie@xiaomi.com)\n */\n\n#include <zookeeper/zookeeper.h>\n#include <dsn/utility/error_code.h>\n#include <dsn/tool-api/auto_codes.h>\n\n#include \"zookeeper_error.h\"\nnamespace dsn {\nnamespace dist {\n\nerror_code from_zerror(int zerr)\n{\n    if (ZOK == zerr)\n        return ERR_OK;\n    if (ZBADARGUMENTS == zerr || ZNOTEMPTY == zerr)\n        return ERR_INVALID_PARAMETERS;\n    if (ZCONNECTIONLOSS == zerr || ZOPERATIONTIMEOUT == zerr || ZINVALIDSTATE == zerr)\n        return ERR_TIMEOUT;\n    if (ZNONODE == zerr)\n        return ERR_OBJECT_NOT_FOUND;\n    if (ZNODEEXISTS == 
zerr)\n        return ERR_NODE_ALREADY_EXIST;\n    if (ZRUNTIMEINCONSISTENCY == zerr)\n        return ERR_INCONSISTENT_STATE;\n    return ERR_ZOOKEEPER_OPERATION;\n}\n}\n}\n"
  },
  {
    "path": "src/zookeeper/zookeeper_error.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     transform zookeeper error code to dsn error code, header\n *\n * Revision history:\n *     2015-12-04, @shengofsun (sunweijie@xiaomi.com)\n */\n#pragma once\n\n#include <dsn/tool-api/auto_codes.h>\n\nnamespace dsn {\nnamespace dist {\n\nerror_code from_zerror(int zerr);\n}\n}\n"
  },
  {
    "path": "src/zookeeper/zookeeper_session.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     a C++ wrapper of zookeeper c async api, implementation\n *\n * Revision history:\n *     2015-12-04, @shengofsun (sunweijie@xiaomi.com)\n */\n\n#include <zookeeper/zookeeper.h>\n#include <sasl/sasl.h>\n\n#include \"zookeeper_session.h\"\n#include \"zookeeper_session_mgr.h\"\n\n#include <dsn/utility/flags.h>\n\nnamespace dsn {\nnamespace security {\nDSN_DECLARE_bool(enable_zookeeper_kerberos);\nDSN_DEFINE_string(\"security\",\n                  zookeeper_kerberos_service_name,\n                  \"zookeeper\",\n                  \"zookeeper kerberos service name\");\n} // namespace security\n} // namespace dsn\n\nnamespace dsn {\nnamespace dist 
{\n\nzookeeper_session::zoo_atomic_packet::zoo_atomic_packet(unsigned int size)\n{\n    _capacity = size;\n    _count = 0;\n\n    _ops = (zoo_op_t *)malloc(sizeof(zoo_op_t) * size);\n    _results = (zoo_op_result_t *)malloc(sizeof(zoo_op_result_t) * size);\n\n    _paths.resize(size);\n    _datas.resize(size);\n}\n\nzookeeper_session::zoo_atomic_packet::~zoo_atomic_packet()\n{\n    for (int i = 0; i < _count; ++i) {\n        if (_ops[i].type == ZOO_CREATE_OP)\n            free(_ops[i].create_op.buf);\n        else if (_ops[i].type == ZOO_SETDATA_OP)\n            free(_ops[i].set_op.stat);\n    }\n    free(_ops);\n    free(_results);\n}\n\nchar *zookeeper_session::zoo_atomic_packet::alloc_buffer(int buffer_length)\n{\n    return (char *)malloc(buffer_length);\n}\n\n/*static*/\nconst char *zookeeper_session::string_zoo_operation(ZOO_OPERATION op)\n{\n    switch (op) {\n    case ZOO_CREATE:\n        return \"zoo_create\";\n    case ZOO_DELETE:\n        return \"zoo_delete\";\n    case ZOO_EXISTS:\n        return \"zoo_exists\";\n    case ZOO_GET:\n        return \"zoo_get\";\n    case ZOO_GETCHILDREN:\n        return \"zoo_getchildren\";\n    case ZOO_SET:\n        return \"zoo_set\";\n    case ZOO_ASYNC:\n        return \"zoo_async\";\n    case ZOO_TRANSACTION:\n        return \"zoo_transaction\";\n    default:\n        return \"invalid\";\n    }\n}\n\n/*static*/\nconst char *zookeeper_session::string_zoo_event(int zoo_event)\n{\n    if (ZOO_SESSION_EVENT == zoo_event)\n        return \"session event\";\n    if (ZOO_CREATED_EVENT == zoo_event)\n        return \"created event\";\n    if (ZOO_DELETED_EVENT == zoo_event)\n        return \"deleted event\";\n    if (ZOO_CHANGED_EVENT == zoo_event)\n        return \"changed event\";\n    if (ZOO_CHILD_EVENT == zoo_event)\n        return \"child event\";\n    if (ZOO_NOTWATCHING_EVENT == zoo_event)\n        return \"notwatching event\";\n    return \"invalid event\";\n}\n\n/*static*/\nconst char 
*zookeeper_session::string_zoo_state(int zoo_state)\n{\n    if (ZOO_CONNECTED_STATE == zoo_state)\n        return \"connected_state\";\n    if (ZOO_EXPIRED_SESSION_STATE == zoo_state)\n        return \"expired_session_state\";\n    if (ZOO_AUTH_FAILED_STATE == zoo_state)\n        return \"auth_failed_state\";\n    if (ZOO_CONNECTING_STATE == zoo_state)\n        return \"connecting_state\";\n    if (ZOO_ASSOCIATING_STATE == zoo_state)\n        return \"associating_state\";\n    if (ZOO_CONNECTED_STATE == zoo_state)\n        return \"connected_state\";\n    return \"invalid_state\";\n}\n\nzookeeper_session::~zookeeper_session() {}\n\nzookeeper_session::zookeeper_session(const service_app_info &node) : _handle(nullptr)\n{\n    _srv_node = node;\n}\n\nint zookeeper_session::attach(void *callback_owner, const state_callback &cb)\n{\n    utils::auto_write_lock l(_watcher_lock);\n    if (nullptr == _handle) {\n        if (dsn::security::FLAGS_enable_zookeeper_kerberos) {\n            zoo_sasl_params_t sasl_params = {0};\n            sasl_params.service = dsn::security::FLAGS_zookeeper_kerberos_service_name;\n            sasl_params.mechlist = \"GSSAPI\";\n            _handle = zookeeper_init_sasl(zookeeper_session_mgr::instance().zoo_hosts(),\n                                          global_watcher,\n                                          zookeeper_session_mgr::instance().timeout(),\n                                          nullptr,\n                                          this,\n                                          0,\n                                          NULL,\n                                          &sasl_params);\n        } else {\n            _handle = zookeeper_init(zookeeper_session_mgr::instance().zoo_hosts(),\n                                     global_watcher,\n                                     zookeeper_session_mgr::instance().timeout(),\n                                     nullptr,\n                                     this,\n           
                          0);\n        }\n        dassert(_handle != nullptr, \"zookeeper session init failed\");\n    }\n\n    _watchers.push_back(watcher_object());\n    _watchers.back().watcher_path = \"\";\n    _watchers.back().callback_owner = callback_owner;\n    _watchers.back().watcher_callback = cb;\n\n    return zoo_state(_handle);\n}\n\nvoid zookeeper_session::detach(void *callback_owner)\n{\n    utils::auto_write_lock l(_watcher_lock);\n    _watchers.remove_if([callback_owner](const watcher_object &obj) {\n        return obj.callback_owner == callback_owner;\n    });\n}\n\nvoid zookeeper_session::dispatch_event(int type, int zstate, const char *path)\n{\n    {\n        utils::auto_read_lock l(_watcher_lock);\n        int ret_code = type;\n        if (ZOO_SESSION_EVENT == ret_code)\n            ret_code = zstate;\n\n        std::for_each(\n            _watchers.begin(), _watchers.end(), [path, ret_code](const watcher_object &obj) {\n                if (obj.watcher_path == path)\n                    obj.watcher_callback(ret_code);\n            });\n    }\n    {\n        if (ZOO_SESSION_EVENT != type) {\n            utils::auto_write_lock l(_watcher_lock);\n            _watchers.remove_if(\n                [path](const watcher_object &obj) { return obj.watcher_path == path; });\n        }\n    }\n}\n\nvoid zookeeper_session::visit(zoo_opcontext *ctx)\n{\n    ctx->_priv_session_ref = this;\n\n    if (zoo_state(_handle) != ZOO_CONNECTED_STATE) {\n        ctx->_output.error = ZINVALIDSTATE;\n        ctx->_callback_function(ctx);\n        release_ref(ctx);\n        return;\n    }\n\n    auto add_watch_object = [this, ctx]() {\n        utils::auto_write_lock l(_watcher_lock);\n        _watchers.push_back(watcher_object());\n        _watchers.back().watcher_path = ctx->_input._path;\n        _watchers.back().callback_owner = ctx->_input._owner;\n        _watchers.back().watcher_callback = std::move(ctx->_input._watcher_callback);\n    };\n\n    // TODO: the read 
ops from zookeeper might get the staled data, need to fix\n    int ec = ZOK;\n    zoo_input &input = ctx->_input;\n    const char *path = input._path.c_str();\n    switch (ctx->_optype) {\n    case ZOO_CREATE:\n        ec = zoo_acreate(_handle,\n                         path,\n                         input._value.data(),\n                         input._value.length(),\n                         &ZOO_OPEN_ACL_UNSAFE,\n                         ctx->_input._flags,\n                         global_string_completion,\n                         (const void *)ctx);\n        break;\n    case ZOO_DELETE:\n        ec = zoo_adelete(_handle, path, -1, global_void_completion, (const void *)ctx);\n        break;\n    case ZOO_EXISTS:\n        if (1 == input._is_set_watch)\n            add_watch_object();\n        ec = zoo_aexists(\n            _handle, path, input._is_set_watch, global_state_completion, (const void *)ctx);\n        break;\n    case ZOO_GET:\n        if (1 == input._is_set_watch)\n            add_watch_object();\n        ec =\n            zoo_aget(_handle, path, input._is_set_watch, global_data_completion, (const void *)ctx);\n        break;\n    case ZOO_SET:\n        ec = zoo_aset(_handle,\n                      path,\n                      input._value.data(),\n                      input._value.length(),\n                      -1,\n                      global_state_completion,\n                      (const void *)ctx);\n        break;\n    case ZOO_GETCHILDREN:\n        if (1 == input._is_set_watch)\n            add_watch_object();\n        ec = zoo_aget_children(\n            _handle, path, input._is_set_watch, global_strings_completion, (const void *)ctx);\n        break;\n    case ZOO_TRANSACTION:\n        ec = zoo_amulti(_handle,\n                        input._pkt->_count,\n                        input._pkt->_ops,\n                        input._pkt->_results,\n                        global_void_completion,\n                        (const void 
*)ctx);\n        break;\n    default:\n        break;\n    }\n\n    if (ZOK != ec) {\n        ctx->_output.error = ec;\n        ctx->_callback_function(ctx);\n        release_ref(ctx);\n    }\n}\n\nvoid zookeeper_session::init_non_dsn_thread()\n{\n    static __thread int dsn_context_init = 0;\n    if (dsn_context_init == 0) {\n        dsn_mimic_app(_srv_node.role_name.c_str(), _srv_node.index);\n        dsn_context_init = 1;\n    }\n}\n\n/*\n * the following static functions are in zookeeper threads,\n */\n/* static */\nvoid zookeeper_session::global_watcher(\n    zhandle_t *handle, int type, int state, const char *path, void *ctx)\n{\n    zookeeper_session *zoo_session = (zookeeper_session *)ctx;\n    zoo_session->init_non_dsn_thread();\n    ddebug(\"global watcher, type(%s), state(%s)\", string_zoo_event(type), string_zoo_state(state));\n    if (type != ZOO_SESSION_EVENT && path != nullptr)\n        ddebug(\"watcher path: %s\", path);\n\n    dassert(zoo_session->_handle == handle, \"\");\n    zoo_session->dispatch_event(type, state, type == ZOO_SESSION_EVENT ? 
\"\" : path);\n}\n\n#define COMPLETION_INIT(rc, data)                                                                  \\\n    zoo_opcontext *op_ctx = (zoo_opcontext *)data;                                                 \\\n    op_ctx->_priv_session_ref->init_non_dsn_thread();                                              \\\n    zoo_output &output = op_ctx->_output;                                                          \\\n    output.error = rc\n/* static */\nvoid zookeeper_session::global_string_completion(int rc, const char *name, const void *data)\n{\n    COMPLETION_INIT(rc, data);\n    dinfo(\"rc(%s), input path(%s)\", zerror(rc), op_ctx->_input._path.c_str());\n    if (ZOK == rc && name != nullptr)\n        dinfo(\"created path:%s\", name);\n    output.create_op._created_path = name;\n    op_ctx->_callback_function(op_ctx);\n    release_ref(op_ctx);\n}\n/* static */\nvoid zookeeper_session::global_data_completion(\n    int rc, const char *value, int value_length, const Stat *, const void *data)\n{\n    COMPLETION_INIT(rc, data);\n    dinfo(\"rc(%s), input path(%s)\", zerror(rc), op_ctx->_input._path.c_str());\n    output.get_op.value_length = value_length;\n    output.get_op.value = value;\n    op_ctx->_callback_function(op_ctx);\n    release_ref(op_ctx);\n}\n/* static */\nvoid zookeeper_session::global_state_completion(int rc, const Stat *stat, const void *data)\n{\n    COMPLETION_INIT(rc, data);\n    dinfo(\"rc(%s), input path(%s)\", zerror(rc), op_ctx->_input._path.c_str());\n    if (op_ctx->_optype == ZOO_EXISTS) {\n        output.exists_op._node_stat = stat;\n        op_ctx->_callback_function(op_ctx);\n    } else {\n        output.set_op._node_stat = stat;\n        op_ctx->_callback_function(op_ctx);\n    }\n    release_ref(op_ctx);\n}\n/* static */\nvoid zookeeper_session::global_strings_completion(int rc,\n                                                  const String_vector *strings,\n                                                  const void 
*data)\n{\n    COMPLETION_INIT(rc, data);\n    dinfo(\"rc(%s), input path(%s)\", zerror(rc), op_ctx->_input._path.c_str());\n    if (rc == ZOK && strings != nullptr)\n        dinfo(\"child count: %d\", strings->count);\n    output.getchildren_op.strings = strings;\n    op_ctx->_callback_function(op_ctx);\n    release_ref(op_ctx);\n}\n/* static */\nvoid zookeeper_session::global_void_completion(int rc, const void *data)\n{\n    COMPLETION_INIT(rc, data);\n    if (op_ctx->_optype == ZOO_DELETE)\n        dinfo(\"rc(%s), input path( %s )\", zerror(rc), op_ctx->_input._path.c_str());\n    else\n        dinfo(\"rc(%s)\", zerror(rc));\n    op_ctx->_callback_function(op_ctx);\n    release_ref(op_ctx);\n}\n}\n}\n"
  },
  {
    "path": "src/zookeeper/zookeeper_session.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     a C++ wrapper of zookeeper c async api, header\n *\n * Revision history:\n *     2015-12-04, @shengofsun (sunweijie@xiaomi.com)\n */\n\n#include <dsn/utility/utils.h>\n#include <dsn/utility/singleton.h>\n#include <dsn/utility/synchronize.h>\n\n#include <thread>\n#include <zookeeper/zookeeper.h>\n#include \"zookeeper_session_mgr.h\"\n\nnamespace dsn {\nnamespace dist {\n\nclass zookeeper_session\n{\npublic:\n    enum ZOO_OPERATION\n    {\n        ZOO_CREATE,\n        ZOO_DELETE,\n        ZOO_EXISTS,\n        ZOO_GET,\n        ZOO_GETCHILDREN,\n        ZOO_SET,\n        ZOO_ASYNC,\n        ZOO_TRANSACTION,\n        ZOO_OPCOUNT,\n        ZOO_OPINVALID\n    };\n\n    struct zoo_atomic_packet\n    {\n    
public:\n        unsigned int _capacity;\n        unsigned int _count;\n        zoo_op_t *_ops;\n        zoo_op_result_t *_results;\n\n        std::vector<std::string> _paths;\n        std::vector<blob> _datas;\n\n    public:\n        zoo_atomic_packet(unsigned int size);\n        ~zoo_atomic_packet();\n\n        static char *alloc_buffer(int buffer_length);\n    };\n\n    struct zoo_input\n    {\n        std::string _path;\n\n        /* for create and set */\n        blob _value;\n        /* for create */\n        int _flags;\n        /* for get/exists/get_children */\n        int _is_set_watch;\n\n        /* for watcher callback */\n        void *_owner;\n        std::function<void(int)> _watcher_callback;\n\n        /* for multi-op transaction */\n        std::shared_ptr<zoo_atomic_packet> _pkt;\n    };\n\n    struct zoo_output\n    {\n        int error;\n        union\n        {\n            struct\n            {\n                const char *_created_path;\n            } create_op;\n            struct\n            {\n                const struct Stat *_node_stat;\n            } exists_op;\n            struct\n            {\n                const struct Stat *_node_stat;\n            } set_op;\n            struct\n            {\n                const String_vector *strings;\n            } getchildren_op;\n            struct\n            {\n                const char *value;\n                int value_length;\n            } get_op;\n        };\n    };\n\n    struct zoo_opcontext : public dsn::ref_counter\n    {\n        ZOO_OPERATION _optype;\n        zoo_input _input;\n        zoo_output _output;\n        std::function<void(zoo_opcontext *)> _callback_function;\n\n        // this are for implement usage, user shouldn't modify this directly\n        zookeeper_session *_priv_session_ref;\n        int32_t _ref_count;\n    };\n\n    static zoo_opcontext *create_context()\n    {\n        zoo_opcontext *result = new zoo_opcontext();\n        result->_input._flags = 
0;\n        result->_input._is_set_watch = false;\n        result->_input._owner = nullptr;\n        result->_input._watcher_callback = nullptr;\n\n        memset(&(result->_output), 0, sizeof(zoo_output));\n\n        result->_optype = ZOO_OPINVALID;\n        result->_callback_function = nullptr;\n        result->_priv_session_ref = nullptr;\n\n        result->add_ref();\n        return result;\n    }\n\n    static void add_ref(zoo_opcontext *op) { op->add_ref(); }\n    static void release_ref(zoo_opcontext *op) { op->release_ref(); }\n    static const char *string_zoo_operation(ZOO_OPERATION op);\n    static const char *string_zoo_event(int zoo_event);\n    static const char *string_zoo_state(int zoo_state);\n\npublic:\n    typedef std::function<void(int)> state_callback;\n    zookeeper_session(const service_app_info &info);\n    ~zookeeper_session();\n    int attach(void *callback_owner, const state_callback &cb);\n    void detach(void *callback_owner);\n\n    int session_state() const { return zoo_state(_handle); }\n    void visit(zoo_opcontext *op_context);\n    void init_non_dsn_thread();\n\nprivate:\n    utils::rw_lock_nr _watcher_lock;\n    struct watcher_object\n    {\n        std::string watcher_path;\n        void *callback_owner;\n        state_callback watcher_callback;\n    };\n    std::list<watcher_object> _watchers;\n    service_app_info _srv_node;\n    zhandle_t *_handle;\n\n    void dispatch_event(int type, int zstate, const char *path);\n    static void global_watcher(zhandle_t *handle, int type, int state, const char *path, void *ctx);\n    static void global_string_completion(int rc, const char *name, const void *data);\n    static void global_data_completion(\n        int rc, const char *value, int value_length, const struct Stat *stat, const void *data);\n    static void global_state_completion(int rc, const struct Stat *stat, const void *data);\n    static void\n    global_strings_completion(int rc, const struct String_vector *strings, const 
void *data);\n    static void global_void_completion(int rc, const void *data);\n};\n}\n}\n"
  },
  {
    "path": "src/zookeeper/zookeeper_session_mgr.cpp",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     a singleton to manager all zookeeper sessions, so that each zookeeper session\n *     can be shared by all threads in one service-node. 
The implementation file.\n *\n * Revision history:\n *     2015-12-04, @shengofsun (sunweijie@xiaomi.com)\n */\n#include \"zookeeper_session_mgr.h\"\n#include \"zookeeper_session.h\"\n\n#include <stdio.h>\n#include <zookeeper/zookeeper.h>\n#include <stdexcept>\n\nnamespace dsn {\nnamespace dist {\n\nzookeeper_session_mgr::zookeeper_session_mgr()\n{\n    _zoo_hosts = dsn_config_get_value_string(\"zookeeper\", \"hosts_list\", \"\", \"zookeeper_hosts\");\n    _timeout_ms = dsn_config_get_value_uint64(\n        \"zookeeper\", \"timeout_ms\", 30000, \"zookeeper_timeout_milliseconds\");\n    _zoo_logfile = dsn_config_get_value_string(\"zookeeper\", \"logfile\", \"\", \"zookeeper logfile\");\n\n    FILE *fp = fopen(_zoo_logfile.c_str(), \"a\");\n    if (fp != nullptr)\n        zoo_set_log_stream(fp);\n}\n\nzookeeper_session *zookeeper_session_mgr::get_session(const service_app_info &info)\n{\n    auto &store = utils::singleton_store<int, zookeeper_session *>::instance();\n    zookeeper_session *ans = nullptr;\n    utils::auto_lock<utils::ex_lock_nr> l(_store_lock);\n    if (!store.get(info.entity_id, ans)) {\n        ans = new zookeeper_session(info);\n        store.put(info.entity_id, ans);\n    }\n    return ans;\n}\n}\n}\n"
  },
  {
    "path": "src/zookeeper/zookeeper_session_mgr.h",
    "content": "/*\n * The MIT License (MIT)\n *\n * Copyright (c) 2015 Microsoft Corporation\n *\n * -=- Robust Distributed System Nucleus (rDSN) -=-\n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to deal\n * in the Software without restriction, including without limitation the rights\n * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n * copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n * THE SOFTWARE.\n */\n\n/*\n * Description:\n *     a singleton to manager all zookeeper sessions, so that each zookeeper session\n *     can be shared by all threads in one service-node. 
The Header file.\n *\n * Revision history:\n *     2015-12-04, @shengofsun (sunweijie@xiaomi.com)\n */\n\n#include <dsn/utility/singleton_store.h>\n#include <dsn/service_api_cpp.h>\n#include <string>\n\n#pragma once\n\nnamespace dsn {\nnamespace dist {\n\nclass zookeeper_session;\nclass zookeeper_session_mgr : public utils::singleton<zookeeper_session_mgr>\n{\npublic:\n    zookeeper_session *get_session(const service_app_info &info);\n    const char *zoo_hosts() const { return _zoo_hosts.c_str(); }\n    int timeout() const { return _timeout_ms; }\n    const char *zoo_logfile() const { return _zoo_logfile.c_str(); }\n\nprivate:\n    zookeeper_session_mgr();\n    ~zookeeper_session_mgr() = default;\n\n    utils::ex_lock_nr _store_lock;\n    std::string _zoo_hosts;\n    int _timeout_ms;\n    std::string _zoo_logfile;\n\n    friend class utils::singleton<zookeeper_session_mgr>;\n};\n} // namespace dist\n} // namespace dsn\n"
  },
  {
    "path": "thirdparty/CMakeLists.txt",
    "content": "##############################################################################\n# Licensed to the Apache Software Foundation (ASF) under one\n# or more contributor license agreements.  See the NOTICE file\n# distributed with this work for additional information\n# regarding copyright ownership.  The ASF licenses this file\n# to you under the Apache License, Version 2.0 (the\n# \"License\"); you may not use this file except in compliance\n# with the License.  You may obtain a copy of the License at\n#\n#   http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing,\n# software distributed under the License is distributed on an\n# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n# KIND, either express or implied.  See the License for the\n# specific language governing permissions and limitations\n# under the License.\n##############################################################################\n\nproject(pegasus_thirdparties)\ncmake_minimum_required(VERSION 3.11.0)\n\nif (\"${CMAKE_CXX_COMPILER_ID}\" STREQUAL \"GNU\")\n    # require at least gcc 5.4.0\n    if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.4.0)\n        message(FATAL_ERROR \"GCC version must be at least 5.4.0!\")\n    endif ()\nendif ()\n\nif (APPLE)\n    if (${MACOS_OPENSSL_ROOT_DIR} STREQUAL \"\")\n        message(FATAL_ERROR \"OpenSSL root should be set for MacOS\")\n    endif()\nendif()\n\ninclude(ExternalProject)\ninclude(CheckCXXCompilerFlag)\n\nset(TP_DIR ${PROJECT_SOURCE_DIR})\nset(TP_OUTPUT ${PROJECT_SOURCE_DIR}/output)\n\n# TMP_DIR      = <base>/tmp/<name>\n# STAMP_DIR    = <base>/Stamp/<name>\n# DOWNLOAD_DIR = <base>/Download/<name>\n# SOURCE_DIR   = <base>/Source/<name>\n# BINARY_DIR   = <base>/Build/<name>\n# INSTALL_DIR  = <base>/Install/<name>\n# LOG_DIR      = <STAMP_DIR>\nset_property(DIRECTORY PROPERTY EP_BASE ${TP_DIR}/build)\n\nset(OSS_URL_PREFIX 
\"http://pegasus-thirdparty-package.oss-cn-beijing.aliyuncs.com\")\n\nmessage(STATUS \"Setting up third-parties...\")\n\nExternalProject_Add(boost\n        URL ${OSS_URL_PREFIX}/boost_1_69_0.tar.bz2\n        https://dl.bintray.com/boostorg/release/1.69.0/source/boost_1_69_0.tar.bz2\n        URL_MD5 a1332494397bf48332cb152abfefcec2\n        CONFIGURE_COMMAND ./bootstrap.sh --prefix=. --with-libraries=system,filesystem,regex --with-toolset=gcc\n        BUILD_COMMAND ./b2 toolset=gcc cxxflags=-fPIC cxxstd=11 install\n        INSTALL_COMMAND cp -R include/boost ${TP_OUTPUT}/include && cp -R lib ${TP_OUTPUT}/\n        BUILD_IN_SOURCE 1\n        )\n\n# header-only\nfile(MAKE_DIRECTORY ${TP_OUTPUT}/include/concurrentqueue)\nExternalProject_Add(concurrentqueue\n        URL ${OSS_URL_PREFIX}/concurrentqueue-1.0.1.tar.gz\n        https://codeload.github.com/cameron314/concurrentqueue/tar.gz/v1.0.1\n        URL_MD5 80016b584fddffd67073349efd7b8958\n        CONFIGURE_COMMAND \"\"\n        BUILD_COMMAND \"\"\n        INSTALL_COMMAND cp -R blockingconcurrentqueue.h concurrentqueue.h lightweightsemaphore.h internal/ ${TP_OUTPUT}/include/concurrentqueue\n        BUILD_IN_SOURCE 1\n        )\n\n# header-only\nfile(MAKE_DIRECTORY ${TP_OUTPUT}/include/readerwriterqueue)\nExternalProject_Add(readerwriterqueue\n        URL ${OSS_URL_PREFIX}/readerwriterqueue-1.0.2.tar.gz\n        https://codeload.github.com/cameron314/readerwriterqueue/tar.gz/v1.0.2\n        URL_MD5 9e355a2660bd2810cb1874fb7366906e\n        CONFIGURE_COMMAND \"\"\n        BUILD_COMMAND \"\"\n        INSTALL_COMMAND cp -R atomicops.h readerwriterqueue.h ${TP_OUTPUT}/include/readerwriterqueue\n        BUILD_IN_SOURCE 1\n        )\n\nExternalProject_Add(googletest\n        URL ${OSS_URL_PREFIX}/googletest-release-1.8.0.tar.gz\n        https://codeload.github.com/google/googletest/tar.gz/release-1.8.0\n        URL_MD5 16877098823401d1bf2ed7891d7dce36\n        CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${TP_OUTPUT}\n        
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}\n        -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}\n        )\nExternalProject_Get_property(googletest SOURCE_DIR)\nset(googletest_SRC ${SOURCE_DIR})\n\nExternalProject_Add(gperftools\n        URL ${OSS_URL_PREFIX}/gperftools-2.7.tar.gz\n        https://github.com/gperftools/gperftools/releases/download/gperftools-2.7/gperftools-2.7.tar.gz\n        URL_MD5 c6a852a817e9160c79bdb2d3101b4601\n        CONFIGURE_COMMAND ./configure --prefix=${TP_OUTPUT} --enable-static=no --enable-frame-pointers=yes\n        BUILD_IN_SOURCE 1\n        )\n\nset(HDFS_CLIENT_DIR \"hadoop-hdfs-project/hadoop-hdfs-native-client\")\nExternalProject_Add(hadoop\n        URL ${OSS_URL_PREFIX}/hadoop-release-2.8.4.tar.gz\n        https://github.com/apache/hadoop/archive/refs/tags/rel/release-2.8.4.tar.gz\n        URL_MD5 a1be737d4bff14923689619ab6545a96\n        PATCH_COMMAND \"\"\n        COMMAND cd ${HDFS_CLIENT_DIR} && mvn package -Pdist,native -DskipTests -Dmaven.javadoc.skip=true -Dtar\n        COMMAND cd ${HDFS_CLIENT_DIR} && cp -R target/hadoop-hdfs-native-client-2.8.4/include/. ${TP_OUTPUT}/include/hdfs && cp -R target/hadoop-hdfs-native-client-2.8.4/lib/native/. 
${TP_OUTPUT}/lib\n        CONFIGURE_COMMAND \"\"\n        BUILD_COMMAND \"\"\n        INSTALL_COMMAND \"\"\n        )\n\n# header-only\nExternalProject_Add(rapidjson\n        URL ${OSS_URL_PREFIX}/rapidjson-1.1.0.tar.gz\n        https://codeload.github.com/Tencent/rapidjson/tar.gz/v1.1.0\n        URL_MD5 badd12c511e081fec6c89c43a7027bce\n        CONFIGURE_COMMAND \"\"\n        BUILD_COMMAND \"\"\n        INSTALL_COMMAND cp -R include/rapidjson ${TP_OUTPUT}/include\n        BUILD_IN_SOURCE 1\n        )\n\nExternalProject_Add(thrift\n        URL ${OSS_URL_PREFIX}/thrift-0.9.3.tar.gz\n        http://archive.apache.org/dist/thrift/0.9.3/thrift-0.9.3.tar.gz\n        URL_MD5 88d667a8ae870d5adeca8cb7d6795442\n        PATCH_COMMAND patch -p1 < ${TP_DIR}/fix_thrift_for_cpp11.patch\n        CMAKE_ARGS -DCMAKE_BUILD_TYPE=release\n        -DWITH_JAVA=OFF\n        -DWITH_PYTHON=OFF\n        -DWITH_C_GLIB=OFF\n        -DWITH_CPP=ON\n        -DBUILD_TESTING=OFF\n        -DBUILD_EXAMPLES=OFF\n        -DWITH_QT5=OFF\n        -DWITH_QT4=OFF\n        -DWITH_OPENSSL=OFF\n        -DBUILD_COMPILER=ON\n        -DBUILD_TUTORIALS=OFF\n        -DWITH_LIBEVENT=OFF\n        -DCMAKE_INSTALL_PREFIX=${TP_OUTPUT}\n        -DCMAKE_POSITION_INDEPENDENT_CODE=ON\n        -DWITH_SHARED_LIB=OFF\n        -DBOOST_ROOT=${TP_OUTPUT}\n        -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}\n        -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}\n        DEPENDS boost\n        )\n\ncheck_cxx_compiler_flag(-Wformat-overflow COMPILER_SUPPORTS_FORMAT_OVERFLOW)\nif (COMPILER_SUPPORTS_FORMAT_OVERFLOW)\n    set(ZOOKEEPER_CFLAGS -Wno-error=format-overflow)\nendif ()\nExternalProject_Add(zookeeper\n        URL ${OSS_URL_PREFIX}/apache-zookeeper-3.7.0.tar.gz\n        http://downloads.apache.org/zookeeper/zookeeper-3.7.0/apache-zookeeper-3.7.0.tar.gz\n        URL_MD5 44c2a33e01931aed94ef7f3d39d0963e\n        PATCH_COMMAND \"\"\n        COMMAND cd zookeeper-jute && mvn compile && cd ../zookeeper-client/zookeeper-client-c && cmake 
-DCMAKE_BUILD_TYPE=release -DWANT_CPPUNIT=OFF -DWITH_OPENSSL=OFF -DWITH_CYRUS_SASL=${TP_OUTPUT} -DCMAKE_INSTALL_PREFIX=${TP_OUTPUT}\n        COMMAND cd zookeeper-client/zookeeper-client-c && make\n        COMMAND cp -R zookeeper-client/zookeeper-client-c/include/. ${TP_OUTPUT}/include/zookeeper && cp zookeeper-client/zookeeper-client-c/generated/zookeeper.jute.h ${TP_OUTPUT}/include/zookeeper && cp zookeeper-client/zookeeper-client-c/libzookeeper.a ${TP_OUTPUT}/lib && cp zookeeper-client/zookeeper-client-c/libhashtable.a ${TP_OUTPUT}/lib\n        CONFIGURE_COMMAND \"\"\n        BUILD_COMMAND \"\"\n        INSTALL_COMMAND \"\"\n        BUILD_IN_SOURCE 1\n        )\n\nExternalProject_Add(libevent\n        URL ${OSS_URL_PREFIX}/libevent-release-2.1.8-stable.tar.gz\n        https://github.com/libevent/libevent/archive/release-2.1.8-stable.tar.gz\n        URL_MD5 80f8652e4b08d2ec86a5f5eb46b74510\n        CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${TP_OUTPUT}\n        -DEVENT__DISABLE_DEBUG_MODE=On\n        -DCMAKE_POSITION_INDEPENDENT_CODE=ON\n        -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}\n        -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}\n        )\n\nExternalProject_Add(poco\n        URL ${OSS_URL_PREFIX}/poco-1.11.1-release.tar.gz\n        https://github.com/pocoproject/poco/archive/refs/tags/poco-1.11.1-release.tar.gz\n        URL_MD5 a96210b60a675c4a8183ad1f2099d549\n        CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${TP_OUTPUT}\n        -DOPENSSL_ROOT_DIR=${MACOS_OPENSSL_ROOT_DIR}\n        -DENABLE_MONGODB=OFF\n        -DENABLE_PDF=OFF\n        -DENABLE_DATA=OFF\n        -DENABLE_DATA_SQLITE=OFF\n        -DENABLE_DATA_MYSQL=OFF\n        -DENABLE_DATA_ODBC=OFF\n        -DENABLE_SEVENZIP=OFF\n        -DENABLE_ZIP=OFF\n        -DENABLE_APACHECONNECTOR=OFF\n        -DENABLE_CPPPARSER=OFF\n        -DENABLE_POCODOC=OFF\n        -DENABLE_PAGECOMPILER=OFF\n        -DENABLE_PAGECOMPILER_FILE2PAGE=OFF\n        -DCMAKE_POSITION_INDEPENDENT_CODE=ON\n        
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}\n        -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}\n        DEPENDS boost\n        )\n\nExternalProject_Add(fds\n        URL ${OSS_URL_PREFIX}/fds-1.0.0.tar.gz\n        https://github.com/XiaoMi/galaxy-fds-sdk-cpp/archive/refs/tags/v1.0.0.tar.gz\n        URL_MD5 f7e0f86534f7b15c2a9b349f5cb45503\n        PATCH_COMMAND patch -p1 < ${TP_DIR}/fix_fds_for_macos.patch\n        CMAKE_ARGS -DPOCO_INCLUDE=${TP_OUTPUT}/include\n        -DMACOS_OPENSSL_INCLUDE_DIR=${MACOS_OPENSSL_ROOT_DIR}/include\n        -DPOCO_LIB=${TP_OUTPUT}/lib\n        -DCMAKE_POSITION_INDEPENDENT_CODE=ON\n        -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}\n        -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}\n        INSTALL_COMMAND cp libgalaxy-fds-sdk-cpp.a ${TP_OUTPUT}/lib\n        COMMAND rm -rf ${TP_OUTPUT}/include/fds\n        COMMAND cp -r include fds-include\n        COMMAND mv fds-include ${TP_OUTPUT}/include/fds # install fds headers into a stand-alone directory\n        UPDATE_COMMAND \"\" # do not update\n        DEPENDS googletest poco boost\n        BUILD_IN_SOURCE 1\n        )\n\n# fmtlib >=6.x requires c++14 support, do not update this library for now\nExternalProject_Add(fmt\n        URL ${OSS_URL_PREFIX}/fmt-5.3.0.tar.gz\n        https://github.com/fmtlib/fmt/archive/refs/tags/5.3.0.tar.gz\n        URL_MD5 1015bf3ff2a140dfe03de50ee2469401\n        CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${TP_OUTPUT}\n        -DCMAKE_BUILD_TYPE=release\n        -DFMT_TEST=false\n        -DCMAKE_POSITION_INDEPENDENT_CODE=ON\n        -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}\n        -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}\n        )\n\nExternalProject_Add(gflags\n        URL ${OSS_URL_PREFIX}/gflags-2.2.1.zip\n        https://github.com/gflags/gflags/archive/v2.2.1.zip\n        URL_MD5 2d988ef0b50939fb50ada965dafce96b\n        CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${TP_OUTPUT}\n        -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}\n        -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}\n   
     )\n\n# civetweb is one of the dependencies of prometheus-cpp, do not build & install\nExternalProject_Add(civetweb\n        URL ${OSS_URL_PREFIX}/civetweb-1.11.tar.gz\n        https://codeload.github.com/civetweb/civetweb/tar.gz/v1.11\n        URL_MD5 b6d2175650a27924bccb747cbe084cd4\n        CONFIGURE_COMMAND \"\"\n        BUILD_COMMAND \"\"\n        INSTALL_COMMAND \"\"\n        )\nExternalProject_Get_property(civetweb SOURCE_DIR)\nset(civetweb_SRC ${SOURCE_DIR})\n\nExternalProject_Add(curl\n        URL ${OSS_URL_PREFIX}/curl-7.47.0.tar.gz\n        http://curl.haxx.se/download/curl-7.47.0.tar.gz\n        URL_MD5 5109d1232d208dfd712c0272b8360393\n        CONFIGURE_COMMAND ./configure --prefix=${TP_OUTPUT}\n        --disable-dict\n        --disable-file\n        --disable-ftp\n        --disable-gopher\n        --disable-imap\n        --disable-ipv6\n        --disable-ldap\n        --disable-ldaps\n        --disable-manual\n        --disable-pop3\n        --disable-rtsp\n        --disable-smtp\n        --disable-telnet\n        --disable-tftp\n        --disable-shared\n        --without-librtmp\n        --without-zlib\n        --without-libssh2\n        --without-ssl\n        --without-libidn\n        BUILD_IN_SOURCE 1\n        )\n\nExternalProject_Add(prometheus-cpp\n        URL ${OSS_URL_PREFIX}/prometheus-cpp-0.7.0.tar.gz\n        https://codeload.github.com/jupp0r/prometheus-cpp/tar.gz/v0.7.0\n        URL_MD5 dc75c31ceaefd160e978365bdca8eb01\n        DEPENDS civetweb curl\n        PATCH_COMMAND rm -rf 3rdparty/civetweb && cp -R ${civetweb_SRC} 3rdparty/civetweb # replace the submodule\n        CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${TP_OUTPUT}\n        -DENABLE_TESTING=OFF\n        -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}\n        -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}\n        )\n\n# header-only\nExternalProject_Add(nlohmann_json\n        URL ${OSS_URL_PREFIX}/nlohmann_json-3.9.1.zip\n        
https://github.com/nlohmann/json/releases/download/v3.9.1/include.zip\n        URL_MD5 d2f66c608af689e21d69a33c220e974e\n        CONFIGURE_COMMAND \"\"\n        BUILD_COMMAND \"\"\n        INSTALL_COMMAND cp -R include/nlohmann ${TP_OUTPUT}/include\n        BUILD_IN_SOURCE 1\n        )\n\nExternalProject_Add(s2geometry\n        URL ${OSS_URL_PREFIX}/s2geometry-e8d16637a467d9f096a92a6d81a50a9b747ca828.zip\n        https://github.com/neverchanje/s2geometry/archive/e8d16637a467d9f096a92a6d81a50a9b747ca828.zip\n        URL_MD5 75cc44c9c31382083d8a2d0e42590788\n        PATCH_COMMAND patch -p0 < ${TP_DIR}/fix_s2_for_aarch64.patch\n        CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${TP_OUTPUT}\n        -DGTEST_ROOT=${googletest_SRC}/googletest\n        -DBUILD_SHARED_LIBS=OFF\n        -DBUILD_PYTHON=OFF\n        -DBUILD_TESTING=OFF\n        -DBUILD_EXAMPLES=OFF\n        -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}\n        -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}\n        DEPENDS googletest\n        )\n\noption(USE_JEMALLOC \"use jemalloc\" OFF)\nmessage(STATUS \"USE_JEMALLOC = ${USE_JEMALLOC}\")\n\nExternalProject_Add(jemalloc\n        URL ${OSS_URL_PREFIX}/jemalloc-5.2.1.tar.bz2\n        https://github.com/jemalloc/jemalloc/releases/download/5.2.1/jemalloc-5.2.1.tar.bz2\n        URL_MD5 3d41fbf006e6ebffd489bdb304d009ae\n        CONFIGURE_COMMAND ./configure --prefix=${TP_OUTPUT} --enable-cxx --enable-stats --enable-prof\n        BUILD_COMMAND make\n        INSTALL_COMMAND make install\n        BUILD_IN_SOURCE 1\n        )\n\noption(ROCKSDB_PORTABLE \"build a portable binary\" OFF)\n\nExternalProject_Add(rocksdb\n        URL ${OSS_URL_PREFIX}/pegasus-rocksdb-ef29819c7a1ea9334ae170f506b653757f517a52.zip\n        https://github.com/XiaoMi/pegasus-rocksdb/archive/ef29819c7a1ea9334ae170f506b653757f517a52.zip\n        URL_MD5 2a6488cd87c37e0c2e42a5ca74bebc87\n        DEPENDS jemalloc\n        CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${TP_OUTPUT}\n        -DFAIL_ON_WARNINGS=OFF\n        
-DWITH_BENCHMARK_TOOLS=OFF\n        -DWITH_TOOLS=OFF\n        -DWITH_LZ4=ON\n        -DWITH_ZSTD=ON\n        -DWITH_SNAPPY=ON\n        -DWITH_BZ2=OFF\n        -DWITH_TESTS=OFF\n        -DWITH_GFLAGS=OFF\n        -DUSE_RTTI=ON\n        -DWITH_JEMALLOC=${USE_JEMALLOC}\n        -DJEMALLOC_ROOT_DIR=${TP_OUTPUT}\n        -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}\n        -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}\n        -DPORTABLE=${ROCKSDB_PORTABLE}\n        )\n\nExternalProject_Add(http-parser\n        URL ${OSS_URL_PREFIX}/http-parser-2.9.4.zip\n        https://github.com/nodejs/http-parser/archive/v2.9.4.zip\n        URL_MD5 f6900b9209d3d6b80c70e050ac33b834\n        CONFIGURE_COMMAND mkdir -p ${TP_OUTPUT}/include/nodejs\n        BUILD_COMMAND \"\"\n        INSTALL_COMMAND cp -R http_parser.h ${TP_OUTPUT}/include/nodejs/http_parser.h\n        BUILD_IN_SOURCE 1\n        )\n"
  },
  {
    "path": "thirdparty/fix_fds_for_macos.patch",
    "content": "diff --git a/CMakeLists.txt b/CMakeLists.txt\nindex d2d5f09..6de3158 100644\n--- a/CMakeLists.txt\n+++ b/CMakeLists.txt\n@@ -11,6 +11,9 @@ add_library(galaxy-fds-sdk-cpp ${DIR_SRCS})\n target_include_directories(galaxy-fds-sdk-cpp PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)\n \n include_directories(${POCO_INCLUDE})\n+if (APPLE)\n+    include_directories(${MACOS_OPENSSL_INCLUDE_DIR})\n+endif()\n link_directories(${POCO_LIB})\n \n option(WITH_TEST \"enable building test\" OFF)\n"
  },
  {
    "path": "thirdparty/fix_s2_for_aarch64.patch",
    "content": "--- CMakeLists.txt\t2022-04-24 06:33:29.829438987 +0000\n+++ CMakeLists.aarch64.txt\t2022-04-24 06:34:09.079226044 +0000\n@@ -355,6 +355,7 @@ install(FILES src/s2/third_party/absl/ba\n               src/s2/third_party/absl/base/port.h\n               src/s2/third_party/absl/base/thread_annotations.h\n         DESTINATION include/s2/third_party/absl/base)\n+execute_process(COMMAND sed -i \"/attributes\\.h/a\\#include \\\"s2\\/base\\/integral_types\\.h\\\"\" ${PROJECT_SOURCE_DIR}/src/s2/third_party/absl/base/internal/unaligned_access.h)\n install(FILES src/s2/third_party/absl/base/internal/identity.h\n               src/s2/third_party/absl/base/internal/inline_variable.h\n               src/s2/third_party/absl/base/internal/invoke.h\n"
  },
  {
    "path": "thirdparty/fix_thrift_for_cpp11.patch",
    "content": "diff --git a/build/cmake/DefinePlatformSpecifc.cmake b/build/cmake/DefinePlatformSpecifc.cmake\nindex 40ec627..dd77fde 100755\n--- a/build/cmake/DefinePlatformSpecifc.cmake\n+++ b/build/cmake/DefinePlatformSpecifc.cmake\n@@ -84,11 +84,11 @@ endif()\n # GCC and Clang.\n if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES \"Clang\")\n   # FIXME -pedantic can not be used at the moment because of: https://issues.apache.org/jira/browse/THRIFT-2784\n-  #set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -std=c++11 -O2 -Wall -Wextra -pedantic\")\n+  set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -std=c++11 -O2 -Wall -Wextra\")\n   # FIXME enabling c++11 breaks some Linux builds on Travis by triggering a g++ bug, see\n   # https://travis-ci.org/apache/thrift/jobs/58017022\n   # on the other hand, both MacOSX and FreeBSD need c++11\n-  if(${CMAKE_SYSTEM_NAME} MATCHES \"Darwin\" OR ${CMAKE_SYSTEM_NAME} MATCHES \"FreeBSD\")\n-    set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -std=c++11 -O2 -Wall -Wextra\")\n-  endif()\n+  #if(${CMAKE_SYSTEM_NAME} MATCHES \"Darwin\" OR ${CMAKE_SYSTEM_NAME} MATCHES \"FreeBSD\")\n+  #  set(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} -std=c++11 -O2 -Wall -Wextra\")\n+  #endif()\n endif()\ndiff --git a/lib/cpp/src/thrift/cxxfunctional.h b/lib/cpp/src/thrift/cxxfunctional.h\nindex dadaac3..ef32fe1 100644\n--- a/lib/cpp/src/thrift/cxxfunctional.h\n+++ b/lib/cpp/src/thrift/cxxfunctional.h\n@@ -105,19 +105,19 @@\n   }}} // apache::thrift::stdcxx\n \n #elif _THRIFT_USING_GNU_LIBSTDCXX\n-  #include <tr1/functional>\n+  #include <functional>\n \n   namespace apache { namespace thrift { namespace stdcxx {\n-    using ::std::tr1::function;\n-    using ::std::tr1::bind;\n+    using ::std::function;\n+    using ::std::bind;\n \n     namespace placeholders {\n-      using ::std::tr1::placeholders::_1;\n-      using ::std::tr1::placeholders::_2;\n-      using ::std::tr1::placeholders::_3;\n-      using ::std::tr1::placeholders::_4;\n-   
   using ::std::tr1::placeholders::_5;\n-      using ::std::tr1::placeholders::_6;\n+      using ::std::placeholders::_1;\n+      using ::std::placeholders::_2;\n+      using ::std::placeholders::_3;\n+      using ::std::placeholders::_4;\n+      using ::std::placeholders::_5;\n+      using ::std::placeholders::_6;\n     } // apache::thrift::stdcxx::placeholders\n   }}} // apache::thrift::stdcxx\n #endif\n"
  }
]